namespace Nominatim;
+require_once(CONST_LibDir.'/SimpleWordList.php');
+
class Tokenizer
{
private $oDB;
$aWordLists = array();
$aTokens = array();
foreach ($aNormPhrases as $sPhrase) {
- if (strlen($sPhrase) > 0) {
- $aWords = explode(' ', $sPhrase);
- Tokenizer::addTokens($aTokens, $aWords);
- $aWordLists[] = $aWords;
- } else {
- $aWordLists[] = array();
+ $oWordList = new SimpleWordList($sPhrase);
+
+ foreach ($oWordList->getTokens() as $sToken) {
+ $aTokens[' '.$sToken] = ' '.$sToken;
+ $aTokens[$sToken] = $sToken;
}
+
+ $aWordLists[] = $oWordList;
}
Debug::printVar('Tokens', $aTokens);
$oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
foreach ($aPhrases as $iPhrase => $oPhrase) {
- $oPhrase->computeWordSets($aWordLists[$iPhrase], $oValidTokens);
+ $oPhrase->setWordSets($aWordLists[$iPhrase]->getWordSets($oValidTokens));
}
return $oValidTokens;
}
}
}
-
-
- /**
- * Add the tokens from this phrase to the given list of tokens.
- *
- * @param string[] $aTokens List of tokens to append.
- *
- * @return void
- */
- private static function addTokens(&$aTokens, $aWords)
- {
- $iNumWords = count($aWords);
-
- for ($i = 0; $i < $iNumWords; $i++) {
- $sPhrase = $aWords[$i];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
-
- for ($j = $i + 1; $j < $iNumWords; $j++) {
- $sPhrase .= ' '.$aWords[$j];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
- }
- }
- }
}