protected $oDB;
protected $oPlaceLookup;
+ protected $oTokenizer;
protected $aLangPrefOrder = array();
protected $sQuery = false;
protected $aStructuredQuery = false;
- protected $oNormalizer = null;
-
public function __construct(&$oDB)
{
$this->oDB =& $oDB;
$this->oPlaceLookup = new PlaceLookup($this->oDB);
- $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
- }
-
- private function normTerm($sTerm)
- {
- if ($this->oNormalizer === null) {
- return $sTerm;
- }
-
- return $this->oNormalizer->transliterate($sTerm);
+ $this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
}
public function setLanguagePreference($aLangPref)
if ($this->aCountryCodes) {
$oCtx->setCountryList($this->aCountryCodes);
}
+ $this->oTokenizer->setCountryRestriction($this->aCountryCodes);
Debug::newSection('Query Preprocessing');
- $sNormQuery = $this->normTerm($this->sQuery);
- Debug::printVar('Normalized query', $sNormQuery);
-
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
);
}
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
- $sSpecialTerm = pg_escape_string($sSpecialTerm);
- $sToken = $this->oDB->getOne(
- 'SELECT make_standard_name(:term)',
- array(':term' => $sSpecialTerm),
- 'Cannot decode query. Wrong encoding?'
- );
- $sSQL = 'SELECT class, type FROM word ';
- $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')';
- $sSQL .= ' AND class is not null AND class not in (\'place\')';
-
- Debug::printSQL($sSQL);
- $aSearchWords = $this->oDB->getAll($sSQL);
- $aNewSearches = array();
- foreach ($aSearches as $oSearch) {
- foreach ($aSearchWords as $aSearchTerm) {
- $oNewSearch = clone $oSearch;
- $oNewSearch->setPoiSearch(
- Operator::TYPE,
- $aSearchTerm['class'],
- $aSearchTerm['type']
- );
- $aNewSearches[] = $oNewSearch;
+ $aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
+
+ if (!empty($aTokens)) {
+ $aNewSearches = array();
+ foreach ($aSearches as $oSearch) {
+ foreach ($aTokens as $oToken) {
+ $oNewSearch = clone $oSearch;
+ $oNewSearch->setPoiSearch(
+ $oToken->iOperator,
+ $oToken->sClass,
+ $oToken->sType
+ );
+ $aNewSearches[] = $oNewSearch;
+ }
}
+ $aSearches = $aNewSearches;
}
- $aSearches = $aNewSearches;
}
// Split query into phrases
// Commas are used to reduce the search space by indicating where phrases split
+ $aPhrases = array();
if ($this->aStructuredQuery) {
- $aInPhrases = $this->aStructuredQuery;
+ foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
+ $aPhrases[] = new Phrase($sPhrase, $iPhrase);
+ }
} else {
- $aInPhrases = explode(',', $sQuery);
+ foreach (explode(',', $sQuery) as $sPhrase) {
+ $aPhrases[] = new Phrase($sPhrase, '');
+ }
}
Debug::printDebugArray('Search context', $oCtx);
Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
- Debug::printVar('Final query phrases', $aInPhrases);
- // Convert each phrase to standard form
- // Create a list of standard words
- // Get all 'sets' of words
- // Generate a complete list of all
Debug::newSection('Tokenization');
- $aTokens = array();
- $aPhrases = array();
- foreach ($aInPhrases as $iPhrase => $sPhrase) {
- $sPhrase = $this->oDB->getOne(
- 'SELECT make_standard_name(:phrase)',
- array(':phrase' => $sPhrase),
- 'Cannot normalize query string (is it a UTF-8 string?)'
- );
- if (trim($sPhrase)) {
- $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
- $oPhrase->addTokens($aTokens);
- $aPhrases[] = $oPhrase;
- }
- }
-
- Debug::printVar('Tokens', $aTokens);
-
- $oValidTokens = new TokenList();
-
- if (!empty($aTokens)) {
- $oValidTokens->addTokensFromDB(
- $this->oDB,
- $aTokens,
- $this->aCountryCodes,
- $sNormQuery,
- $this->oNormalizer
- );
+ $oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
+ if ($oValidTokens->count() > 0) {
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
- // Try more interpretations for Tokens that could not be matched.
- foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
- // US ZIP+4 codes - merge in the 5-digit ZIP code
- $oValidTokens->addToken(
- $sToken,
- new Token\Postcode(null, $aData[1], 'us')
- );
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
- // Unknown single word token with a number.
- // Assume it is a house number.
- $oValidTokens->addToken(
- $sToken,
- new Token\HouseNumber(null, trim($sToken))
- );
- }
- }
- }
+ $aPhrases = array_filter($aPhrases, function ($oPhrase) {
+ return $oPhrase->getWordSets() !== null;
+ });
// Any words that have failed completely?
// TODO: suggestions
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
-
- foreach ($aPhrases as $oPhrase) {
- $oPhrase->computeWordSets($oValidTokens);
- }
Debug::printDebugTable('Phrases', $aPhrases);
Debug::newSection('Search candidates');
private $sPhrase;
// Element type for structured searches.
private $sPhraseType;
- // Space-separated words of the phrase.
- private $aWords;
// Possible segmentations of the phrase.
private $aWordSets;
{
$this->sPhrase = trim($sPhrase);
$this->sPhraseType = $sPhraseType;
- $this->aWords = explode(' ', $this->sPhrase);
+ }
+
+ /**
+ * Get the original phrase string.
+ */
+ public function getPhrase()
+ {
+ return $this->sPhrase;
}
/**
return $this->aWordSets;
}
- /**
- * Add the tokens from this phrase to the given list of tokens.
- *
- * @param string[] $aTokens List of tokens to append.
- *
- * @return void
- */
- public function addTokens(&$aTokens)
- {
- $iNumWords = count($this->aWords);
-
- for ($i = 0; $i < $iNumWords; $i++) {
- $sPhrase = $this->aWords[$i];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
-
- for ($j = $i + 1; $j < $iNumWords; $j++) {
- $sPhrase .= ' '.$this->aWords[$j];
- $aTokens[' '.$sPhrase] = ' '.$sPhrase;
- $aTokens[$sPhrase] = $sPhrase;
- }
- }
- }
-
/**
* Invert the set of possible segmentations.
*
}
}
- public function computeWordSets($oTokens)
+ public function computeWordSets($aWords, $oTokens)
{
- $iNumWords = count($this->aWords);
+ $iNumWords = count($aWords);
+
+ if ($iNumWords == 0) {
+ $this->aWordSets = null;
+ return;
+ }
+
// Caches the word set for the partial phrase up to word i.
$aSetCache = array_fill(0, $iNumWords, array());
// Initialise first element of cache. There can only be the word.
- if ($oTokens->containsAny($this->aWords[0])) {
- $aSetCache[0][] = array($this->aWords[0]);
+ if ($oTokens->containsAny($aWords[0])) {
+ $aSetCache[0][] = array($aWords[0]);
}
// Now do the next elements using what we already have.
for ($i = 1; $i < $iNumWords; $i++) {
for ($j = $i; $j > 0; $j--) {
- $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
+ $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
$aPartial = array($sPartial);
foreach ($aSetCache[$j - 1] as $aSet) {
}
// finally the current full phrase
- $sPartial = $this->aWords[0].' '.$sPartial;
+ $sPartial = $aWords[0].' '.$sPartial;
if ($oTokens->containsAny($sPartial)) {
$aSetCache[$i][] = array($sPartial);
}
return array(
'Type' => $this->sPhraseType,
'Phrase' => $this->sPhrase,
- 'Words' => $this->aWords,
'WordSets' => $this->aWordSets
);
}
return $ids;
}
- /**
- * Add token information from the word table in the database.
- *
- * @param object $oDB Nominatim::DB instance.
- * @param string[] $aTokens List of tokens to look up in the database.
- * @param string[] $aCountryCodes List of country restrictions.
- * @param string $sNormQuery Normalized query string.
- * @param object $oNormalizer Normalizer function to use on tokens.
- *
- * @return void
- */
- public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
- {
- // Check which tokens we have, get the ID numbers
- $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
- $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
- $sSQL .= ' FROM word WHERE word_token in (';
- $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
-
- Debug::printSQL($sSQL);
-
- $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
-
- foreach ($aDBWords as $aWord) {
- $oToken = null;
- $iId = (int) $aWord['word_id'];
-
- if ($aWord['class']) {
- // Special terms need to appear in their normalized form.
- if ($aWord['word']) {
- $sNormWord = $aWord['word'];
- if ($oNormalizer != null) {
- $sNormWord = $oNormalizer->transliterate($aWord['word']);
- }
- if (strpos($sNormQuery, $sNormWord) === false) {
- continue;
- }
- }
-
- if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
- $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
- } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
- if ($aWord['word']
- && pg_escape_string($aWord['word']) == $aWord['word']
- ) {
- $oToken = new Token\Postcode(
- $iId,
- $aWord['word'],
- $aWord['country_code']
- );
- }
- } else {
- // near and in operator the same at the moment
- $oToken = new Token\SpecialTerm(
- $iId,
- $aWord['class'],
- $aWord['type'],
- $aWord['operator'] ? Operator::NEAR : Operator::NONE
- );
- }
- } elseif ($aWord['country_code']) {
- // Filter country tokens that do not match restricted countries.
- if (!$aCountryCodes
- || in_array($aWord['country_code'], $aCountryCodes)
- ) {
- $oToken = new Token\Country($iId, $aWord['country_code']);
- }
- } else {
- $oToken = new Token\Word(
- $iId,
- $aWord['word_token'][0] != ' ',
- (int) $aWord['count'],
- substr_count($aWord['word_token'], ' ')
- );
- }
-
- if ($oToken) {
- $this->addToken($aWord['word_token'], $oToken);
- }
- }
- }
-
/**
* Add a new token for the given word.
*
<?php
+
+namespace Nominatim;
+
+/**
+ * Tokenizer that normalizes query phrases, splits them into words and
+ * looks up the matching tokens in the `word` table of the database.
+ */
+class Tokenizer
+{
+    // Database connection used for all lookups.
+    private $oDB;
+
+    // Transliterator created from CONST_Term_Normalization_Rules.
+    private $oNormalizer = null;
+    // Country codes that country tokens are restricted to (empty: no restriction).
+    private $aCountryRestriction = null;
+
+    /**
+     * Create a tokenizer working on the given database connection.
+     *
+     * @param object $oDB Database connection object, stored by reference.
+     */
+    public function __construct(&$oDB)
+    {
+        $this->oDB =& $oDB;
+        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+    }
+
+
+    /**
+     * Restrict country tokens to the given list of countries.
+     *
+     * @param string[] $aCountries List of country codes; an empty value
+     *                             disables the restriction.
+     *
+     * @return void
+     */
+    public function setCountryRestriction($aCountries)
+    {
+        $this->aCountryRestriction = $aCountries;
+    }
+
+
+    /**
+     * Apply the term normalization rules to a string.
+     *
+     * @param string $sTerm Term to normalize.
+     *
+     * @return string The transliterated term, or the unchanged input when
+     *                no normalizer is available.
+     */
+    public function normalizeString($sTerm)
+    {
+        if ($this->oNormalizer === null) {
+            return $sTerm;
+        }
+
+        return $this->oNormalizer->transliterate($sTerm);
+    }
+
+
+    /**
+     * Look up the special-term tokens that match the given term as a full word.
+     *
+     * Entries of class 'place' are excluded by the query.
+     *
+     * @param string $sTerm Term to look up; it is standardized in the database.
+     *
+     * @return Token\SpecialTerm[] One token with operator Operator::TYPE
+     *                             per matching row.
+     */
+    public function tokensForSpecialTerm($sTerm)
+    {
+        $aResults = array();
+
+        $sSQL = 'SELECT word_id, class, type FROM word ';
+        $sSQL .= '   WHERE word_token = \' \' || make_standard_name(:term)';
+        $sSQL .= '   AND class is not null AND class not in (\'place\')';
+
+        Debug::printVar('Term', $sTerm);
+        Debug::printSQL($sSQL);
+        $aSearchWords = $this->oDB->getAll($sSQL, array(':term' => $sTerm));
+
+        Debug::printVar('Results', $aSearchWords);
+
+        foreach ($aSearchWords as $aSearchTerm) {
+            $aResults[] = new \Nominatim\Token\SpecialTerm(
+                // Cast like addTokensFromDB() does, so ids have one type.
+                (int) $aSearchTerm['word_id'],
+                $aSearchTerm['class'],
+                $aSearchTerm['type'],
+                \Nominatim\Operator::TYPE
+            );
+        }
+
+        Debug::printVar('Special term tokens', $aResults);
+
+        return $aResults;
+    }
+
+
+    /**
+     * Compute the valid tokens for a list of phrases and let every phrase
+     * compute its word sets from them.
+     *
+     * @param Phrase[] $aPhrases Phrases of the query. computeWordSets() is
+     *                           called on each of them.
+     *
+     * @return TokenList Tokens found for the words of all phrases.
+     */
+    public function extractTokensFromPhrases(&$aPhrases)
+    {
+        // Without phrases there is nothing to normalize and the SELECT
+        // built below would not be valid SQL.
+        if (empty($aPhrases)) {
+            return new TokenList();
+        }
+
+        // First get the normalized version of all phrases.
+        // Placeholders and columns are numbered by position so that the
+        // statement does not depend on the keys used in $aPhrases.
+        $sNormQuery = '';
+        $aColumns = array();
+        $aParams = array();
+        $aPhraseKeys = array_keys($aPhrases);
+        foreach ($aPhraseKeys as $iPos => $mPhraseKey) {
+            $sPhrase = $aPhrases[$mPhraseKey]->getPhrase();
+            $sNormQuery .= ','.$this->normalizeString($sPhrase);
+            $aColumns[] = 'make_standard_name(:'.$iPos.') as p'.$iPos;
+            $aParams[':'.$iPos] = $sPhrase;
+        }
+        $sSQL = 'SELECT '.join(',', $aColumns);
+
+        Debug::printSQL($sSQL);
+        Debug::printVar('SQL parameters', $aParams);
+
+        $aNormPhrases = $this->oDB->getRow($sSQL, $aParams);
+
+        Debug::printVar('SQL result', $aNormPhrases);
+
+        // now compute all possible tokens
+        // Word lists are stored under the key of their phrase, so the
+        // lookup below is correct even when the keys are not 0..n-1.
+        $aNormValues = is_array($aNormPhrases) ? array_values($aNormPhrases) : array();
+        $aWordLists = array();
+        $aTokens = array();
+        foreach ($aPhraseKeys as $iPos => $mPhraseKey) {
+            $sPhrase = isset($aNormValues[$iPos]) ? trim((string) $aNormValues[$iPos]) : '';
+            if ($sPhrase !== '') {
+                $aWords = explode(' ', $sPhrase);
+                self::addTokens($aTokens, $aWords);
+                $aWordLists[$mPhraseKey] = $aWords;
+            } else {
+                $aWordLists[$mPhraseKey] = array();
+            }
+        }
+
+        Debug::printVar('Tokens', $aTokens);
+        Debug::printVar('WordLists', $aWordLists);
+
+        $oValidTokens = $this->computeValidTokens($aTokens, $sNormQuery);
+
+        foreach ($aPhrases as $mPhraseKey => $oPhrase) {
+            $oPhrase->computeWordSets($aWordLists[$mPhraseKey], $oValidTokens);
+        }
+
+        return $oValidTokens;
+    }
+
+
+    /**
+     * Build the list of valid tokens for the given candidate tokens.
+     *
+     * Tokens are looked up in the database first. Full-word tokens (leading
+     * space) that remain unknown get a fallback interpretation: a US ZIP+4
+     * code becomes a postcode token for its 5-digit part, a purely numeric
+     * token becomes a house number.
+     *
+     * @param string[] $aTokens    Candidate tokens.
+     * @param string   $sNormQuery Normalized query string.
+     *
+     * @return TokenList
+     */
+    private function computeValidTokens($aTokens, $sNormQuery)
+    {
+        $oValidTokens = new TokenList();
+
+        if (!empty($aTokens)) {
+            $this->addTokensFromDB($oValidTokens, $aTokens, $sNormQuery);
+
+            // Try more interpretations for Tokens that could not be matched.
+            foreach ($aTokens as $sToken) {
+                if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                    if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                        // US ZIP+4 codes - merge in the 5-digit ZIP code
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\Postcode(null, $aData[1], 'us')
+                        );
+                    } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                        // Unknown single word token with a number.
+                        // Assume it is a house number.
+                        $oValidTokens->addToken(
+                            $sToken,
+                            new Token\HouseNumber(null, trim($sToken))
+                        );
+                    }
+                }
+            }
+        }
+
+        return $oValidTokens;
+    }
+
+
+    /**
+     * Add token information from the word table in the database.
+     *
+     * @param TokenList $oValidTokens Token list the found tokens are added to.
+     * @param string[]  $aTokens      List of tokens to look up in the database.
+     * @param string    $sNormQuery   Normalized query string.
+     *
+     * @return void
+     */
+    private function addTokensFromDB(&$oValidTokens, $aTokens, $sNormQuery)
+    {
+        // Check which tokens we have, get the ID numbers
+        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+        $sSQL .= ' FROM word WHERE word_token in (';
+        $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
+
+        Debug::printSQL($sSQL);
+
+        $aDBWords = $this->oDB->getAll($sSQL, null, 'Could not get word tokens.');
+
+        foreach ($aDBWords as $aWord) {
+            $oToken = null;
+            $iId = (int) $aWord['word_id'];
+
+            if ($aWord['class']) {
+                // Special terms need to appear in their normalized form.
+                // (postcodes are not normalized in the word table)
+                // Only normalize when there is a word: the transliterator
+                // must not be handed a null value.
+                if ($aWord['word']
+                    && strpos($sNormQuery, $this->normalizeString($aWord['word'])) === false
+                ) {
+                    continue;
+                }
+
+                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+                    // Skip postcodes that would be changed by SQL escaping.
+                    if ($aWord['word']
+                        && pg_escape_string($aWord['word']) == $aWord['word']
+                    ) {
+                        $oToken = new Token\Postcode(
+                            $iId,
+                            $aWord['word'],
+                            $aWord['country_code']
+                        );
+                    }
+                } else {
+                    // near and in operator the same at the moment
+                    $oToken = new Token\SpecialTerm(
+                        $iId,
+                        $aWord['class'],
+                        $aWord['type'],
+                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
+                    );
+                }
+            } elseif ($aWord['country_code']) {
+                // Filter country tokens that do not match restricted countries.
+                if (!$this->aCountryRestriction
+                    || in_array($aWord['country_code'], $this->aCountryRestriction)
+                ) {
+                    $oToken = new Token\Country($iId, $aWord['country_code']);
+                }
+            } else {
+                // Tokens without a leading space are partial words.
+                $oToken = new Token\Word(
+                    $iId,
+                    $aWord['word_token'][0] != ' ',
+                    (int) $aWord['count'],
+                    substr_count($aWord['word_token'], ' ')
+                );
+            }
+
+            if ($oToken) {
+                $oValidTokens->addToken($aWord['word_token'], $oToken);
+            }
+        }
+    }
+
+
+    /**
+     * Add the tokens for all runs of consecutive words to the given list.
+     *
+     * Every run is added twice: prefixed with a space and without prefix.
+     *
+     * @param string[] $aTokens List of tokens to append to; token strings
+     *                          are used as both key and value.
+     * @param string[] $aWords  Words of one phrase, in order.
+     *
+     * @return void
+     */
+    private static function addTokens(&$aTokens, $aWords)
+    {
+        $iNumWords = count($aWords);
+
+        for ($i = 0; $i < $iNumWords; $i++) {
+            $sPhrase = $aWords[$i];
+            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+            $aTokens[$sPhrase] = $sPhrase;
+
+            for ($j = $i + 1; $j < $iNumWords; $j++) {
+                $sPhrase .= ' '.$aWords[$j];
+                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
+                $aTokens[$sPhrase] = $sPhrase;
+            }
+        }
+    }
+}
public function testEmptyPhrase()
{
$oPhrase = new Phrase('', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array(), new TokensFullSet());
- $this->assertEquals(
- array(array('')),
- $oPhrase->getWordSets()
- );
+ $this->assertNull($oPhrase->getWordSets());
}
public function testSingleWordPhrase()
{
$oPhrase = new Phrase('a', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a'), new TokensFullSet());
$this->assertEquals(
'(a)',
public function testMultiWordPhrase()
{
$oPhrase = new Phrase('a b', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet());
$this->assertEquals(
'(a b),(a|b)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
$this->assertEquals(
'(a b c),(a|b c),(a b|c),(a|b|c)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c d', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet());
$this->assertEquals(
'(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
$this->serializeSets($oPhrase->getWordSets())
public function testInverseWordSets()
{
$oPhrase = new Phrase('a b c', '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
$oPhrase->invertWordSets();
$this->assertEquals(
public function testMaxWordSets()
{
- $oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $aWords = array_fill(0, 4, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensFullSet());
$this->assertEquals(8, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(8, count($oPhrase->getWordSets()));
- $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
- $oPhrase->computeWordSets(new TokensFullSet());
+ $aWords = array_fill(0, 18, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensFullSet());
$this->assertEquals(100, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(100, count($oPhrase->getWordSets()));
public function testPartialTokensShortTerm()
{
$oPhrase = new Phrase('a b c d', '');
- $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
+ $oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
$this->assertEquals(
'(a|b c d),(a|b c|d)',
$this->serializeSets($oPhrase->getWordSets())
public function testPartialTokensLongTerm()
{
- $oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
- $oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
+ $aWords = array_fill(0, 18, 'a');
+ $oPhrase = new Phrase(join(' ', $aWords), '');
+ $oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a')));
$this->assertEquals(80, count($oPhrase->getWordSets()));
}
}
$this->assertFalse($TL->contains('unknownword'));
$this->assertEquals(array(), $TL->get('unknownword'));
}
-
- public function testAddress()
- {
- $this->expectOutputRegex('/<p><tt>/');
-
- $oDbStub = $this->getMockBuilder(Nominatim\DB::class)
- ->setMethods(array('getAll', 'getDBQuotedList'))
- ->getMock();
-
- $oDbStub->method('getDBQuotedList')
- ->will($this->returnCallback(function ($aVals) {
- return array_map(function ($sVal) {
- return "'".$sVal."'";
- }, $aVals);
- }));
-
-
- $oDbStub->method('getAll')
- ->will($this->returnCallback(function ($sql) {
- $aResults = array();
- if (preg_match('/1051/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => '1051',
- 'class' => 'place',
- 'type' => 'house'
- ));
- }
- if (preg_match('/hauptstr/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'hauptstr',
- 'class' => 'place',
- 'type' => 'street',
- 'operator' => true
- ));
- }
- if (preg_match('/64286/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => '64286',
- 'word' => '64286',
- 'class' => 'place',
- 'type' => 'postcode'
- ));
- }
- if (preg_match('/darmstadt/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'darmstadt',
- 'count' => 533
- ));
- }
- if (preg_match('/alemagne/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'alemagne',
- 'country_code' => 'de',
- ));
- }
- if (preg_match('/mexico/', $sql)) {
- $aResults[] = $this->wordResult(array(
- 'word_id' => 999,
- 'word_token' => 'mexico',
- 'country_code' => 'mx',
- ));
- }
- return $aResults;
- }));
-
- $aCountryCodes = array('de', 'fr');
- $sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico';
- $aTokens = explode(' ', $sNormQuery);
-
- $TL = new TokenList;
- $TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
- $this->assertEquals(5, $TL->count());
-
- $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
- $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
- $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
- $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
- $this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
- }
}
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+/**
+ * Tokenizer stub that only keeps a reference to the database connection.
+ */
+class Tokenizer
+{
+ // Database connection handed in through the constructor.
+ private $oDB;
+
+ /**
+  * Create the tokenizer for the given database connection.
+  *
+  * @param object $oDB Database connection object, stored by reference.
+  */
+ public function __construct(&$oDB)
+ {
+ $this->oDB =& $oDB;
+ }
+
+ /**
+  * Currently has an empty body and returns nothing.
+  *
+  * NOTE(review): presumably a placeholder for a tokenizer health check;
+  * confirm the intended behaviour.
+  */
+ public function checkStatus()
+ {
+ }
+}