From a48ebd9b477318bc5fdb44d7dc6bbf695911a4b9 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sat, 17 Jul 2021 20:24:33 +0200 Subject: [PATCH] move SearchDescription building into tokens Moving the logic for extending the SearchDescription into the token classes splits up the code and makes it more readable. More importantly: it allows tokenizer to define custom token classes in the future. --- lib-php/Geocode.php | 4 +- lib-php/SearchDescription.php | 362 +++++++++++----------------------- lib-php/TokenCountry.php | 35 +++- lib-php/TokenHousenumber.php | 72 ++++++- lib-php/TokenList.php | 8 +- lib-php/TokenPartial.php | 82 +++++++- lib-php/TokenPostcode.php | 60 +++++- lib-php/TokenSpecialTerm.php | 49 +++++ lib-php/TokenWord.php | 62 +++++- 9 files changed, 464 insertions(+), 270 deletions(-) diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 001c1e1e..734f4069 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -362,8 +362,8 @@ class Geocode foreach ($aWordsetSearches as $oCurrentSearch) { foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithSearchTerm( - $oSearchTerm, + $aNewSearches = $oSearchTerm->extendSearch( + $oCurrentSearch, $oPosition ); diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index b4a78eb8..4886462a 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -67,35 +67,6 @@ class SearchDescription return $this->iSearchRank; } - /** - * Make this search a POI search. - * - * In a POI search, objects are not (only) searched by their name - * but also by the primary OSM key/value pair (class and type in Nominatim). - * - * @param integer $iOperator Type of POI search - * @param string $sClass Class (or OSM tag key) of POI. - * @param string $sType Type (or OSM tag value) of POI. 
- * - * @return void - */ - public function setPoiSearch($iOperator, $sClass, $sType) - { - $this->iOperator = $iOperator; - $this->sClass = $sClass; - $this->sType = $sType; - } - - /** - * Check if any operator is set. - * - * @return bool True, if this is a special search operation. - */ - public function hasOperator() - { - return $this->iOperator != Operator::NONE; - } - /** * Extract key/value pairs from a query. * @@ -147,244 +118,137 @@ class SearchDescription } /////////// Search building functions + public function clone($iTermCost) + { + $oSearch = clone $this; + $oSearch->iSearchRank += $iTermCost; + return $oSearch; + } - /** - * Derive new searches by adding a full term to the existing search. - * - * @param object $oSearchTerm Description of the token. - * @param object $oPosition Description of the token position within - the query. - * - * @return SearchDescription[] List of derived search descriptions. - */ - public function extendWithSearchTerm($oSearchTerm, $oPosition) + public function hasName($bIncludeNonNames = false) { - $aNewSearches = array(); + return !empty($this->aName) + || (!empty($this->aNameNonSearch) && $bIncludeNonNames); + } - if ($oPosition->maybePhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Country') - ) { - if (!$this->sCountryCode) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->sCountryCode = $oSearchTerm->sCountryCode; - // Country is almost always at the end of the string - // - increase score for finding it anywhere else (optimisation) - if (!$oPosition->isLastToken()) { - $oSearch->iSearchRank += 5; - $oSearch->iNamePhrase = -1; - } - $aNewSearches[] = $oSearch; - } - } elseif ($oPosition->maybePhrase('postalcode') - && is_a($oSearchTerm, '\Nominatim\Token\Postcode') - ) { - if (!$this->sPostcode) { - // If we have structured search or this is the first term, - // make the postcode the primary search element. 
- if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iOperator = Operator::POSTCODE; - $oSearch->aAddress = array_merge($this->aAddress, $this->aName); - $oSearch->aName = - array($oSearchTerm->iId => $oSearchTerm->sPostcode); - $aNewSearches[] = $oSearch; - } + public function hasAddress() + { + return !empty($this->aAddress) || !empty($this->aAddressNonSearch); + } - // If we have a structured search or this is not the first term, - // add the postcode as an addendum. - if ($this->iOperator != Operator::POSTCODE - && ($oPosition->isPhrase('postalcode') || !empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iNamePhrase = -1; - if (strlen($oSearchTerm->sPostcode) < 4) { - $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); - } - $oSearch->sPostcode = $oSearchTerm->sPostcode; - $aNewSearches[] = $oSearch; - } - } - } elseif ($oPosition->maybePhrase('street') - && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') - ) { - if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { - // sanity check: if the housenumber is not mainly made - // up of numbers, add a penalty - $iSearchCost = 1; - if (preg_match('/\\d/', $oSearchTerm->sToken) === 0 - || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) { - $iSearchCost++; - } - if ($this->iOperator != Operator::NONE) { - $iSearchCost++; - } - if (empty($oSearchTerm->iId)) { - $iSearchCost++; - } - // also must not appear in the middle of the address - if (!empty($this->aAddress) - || (!empty($this->aAddressNonSearch)) - || $this->sPostcode - ) { - $iSearchCost++; - } + public function hasCountry() + { + return $this->sCountryCode !== ''; + } - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->iNamePhrase = -1; - $oSearch->sHouseNumber = $oSearchTerm->sToken; - $aNewSearches[] = $oSearch; - - // Housenumbers may appear in the name when 
the place has its own - // address terms. - if ($oSearchTerm->iId !== null - && ($this->iNamePhrase >= 0 || empty($this->aName)) - && empty($this->aAddress) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->aAddress = $this->aName; - $oSearch->bRareName = false; - $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId); - $aNewSearches[] = $oSearch; - } - } - } elseif ($oPosition->isPhrase('') - && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') - ) { - if ($this->iOperator == Operator::NONE) { - $oSearch = clone $this; - $oSearch->iSearchRank += 2; - $oSearch->iNamePhrase = -1; - - $iOp = $oSearchTerm->iOperator; - if ($iOp == Operator::NONE) { - if (!empty($this->aName) || $this->oContext->isBoundedSearch()) { - $iOp = Operator::NAME; - } else { - $iOp = Operator::NEAR; - } - $oSearch->iSearchRank += 2; - } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { - $oSearch->iSearchRank += 2; - } - if ($this->sHouseNumber) { - $oSearch->iSearchRank++; - } + public function hasPostcode() + { + return $this->sPostcode !== ''; + } - $oSearch->setPoiSearch( - $iOp, - $oSearchTerm->sClass, - $oSearchTerm->sType - ); - $aNewSearches[] = $oSearch; - } - } elseif (!$oPosition->isPhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Word') - ) { - $iWordID = $oSearchTerm->iId; - // Full words can only be a name if they appear at the beginning - // of the phrase. In structured search the name must forcably in - // the first phrase. In unstructured search it may be in a later - // phrase when the first phrase is a house number. 
- if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) { - if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) - && $oSearchTerm->iTermCount > 1 - ) { - $oSearch = clone $this; - $oSearch->iNamePhrase = -1; - $oSearch->iSearchRank += 1; - $oSearch->aAddress[$iWordID] = $iWordID; - $aNewSearches[] = $oSearch; - } - } elseif (empty($this->aNameNonSearch)) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->aName = array($iWordID => $iWordID); - if (CONST_Search_NameOnlySearchFrequencyThreshold) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } - $aNewSearches[] = $oSearch; - } - } elseif (!$oPosition->isPhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Partial') - ) { - $aNewSearches = $this->extendWithPartialTerm( - $oSearchTerm, - $oPosition - ); + public function hasHousenumber() + { + return $this->sHouseNumber !== ''; + } + + public function hasOperator($iOperator = null) + { + return $iOperator === null ? 
$this->iOperator != Operator::NONE : $this->iOperator == $iOperator; + } + + public function addAddressToken($iId, $bSearchable = true) + { + if ($bSearchable) { + $this->aAddress[$iId] = $iId; + } else { + $this->aAddressNonSearch[$iId] = $iId; + } + } + + public function addNameToken($iId) + { + $this->aName[$iId] = $iId; + } + + public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber) + { + if ($bSearchable) { + $this->aName[$iId] = $iId; + } else { + $this->aNameNonSearch[$iId] = $iId; } + $this->iNamePhrase = $iPhraseNumber; + } + + public function markRareName() + { + $this->bRareName = true; + } + + public function setCountry($sCountryCode) + { + $this->sCountryCode = $sCountryCode; + $this->iNamePhrase = -1; + } + + public function setPostcode($sPostcode) + { + $this->sPostcode = $sPostcode; + $this->iNamePhrase = -1; + } + + public function setPostcodeAsName($iId, $sPostcode) + { + $this->iOperator = Operator::POSTCODE; + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->aName = array($iId => $sPostcode); + $this->bRareName = true; + $this->iNamePhrase = -1; + } + + public function setHousenumber($sNumber) + { + $this->sHouseNumber = $sNumber; + $this->iNamePhrase = -1; + } - return $aNewSearches; + public function setHousenumberAsName($iId) + { + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->bRareName = false; + $this->aName = array($iId => $iId); + $this->iNamePhrase = -1; } /** - * Derive new searches by adding a partial term to the existing search. + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). * - * @param object $oSearchTerm Description of the token. - * @param object $oPosition Description of the token position within - the query. + * @param integer $iOperator Type of POI search + * @param string $sClass Class (or OSM tag key) of POI. 
+ * @param string $sType Type (or OSM tag value) of POI. * - * @return SearchDescription[] List of derived search descriptions. + * @return void */ - private function extendWithPartialTerm($oSearchTerm, $oPosition) + public function setPoiSearch($iOperator, $sClass, $sType) { - $aNewSearches = array(); - $iWordID = $oSearchTerm->iId; - - if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) - && (!empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - $oSearch->aAddress[$iWordID] = $iWordID; - } else { - $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - } - $aNewSearches[] = $oSearch; - } + $this->iOperator = $iOperator; + $this->sClass = $sClass; + $this->sType = $sType; + $this->iNamePhrase = -1; + } - if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && ((empty($this->aName) && empty($this->aNameNonSearch)) - || $this->iNamePhrase == $oPosition->getPhrase()) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (empty($this->aName) && empty($this->aNameNonSearch)) { - $oSearch->iSearchRank++; - } - if (preg_match('#^[0-9 ]+$#', $oSearchTerm->sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - if (empty($this->aName) - && CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } else { - $oSearch->bRareName = false; - } - $oSearch->aName[$iWordID] = $iWordID; - } else { - $oSearch->aNameNonSearch[$iWordID] = $iWordID; - } - $oSearch->iNamePhrase = $oPosition->getPhrase(); - $aNewSearches[] = $oSearch; - } + public function getNamePhrase() + { + return $this->iNamePhrase; + } - return $aNewSearches; + public function getContext() + { + return $this->oContext; } /////////// Query 
functions diff --git a/lib-php/TokenCountry.php b/lib-php/TokenCountry.php index 518c0a31..917ed9d2 100644 --- a/lib-php/TokenCountry.php +++ b/lib-php/TokenCountry.php @@ -8,9 +8,9 @@ namespace Nominatim\Token; class Country { /// Database word id, if available. - public $iId; + private $iId; /// Two-letter country code (lower-cased). - public $sCountryCode; + private $sCountryCode; public function __construct($iId, $sCountryCode) { @@ -18,6 +18,32 @@ class Country $this->sCountryCode = $sCountryCode; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + if ($oSearch->hasCountry() || !$oPosition->maybePhrase('country')) { + return array(); + } + + $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6); + $oNewSearch->setCountry($this->sCountryCode); + + return array($oNewSearch); + } + public function debugInfo() { return array( @@ -26,4 +52,9 @@ class Country 'Info' => $this->sCountryCode ); } + + public function debugCode() + { + return 'C'; + } } diff --git a/lib-php/TokenHousenumber.php b/lib-php/TokenHousenumber.php index 5c7c6e9b..0cc67a12 100644 --- a/lib-php/TokenHousenumber.php +++ b/lib-php/TokenHousenumber.php @@ -8,9 +8,9 @@ namespace Nominatim\Token; class HouseNumber { /// Database word id, if available. - public $iId; + private $iId; /// Normalized house number. - public $sToken; + private $sToken; public function __construct($iId, $sToken) { @@ -18,6 +18,69 @@ class HouseNumber $this->sToken = $sToken; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. 
+ * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $aNewSearches = array(); + + if ($oSearch->hasHousenumber() + || $oSearch->hasOperator(\Nominatim\Operator::POSTCODE) + || !$oPosition->maybePhrase('street') + ) { + return $aNewSearches; + } + + // sanity check: if the housenumber is not mainly made + // up of numbers, add a penalty + $iSearchCost = 1; + if (preg_match('/\\d/', $this->sToken) === 0 + || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) { + $iSearchCost++; + } + if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) { + $iSearchCost++; + } + if (empty($this->iId)) { + $iSearchCost++; + } + // also must not appear in the middle of the address + if ($oSearch->hasAddress() || $oSearch->hasPostcode()) { + $iSearchCost++; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setHousenumber($this->sToken); + $aNewSearches[] = $oNewSearch; + + // Housenumbers may appear in the name when the place has its own + // address terms. 
+ if ($this->iId !== null + && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName()) + && !$oSearch->hasAddress() + ) { + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setHousenumberAsName($this->iId); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + + public function debugInfo() { return array( @@ -26,4 +89,9 @@ class HouseNumber 'Info' => array('nr' => $this->sToken) ); } + + public function debugCode() + { + return 'H'; + } } diff --git a/lib-php/TokenList.php b/lib-php/TokenList.php index bc8f9c3f..a599648c 100644 --- a/lib-php/TokenList.php +++ b/lib-php/TokenList.php @@ -79,7 +79,7 @@ class TokenList foreach ($this->aTokens as $aTokenList) { foreach ($aTokenList as $oToken) { if (is_a($oToken, '\Nominatim\Token\Word')) { - $ids[$oToken->iId] = $oToken->iId; + $ids[$oToken->getId()] = $oToken->getId(); } } } @@ -109,9 +109,9 @@ class TokenList $aWordsIDs = array(); foreach ($this->aTokens as $sToken => $aWords) { foreach ($aWords as $aToken) { - if ($aToken->iId !== null) { - $aWordsIDs[$aToken->iId] = - '#'.$sToken.'('.$aToken->iId.')#'; + $iId = $aToken->getId(); + if ($iId !== null) { + $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#'; } } } diff --git a/lib-php/TokenPartial.php b/lib-php/TokenPartial.php index 99a75947..e52161cc 100644 --- a/lib-php/TokenPartial.php +++ b/lib-php/TokenPartial.php @@ -8,19 +8,86 @@ namespace Nominatim\Token; class Partial { /// Database word id, if applicable. - public $iId; + private $iId; /// Number of appearances in the database. - public $iSearchNameCount; - /// Normalised version of the partial word. - public $sToken; + private $iSearchNameCount; + /// True, if the token consists exclusively of digits and spaces. 
+ private $bNumberToken; public function __construct($iId, $sToken, $iSearchNameCount) { $this->iId = $iId; - $this->sToken = $sToken; + $this->bNumberToken = (bool) preg_match('#^[0-9 ]+$#', $sToken); $this->iSearchNameCount = $iSearchNameCount; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + if ($oPosition->isPhrase('country')) { + return array(); + } + + $aNewSearches = array(); + + // Partial token in Address. + if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) + && $oSearch->hasName() + ) { + $iSearchCost = $this->bNumberToken ? 2 : 1; + if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) { + $iSearchCost += 1; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->addAddressToken( + $this->iId, + $this->iSearchNameCount < CONST_Max_Word_Frequency + ); + + $aNewSearches[] = $oNewSearch; + } + + // Partial token in Name. 
+ if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress()) + && (!$oSearch->hasName(true) + || $oSearch->getNamePhrase() == $oPosition->getPhrase()) + ) { + $iSearchCost = 1; + if (!$oSearch->hasName(true)) { + $iSearchCost += 1; + } + if ($this->bNumberToken) { + $iSearchCost += 1; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->addPartialNameToken( + $this->iId, + $this->iSearchNameCount < CONST_Max_Word_Frequency, + $oPosition->getPhrase() + ); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + + public function debugInfo() { return array( @@ -31,4 +98,9 @@ class Partial ) ); } + + public function debugCode() + { + return 'w'; + } } diff --git a/lib-php/TokenPostcode.php b/lib-php/TokenPostcode.php index 8fa2ae80..563fe7fa 100644 --- a/lib-php/TokenPostcode.php +++ b/lib-php/TokenPostcode.php @@ -8,11 +8,11 @@ namespace Nominatim\Token; class Postcode { /// Database word id, if available. - public $iId; + private $iId; /// Full nomralized postcode (upper cased). - public $sPostcode; + private $sPostcode; // Optional country code the postcode belongs to (currently unused). - public $sCountryCode; + private $sCountryCode; public function __construct($iId, $sPostcode, $sCountryCode = '') { @@ -21,6 +21,55 @@ class Postcode $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. 
+ */ + public function extendSearch($oSearch, $oPosition) + { + $aNewSearches = array(); + + if ($oSearch->hasPostcode() || !$oPosition->maybePhrase('postalcode')) { + return $aNewSearches; + } + + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($oSearch->hasOperator(\Nominatim\Operator::NONE) && $oPosition->isFirstToken()) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->setPostcodeAsName($this->iId, $this->sPostcode); + + $aNewSearches[] = $oNewSearch; + } + + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if (!$oSearch->hasOperator(\Nominatim\Operator::POSTCODE) + && ($oPosition->isPhrase('postalcode') || $oSearch->hasName()) + ) { + $iPenalty = 1; + if (strlen($this->sPostcode) < 4) { + $iPenalty += 4 - strlen($this->sPostcode); + } + $oNewSearch = $oSearch->clone($iPenalty); + $oNewSearch->setPostcode($this->sPostcode); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + public function debugInfo() { return array( @@ -29,4 +78,9 @@ class Postcode 'Info' => $this->sPostcode.'('.$this->sCountryCode.')' ); } + + public function debugCode() + { + return 'P'; + } } diff --git a/lib-php/TokenSpecialTerm.php b/lib-php/TokenSpecialTerm.php index b2c312ec..89dfa026 100644 --- a/lib-php/TokenSpecialTerm.php +++ b/lib-php/TokenSpecialTerm.php @@ -26,6 +26,50 @@ class SpecialTerm $this->iOperator = $iOperator; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. 
+ */ + public function extendSearch($oSearch, $oPosition) + { + if ($oSearch->hasOperator() || !$oPosition->isPhrase('')) { + return array(); + } + + $iSearchCost = 2; + + $iOp = $this->iOperator; + if ($iOp == \Nominatim\Operator::NONE) { + if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) { + $iOp = \Nominatim\Operator::NAME; + } else { + $iOp = \Nominatim\Operator::NEAR; + } + $iSearchCost += 2; + } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { + $iSearchCost += 2; + } + if ($oSearch->hasHousenumber()) { + $iSearchCost ++; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setPoiSearch($iOp, $this->sClass, $this->sType); + + return array($oNewSearch); + } + + public function debugInfo() { return array( @@ -38,4 +82,9 @@ class SpecialTerm ) ); } + + public function debugCode() + { + return 'S'; + } } diff --git a/lib-php/TokenWord.php b/lib-php/TokenWord.php index 6de58422..7c653f8f 100644 --- a/lib-php/TokenWord.php +++ b/lib-php/TokenWord.php @@ -8,11 +8,11 @@ namespace Nominatim\Token; class Word { /// Database word id, if applicable. - public $iId; + private $iId; /// Number of appearances in the database. - public $iSearchNameCount; + private $iSearchNameCount; /// Number of terms in the word. - public $iTermCount; + private $iTermCount; public function __construct($iId, $iSearchNameCount, $iTermCount) { @@ -21,6 +21,57 @@ class Word $this->iTermCount = $iTermCount; } + public function getId() + { + return $this->iId; + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. 
+ */ + public function extendSearch($oSearch, $oPosition) + { + if ($oPosition->isPhrase('country')) { + return array(); + } + + // Full words can only be a name if they appear at the beginning + // of the phrase. In structured search the name must forcibly be in + // the first phrase. In unstructured search it may be in a later + // phrase when the first phrase is a house number. + if ($oSearch->hasName() + || !($oPosition->isFirstPhrase() || $oPosition->isPhrase('')) + ) { + if ($this->iTermCount > 1 + && ($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) + ) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->addAddressToken($this->iId); + + return array($oNewSearch); + } + } elseif (!$oSearch->hasName(true)) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->addNameToken($this->iId); + if (CONST_Search_NameOnlySearchFrequencyThreshold + && $this->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold + ) { + $oNewSearch->markRareName(); + } + + return array($oNewSearch); + } + + return array(); + } + public function debugInfo() { return array( @@ -32,4 +83,9 @@ class Word ) ); } + + public function debugCode() + { + return 'W'; + } } -- 2.39.5