From: Sarah Hoffmann Date: Thu, 29 Jul 2021 19:25:59 +0000 (+0200) Subject: remove country restriction from tokenizer X-Git-Tag: v4.0.0~41^2~4 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/0fb8eade136ea03e7853aca0795ca69833c33661 remove country restriction from tokenizer Restricting tokens due to the search context is better done in the generic search part instead of repeating the same test in every tokenizer implementation. --- diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 52b92c99..0f76a9c4 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -498,7 +498,6 @@ class Geocode if ($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } - $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); diff --git a/lib-php/SearchContext.php b/lib-php/SearchContext.php index 8316a012..3b512ecb 100644 --- a/lib-php/SearchContext.php +++ b/lib-php/SearchContext.php @@ -28,6 +28,8 @@ class SearchContext public $sqlViewboxLarge = ''; /// Reference along a route (as SQL). public $sqlViewboxCentre = ''; + /// List of countries to restrict search to (as array). + public $aCountryList = null; /// List of countries to restrict search to (as SQL). public $sqlCountryList = ''; /// List of place IDs to exclude (as SQL). @@ -187,6 +189,7 @@ class SearchContext public function setCountryList($aCountries) { $this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')'; + $this->aCountryList = $aCountries; } /** @@ -279,6 +282,19 @@ class SearchContext return ''; } + /** + * Check if the given country is covered by the search context. + * + * @param string $sCountryCode Country code of the country to check. + * + * @return True, if no country code restrictions are set or the + * country is included in the country list. + */ + public function isCountryApplicable($sCountryCode) + { + return $this->aCountryList === null || in_array($sCountryCode, $this->aCountryList); + } + public function debugInfo() { return array( diff --git a/lib-php/TokenCountry.php b/lib-php/TokenCountry.php index c9b7b6af..ab84c388 100644 --- a/lib-php/TokenCountry.php +++ b/lib-php/TokenCountry.php @@ -36,7 +36,9 @@ class Country */ public function isExtendable($oSearch, $oPosition) { - return !$oSearch->hasCountry() && $oPosition->maybePhrase('country'); + return !$oSearch->hasCountry() + && $oPosition->maybePhrase('country') + && $oSearch->getContext()->isCountryApplicable($this->sCountryCode); } /** diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php index 4e297954..690ef136 100644 --- a/lib-php/tokenizer/legacy_icu_tokenizer.php +++ b/lib-php/tokenizer/legacy_icu_tokenizer.php @@ -8,7 +8,6 @@ class Tokenizer private $oNormalizer; private $oTransliterator; - private $aCountryRestriction; public function __construct(&$oDB) { @@ -30,12 +29,6 @@ class Tokenizer } - public function setCountryRestriction($aCountries) - { - $this->aCountryRestriction = $aCountries; - } - - public function normalizeString($sTerm) { if ($this->oNormalizer === null) { @@ -162,10 +155,7 @@ class Tokenizer switch ($aWord['type']) { case 'C': // country name tokens - if ($aWord['word'] !== null - && (!$this->aCountryRestriction - || in_array($aWord['word'], $this->aCountryRestriction)) - ) { + if ($aWord['word'] !== null) { $oValidTokens->addToken( $sTok, new Token\Country($iId, $aWord['word']) diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php index 570b8828..6760057d 100644 --- a/lib-php/tokenizer/legacy_tokenizer.php +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -7,7 +7,6 @@ class Tokenizer private $oDB; private $oNormalizer = null; - private $aCountryRestriction = null; public function __construct(&$oDB) { @@ -37,12 +36,6 @@ class Tokenizer } - public function setCountryRestriction($aCountries) - { - $this->aCountryRestriction = $aCountries; - } - - public function normalizeString($sTerm) { if ($this->oNormalizer === null) { @@ -206,12 +199,7 @@ class Tokenizer ); } } elseif ($aWord['country_code']) { - // Filter country tokens that do not match restricted countries. - if (!$this->aCountryRestriction - || in_array($aWord['country_code'], $this->aCountryRestriction) - ) { - $oToken = new Token\Country($iId, $aWord['country_code']); - } + $oToken = new Token\Country($iId, $aWord['country_code']); } elseif ($aWord['word_token'][0] == ' ') { $oToken = new Token\Word( $iId,