From 0fb8eade136ea03e7853aca0795ca69833c33661 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 29 Jul 2021 21:25:59 +0200 Subject: [PATCH] remove country restriction from tokenizer Restricting tokens due to the search context is better done in the generic search part instead of repeating the same test in every tokenizer implementation. --- lib-php/Geocode.php | 1 - lib-php/SearchContext.php | 16 ++++++++++++++++ lib-php/TokenCountry.php | 4 +++- lib-php/tokenizer/legacy_icu_tokenizer.php | 12 +----------- lib-php/tokenizer/legacy_tokenizer.php | 14 +------------- 5 files changed, 21 insertions(+), 26 deletions(-) diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 52b92c99..0f76a9c4 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -498,7 +498,6 @@ class Geocode if ($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } - $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); diff --git a/lib-php/SearchContext.php b/lib-php/SearchContext.php index 8316a012..3b512ecb 100644 --- a/lib-php/SearchContext.php +++ b/lib-php/SearchContext.php @@ -28,6 +28,8 @@ class SearchContext public $sqlViewboxLarge = ''; /// Reference along a route (as SQL). public $sqlViewboxCentre = ''; + /// List of countries to restrict search to (as array). + public $aCountryList = null; /// List of countries to restrict search to (as SQL). public $sqlCountryList = ''; /// List of place IDs to exclude (as SQL). @@ -187,6 +189,7 @@ class SearchContext public function setCountryList($aCountries) { $this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')'; + $this->aCountryList = $aCountries; } /** @@ -279,6 +282,19 @@ class SearchContext return ''; } + /** + * Check if the given country is covered by the search context. + * + * @param string $sCountryCode Country code of the country to check. + * + * @return True, if no country code restrictions are set or the + * country is included in the country list. + */ + public function isCountryApplicable($sCountryCode) + { + return $this->aCountryList === null || in_array($sCountryCode, $this->aCountryList); + } + public function debugInfo() { return array( diff --git a/lib-php/TokenCountry.php b/lib-php/TokenCountry.php index c9b7b6af..ab84c388 100644 --- a/lib-php/TokenCountry.php +++ b/lib-php/TokenCountry.php @@ -36,7 +36,9 @@ class Country */ public function isExtendable($oSearch, $oPosition) { - return !$oSearch->hasCountry() && $oPosition->maybePhrase('country'); + return !$oSearch->hasCountry() + && $oPosition->maybePhrase('country') + && $oSearch->getContext()->isCountryApplicable($this->sCountryCode); } /** diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php index 4e297954..690ef136 100644 --- a/lib-php/tokenizer/legacy_icu_tokenizer.php +++ b/lib-php/tokenizer/legacy_icu_tokenizer.php @@ -8,7 +8,6 @@ class Tokenizer private $oNormalizer; private $oTransliterator; - private $aCountryRestriction; public function __construct(&$oDB) { @@ -30,12 +29,6 @@ class Tokenizer } - public function setCountryRestriction($aCountries) - { - $this->aCountryRestriction = $aCountries; - } - - public function normalizeString($sTerm) { if ($this->oNormalizer === null) { @@ -162,10 +155,7 @@ class Tokenizer switch ($aWord['type']) { case 'C': // country name tokens - if ($aWord['word'] !== null - && (!$this->aCountryRestriction - || in_array($aWord['word'], $this->aCountryRestriction)) - ) { + if ($aWord['word'] !== null) { $oValidTokens->addToken( $sTok, new Token\Country($iId, $aWord['word']) diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php index 570b8828..6760057d 100644 --- a/lib-php/tokenizer/legacy_tokenizer.php +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -7,7 +7,6 @@ class Tokenizer private $oDB; private $oNormalizer = null; - private $aCountryRestriction = null; public function __construct(&$oDB) { @@ -37,12 +36,6 @@ class Tokenizer } - public function setCountryRestriction($aCountries) - { - $this->aCountryRestriction = $aCountries; - } - - public function normalizeString($sTerm) { if ($this->oNormalizer === null) { @@ -206,12 +199,7 @@ class Tokenizer ); } } elseif ($aWord['country_code']) { - // Filter country tokens that do not match restricted countries. - if (!$this->aCountryRestriction - || in_array($aWord['country_code'], $this->aCountryRestriction) - ) { - $oToken = new Token\Country($iId, $aWord['country_code']); - } + $oToken = new Token\Country($iId, $aWord['country_code']); } elseif ($aWord['word_token'][0] == ' ') { $oToken = new Token\Word( $iId, -- 2.39.5