From: Sarah Hoffmann Date: Sun, 6 May 2018 20:10:38 +0000 (+0200) Subject: replace word frequency hash X-Git-Tag: v3.2.0~74^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/115792d1dbfab0594806d600cbd5e2a0d1de8c37 replace word frequency hash The word frequency hash was only used to determine if the name of a SearchDescription is rare. Do this already when building the SearchDescription (when the word frequency is still available) and get rid of the extra hash. --- diff --git a/lib/Geocode.php b/lib/Geocode.php index 612c1a0f..68a9a7cb 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -658,7 +658,6 @@ class Geocode $this->oDB->getAll($sSQL), 'Could not get word tokens.' ); - $aWordFrequencyScores = array(); foreach ($aDatabaseWords as $aToken) { // Filter country tokens that do not match restricted countries. if ($this->aCountryCodes @@ -681,7 +680,6 @@ class Geocode } else { $aValidTokens[$aToken['word_token']] = array($aToken); } - $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1; } // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code @@ -781,7 +779,6 @@ class Geocode $aResults += $oSearch->query( $this->oDB, - $aWordFrequencyScores, $this->iMinAddressRank, $this->iMaxAddressRank, $this->iLimit diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 6345f50f..07eccec4 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -17,6 +17,8 @@ class SearchDescription private $sCountryCode = ''; /// List of word ids making up the name of the object. private $aName = array(); + /// True if the name is rare enough to force index use on name. + private $bRareName = false; /// List of word ids making up the address of the object. private $aAddress = array(); /// Subset of word ids of full words making up the address. 
@@ -292,6 +294,11 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aName = array($iWordID => $iWordID); + if (CONST_Search_NameOnlySearchFrequencyThreshold) { + $oSearch->bRareName = + $aSearchTerm['search_name_count'] + 1 + < CONST_Search_NameOnlySearchFrequencyThreshold; + } $aNewSearches[] = $oSearch; } } @@ -368,6 +375,13 @@ class SearchDescription $oSearch->iSearchRank += 2; } if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { + if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) { + $oSearch->bRareName = + $aSearchTerm['search_name_count'] + 1 + < CONST_Search_NameOnlySearchFrequencyThreshold; + } else { + $oSearch->bRareName = false; + } $oSearch->aName[$iWordID] = $iWordID; } else { $oSearch->aNameNonSearch[$iWordID] = $iWordID; @@ -385,20 +399,16 @@ class SearchDescription /** * Query database for places that match this search. * - * @param object $oDB Database connection to use. - * @param mixed[] $aWordFrequencyScores Number of times tokens appears - * overall in a planet database. - * @param integer $iMinRank Minimum address rank to restrict - * search to. - * @param integer $iMaxRank Maximum address rank to restrict - * search to. - * @param integer $iLimit Maximum number of results. + * @param object $oDB Database connection to use. + * @param integer $iMinRank Minimum address rank to restrict search to. + * @param integer $iMaxRank Maximum address rank to restrict search to. + * @param integer $iLimit Maximum number of results. * * @return mixed[] An array with two fields: IDs contains the list of * matching place IDs and houseNumber the houseNumber * if appicable or -1 if not. */ - public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit) + public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit) { $aResults = array(); $iHousenumber = -1; @@ -427,7 +437,6 @@ class SearchDescription // First search for places according to name and address. 
$aResults = $this->queryNamedPlace( $oDB, - $aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit @@ -579,7 +588,7 @@ class SearchDescription return $aResults; } - private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) + private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); @@ -615,11 +624,7 @@ class SearchDescription } if (!empty($this->aAddress)) { // For infrequent name terms disable index usage for address - if (CONST_Search_NameOnlySearchFrequencyThreshold - && count($this->aName) == 1 - && $aWordFrequencyScores[$this->aName[reset($this->aName)]] - < CONST_Search_NameOnlySearchFrequencyThreshold - ) { + if ($this->bRareName) { $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress); } else { $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);