X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3da4c9c38433da5eaa4f1071ffaf887da5c88017..77b76ae51bc4751c02e58678e9003fa9838abcf1:/lib/Geocode.php?ds=sidebyside diff --git a/lib/Geocode.php b/lib/Geocode.php index 16919bb8..faf9e770 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -3,6 +3,7 @@ namespace Nominatim; require_once(CONST_BasePath.'/lib/PlaceLookup.php'); +require_once(CONST_BasePath.'/lib/Phrase.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); require_once(CONST_BasePath.'/lib/SearchDescription.php'); require_once(CONST_BasePath.'/lib/SearchContext.php'); @@ -668,7 +669,7 @@ class Geocode return $aSearchResults; } - public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery) + public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured, $sNormQuery) { /* Calculate all searches using aValidTokens i.e. @@ -683,15 +684,11 @@ class Geocode */ $iGlobalRank = 0; - foreach ($aPhrases as $iPhrase => $aPhrase) { + foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - if ($bStructuredPhrases) { - $sPhraseType = $aPhraseTypes[$iPhrase]; - } else { - $sPhraseType = ''; - } + $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : ''; - foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { + foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -746,9 +743,8 @@ class Geocode foreach ($aValidTokens[$sToken] as $aSearchTerm) { $aNewSearches = $oCurrentSearch->extendWithPartialTerm( $aSearchTerm, - $bStructuredPhrases, + $bIsStructured, $iPhrase, - $aWordFrequencyScores, isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array() ); @@ -806,7 +802,7 @@ class Geocode // Revisit searches, drop bad searches and give penalty to unlikely combinations. $aGroupedSearches = array(); foreach ($aSearches as $oSearch) { - if (!$oSearch->isValidSearch($this->aCountryCodes)) { + if (!$oSearch->isValidSearch()) { continue; } @@ -955,10 +951,10 @@ class Geocode // Split query into phrases // Commas are used to reduce the search space by indicating where phrases split if ($this->aStructuredQuery) { - $aPhrases = $this->aStructuredQuery; + $aInPhrases = $this->aStructuredQuery; $bStructuredPhrases = true; } else { - $aPhrases = explode(',', $sQuery); + $aInPhrases = explode(',', $sQuery); $bStructuredPhrases = false; } @@ -967,25 +963,19 @@ class Geocode // Get all 'sets' of words // Generate a complete list of all $aTokens = array(); - foreach ($aPhrases as $iPhrase => $sPhrase) { - $aPhrase = chksql( - $this->oDB->getRow("SELECT make_standard_name('".pg_escape_string($sPhrase)."') as string"), + $aPhrases = array(); + foreach ($aInPhrases as $iPhrase => $sPhrase) { + $sPhrase = chksql( + $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'), "Cannot normalize query string (is it a UTF-8 string?)" ); - if (trim($aPhrase['string'])) { - $aPhrases[$iPhrase] = $aPhrase; - $aPhrases[$iPhrase]['words'] = explode(' ', $aPhrases[$iPhrase]['string']); - $aPhrases[$iPhrase]['wordsets'] = getWordSets($aPhrases[$iPhrase]['words'], 0); - $aTokens = array_merge($aTokens, getTokensFromSets($aPhrases[$iPhrase]['wordsets'])); - } else { - unset($aPhrases[$iPhrase]); + if (trim($sPhrase)) { + $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : ''); + $oPhrase->addTokens($aTokens); + $aPhrases[] = $oPhrase; } } - // Reindex phrases - we make assumptions later on that they are numerically keyed in order - $aPhraseTypes = array_keys($aPhrases); - $aPhrases = array_values($aPhrases); - if (sizeof($aTokens)) { // Check which tokens we have, get the ID numbers $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; @@ -999,12 +989,12 @@ class Geocode $this->oDB->getAll($sSQL), "Could not get word tokens." ); - $aPossibleMainWordIDs = array(); $aWordFrequencyScores = array(); foreach ($aDatabaseWords as $aToken) { - // Very special case - require 2 letter country param to match the country code found - if ($bStructuredPhrases && $aToken['country_code'] && !empty($this->aStructuredQuery['country']) - && strlen($this->aStructuredQuery['country']) == 2 && strtolower($this->aStructuredQuery['country']) != $aToken['country_code'] + // Filter country tokens that do not match restricted countries. + if ($this->aCountryCodes + && $aToken['country_code'] + && !in_array($aToken['country_code'], $this->aCountryCodes) ) { continue; } @@ -1014,7 +1004,6 @@ class Geocode } else { $aValidTokens[$aToken['word_token']] = array($aToken); } - if (!$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1; $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1; } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); @@ -1046,19 +1035,18 @@ class Geocode // Any words that have failed completely? // TODO: suggestions - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in // the first and final phrase. Don't bother about phrases in the middle // because order in the address doesn't matter. $aPhrases = array_reverse($aPhrases); - $aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0); + $aPhrases[0]->invertWordSets(); if (sizeof($aPhrases) > 1) { - $aFinalPhrase = end($aPhrases); - $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); + $aPhrases[sizeof($aPhrases)-1]->invertWordSets(); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false, $sNormQuery); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) {