X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b66004c3c554c87886afa170f9065be249a3d9d4..c7eb4e2c7373d64b440b3acf2186545e6a593a59:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 1c84f14b..cce85f2b 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -7,6 +7,7 @@ require_once(CONST_BasePath.'/lib/Phrase.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); require_once(CONST_BasePath.'/lib/SearchDescription.php'); require_once(CONST_BasePath.'/lib/SearchContext.php'); +require_once(CONST_BasePath.'/lib/TokenList.php'); class Geocode { @@ -16,8 +17,6 @@ class Geocode protected $aLangPrefOrder = array(); - protected $bIncludeAddressDetails = false; - protected $aExcludePlaceIDs = array(); protected $bReverseInPlan = true; @@ -86,7 +85,6 @@ class Geocode $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs); } - if ($this->bIncludeAddressDetails) $aParams['addressdetails'] = '1'; if ($this->bBoundedSearch) $aParams['bounded'] = '1'; if ($this->aCountryCodes) { @@ -151,6 +149,10 @@ class Geocode private function viewboxImportanceFactor($fX, $fY) { + if (!$this->aViewBox) { + return 1; + } + $fWidth = ($this->aViewBox[2] - $this->aViewBox[0])/2; $fHeight = ($this->aViewBox[3] - $this->aViewBox[1])/2; @@ -182,9 +184,6 @@ class Geocode public function loadParamArray($oParams, $sForceGeometryType = null) { - $this->bIncludeAddressDetails - = $oParams->getBool('addressdetails', $this->bIncludeAddressDetails); - $this->bBoundedSearch = $oParams->getBool('bounded', $this->bBoundedSearch); $this->setLimit($oParams->getInt('limit', $this->iFinalLimit)); @@ -246,8 +245,7 @@ class Geocode } $this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType); - $this->oPlaceLookup->setIncludeAddressDetails(false); - $this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon')); + $this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false)); } public function setQueryFromParams($oParams) @@ -332,10 +330,10 @@ class Geocode return false; } - public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured) + public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured) { /* - Calculate all searches using aValidTokens i.e. + Calculate all searches using oValidTokens i.e. 'Wodsworth Road, Sheffield' => Phrase Wordset @@ -349,10 +347,7 @@ class Geocode $aNewPhraseSearches = array(); $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : ''; - foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) { - // Too many permutations - too expensive - if ($iWordSet > 120) break; - + foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset @@ -365,38 +360,37 @@ class Geocode //var_dump($oCurrentSearch); //echo ""; - // If the token is valid - if (isset($aValidTokens[' '.$sToken])) { - foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $aSearchTerm, - isset($aValidTokens[$sToken]) - && strpos($sToken, ' ') === false, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } + // Tokens with full name matches. + foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { + $aNewSearches = $oCurrentSearch->extendWithFullTerm( + $oSearchTerm, + $oValidTokens->contains($sToken) + && strpos($sToken, ' ') === false, + $sPhraseType, + $iToken == 0 && $iPhrase == 0, + $iPhrase == 0, + $iToken + 1 == count($aWordset) + && $iPhrase + 1 == count($aPhrases) + ); + + foreach ($aNewSearches as $oSearch) { + if ($oSearch->getRank() < $this->iMaxRank) { + $aNewWordsetSearches[] = $oSearch; } } } // Look for partial matches. // Note that there is no point in adding country terms here // because country is omitted in the address. - if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') { + if ($sPhraseType != 'country') { // Allow searching for a word - but at extra cost - foreach ($aValidTokens[$sToken] as $aSearchTerm) { + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $aSearchTerm, + $sToken, + $oSearchTerm, $bIsStructured, $iPhrase, - isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array() + $oValidTokens->get(' '.$sToken) ); foreach ($aNewSearches as $oSearch) { @@ -529,8 +523,8 @@ class Geocode $sNormQuery = $this->normTerm($this->sQuery); Debug::printVar('Normalized query', $sNormQuery); - $sLanguagePrefArraySQL = getArraySQL( - array_map('getDBQuoted', $this->aLangPrefOrder) + $sLanguagePrefArraySQL = $this->oDB->getArraySQL( + $this->oDB->getDBQuotedList($this->aLangPrefOrder) ); $sQuery = $this->sQuery; @@ -548,7 +542,6 @@ class Geocode // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); - $aResults = array(); if ($sQuery || $this->aStructuredQuery) { // Start with a single blank search $aSearches = array(new SearchDescription($oCtx)); @@ -584,8 +577,9 @@ class Geocode if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { $sSpecialTerm = pg_escape_string($sSpecialTerm); - $sToken = chksql( - $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"), + $sToken = $this->oDB->getOne( + 'SELECT make_standard_name(:term)', + array(':term' => $sSpecialTerm), 'Cannot decode query. Wrong encoding?' ); $sSQL = 'SELECT class, type FROM word '; @@ -593,7 +587,7 @@ class Geocode $sSQL .= ' AND class is not null AND class not in (\'place\')'; Debug::printSQL($sSQL); - $aSearchWords = chksql($this->oDB->getAll($sSQL)); + $aSearchWords = $this->oDB->getAll($sSQL); $aNewSearches = array(); foreach ($aSearches as $oSearch) { foreach ($aSearchWords as $aSearchTerm) { @@ -631,8 +625,9 @@ class Geocode $aTokens = array(); $aPhrases = array(); foreach ($aInPhrases as $iPhrase => $sPhrase) { - $sPhrase = chksql( - $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'), + $sPhrase = $this->oDB->getOne( + 'SELECT make_standard_name(:phrase)', + array(':phrase' => $sPhrase), 'Cannot normalize query string (is it a UTF-8 string?)' ); if (trim($sPhrase)) { @@ -642,78 +637,52 @@ class Geocode } } - Debug::printDebugTable('Phrases', $aPhrases); Debug::printVar('Tokens', $aTokens); - if (!empty($aTokens)) { - // Check which tokens we have, get the ID numbers - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; - $sSQL .= ' FROM word '; - $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')'; - - Debug::printSQL($sSQL); + $oValidTokens = new TokenList(); - $aValidTokens = array(); - $aDatabaseWords = chksql( - $this->oDB->getAll($sSQL), - 'Could not get word tokens.' + if (!empty($aTokens)) { + $oValidTokens->addTokensFromDB( + $this->oDB, + $aTokens, + $this->aCountryCodes, + $sNormQuery, + $this->oNormalizer ); - $aWordFrequencyScores = array(); - foreach ($aDatabaseWords as $aToken) { - // Filter country tokens that do not match restricted countries. - if ($this->aCountryCodes - && $aToken['country_code'] - && !in_array($aToken['country_code'], $this->aCountryCodes) - ) { - continue; - } - - // Special terms need to appear in their normalized form. - if ($aToken['word'] && $aToken['class']) { - $sNormWord = $this->normTerm($aToken['word']); - if (strpos($sNormQuery, $sNormWord) === false) { - continue; - } - } - - if (isset($aValidTokens[$aToken['word_token']])) { - $aValidTokens[$aToken['word_token']][] = $aToken; - } else { - $aValidTokens[$aToken['word_token']] = array($aToken); - } - $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1; - } - // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { - if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - if (isset($aValidTokens[$aData[1]])) { - foreach ($aValidTokens[$aData[1]] as $aToken) { - if (!$aToken['class']) { - if (isset($aValidTokens[$sToken])) { - $aValidTokens[$sToken][] = $aToken; - } else { - $aValidTokens[$sToken] = array($aToken); - } - } - } + if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + // US ZIP+4 codes - merge in the 5-digit ZIP code + $oValidTokens->addToken( + $sToken, + new Token\Postcode(null, $aData[1], 'us') + ); + } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + // Unknown single word token with a number. + // Assume it is a house number. + $oValidTokens->addToken( + $sToken, + new Token\HouseNumber(null, trim($sToken)) + ); } } } - foreach ($aTokens as $sToken) { - // Unknown single word token with a number - assume it is a house number - if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) { - $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken)); - } - } - Debug::printGroupTable('Valid Tokens', $aValidTokens); - // Any words that have failed completely? // TODO: suggestions + + Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo()); + + foreach ($aPhrases as $oPhrase) { + $oPhrase->computeWordSets($oValidTokens); + } + Debug::printDebugTable('Phrases', $aPhrases); + Debug::newSection('Search candidates'); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@ -724,7 +693,7 @@ class Geocode if (count($aPhrases) > 1) { $aPhrases[count($aPhrases)-1]->invertWordSets(); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@ -764,32 +733,63 @@ class Geocode } } - if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); + Debug::printGroupedSearch( + $aGroupedSearches, + $oValidTokens->debugTokenByWordIdList() + ); // Start the search process $iGroupLoop = 0; $iQueryLoop = 0; + $aNextResults = array(); foreach ($aGroupedSearches as $iGroupedRank => $aSearches) { $iGroupLoop++; + $aResults = $aNextResults; foreach ($aSearches as $oSearch) { $iQueryLoop++; - if (CONST_Debug) { - echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; - _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens); - } + Debug::newSection("Search Loop, group $iGroupLoop, loop $iQueryLoop"); + Debug::printGroupedSearch( + array($iGroupedRank => array($oSearch)), + $oValidTokens->debugTokenByWordIdList() + ); - $aResults += $oSearch->query( + $aNewResults = $oSearch->query( $this->oDB, - $aWordFrequencyScores, $this->iMinAddressRank, $this->iMaxAddressRank, $this->iLimit ); + // The same result may appear in different rounds, only + // use the one with minimal rank. + foreach ($aNewResults as $iPlace => $oRes) { + if (!isset($aResults[$iPlace]) + || $aResults[$iPlace]->iResultRank > $oRes->iResultRank) { + $aResults[$iPlace] = $oRes; + } + } + if ($iQueryLoop > 20) break; } + if (!empty($aResults)) { + $aSplitResults = Result::splitResults($aResults); + Debug::printVar('Split results', $aSplitResults); + if ($iGroupLoop <= 4 && empty($aSplitResults['tail']) + && reset($aSplitResults['head'])->iResultRank > 0) { + // Haven't found an exact match for the query yet. + // Therefore add result from the next group level. + $aNextResults = $aSplitResults['head']; + foreach ($aNextResults as $oRes) { + $oRes->iResultRank--; + } + $aResults = array(); + } else { + $aResults = $aSplitResults['head']; + } + } + if (!empty($aResults) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) { // Need to verify passes rank limits before dropping out of the loop (yuk!) // reduces the number of place ids, like a filter @@ -801,9 +801,7 @@ class Geocode $sSQL .= 'WHERE place_id in ('.$sPlaceIds.') '; $sSQL .= ' AND ('; $sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; - if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) { - $sSQL .= " OR (extratags->'place') = 'city'"; - } + $sSQL .= " OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank "; if ($this->aAddressRankList) { $sSQL .= ' OR placex.rank_address in ('.join(',', $this->aAddressRankList).')'; } @@ -826,7 +824,7 @@ class Geocode if ($aFilterSql) { $sSQL = join(' UNION ', $aFilterSql); Debug::printSQL($sSQL); - $aFilteredIDs = chksql($this->oDB->getCol($sSQL)); + $aFilteredIDs = $this->oDB->getCol($sSQL); } $tempIDs = array(); @@ -883,7 +881,6 @@ class Geocode $aSearchResults = $this->oPlaceLookup->lookup($aResults); - $aClassType = getClassTypesWithImportance(); $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); @@ -892,45 +889,23 @@ class Geocode Debug::printVar('Recheck words', $aRecheckWords); foreach ($aSearchResults as $iIdx => $aResult) { - // Default - $fDiameter = getResultDiameter($aResult); + $fRadius = ClassTypes\getDefRadius($aResult); - $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2); + $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius); if ($aOutlineResult) { $aResult = array_merge($aResult, $aOutlineResult); } - if ($aResult['extra_place'] == 'city') { - $aResult['class'] = 'place'; - $aResult['type'] = 'city'; - $aResult['rank_search'] = 16; - } - // Is there an icon set for this type of result? - if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['icon']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['icon'] - ) { - $aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassType[$aResult['class'].':'.$aResult['type']]['icon'].'.p.20.png'; + $sIcon = ClassTypes\getIconFile($aResult); + if (isset($sIcon)) { + $aResult['icon'] = $sIcon; } - if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['label']) - && $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['label'] - ) { - $aResult['label'] = $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['label']; - } elseif (isset($aClassType[$aResult['class'].':'.$aResult['type']]['label']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['label'] - ) { - $aResult['label'] = $aClassType[$aResult['class'].':'.$aResult['type']]['label']; + $sLabel = ClassTypes\getLabel($aResult); + if (isset($sLabel)) { + $aResult['label'] = $sLabel; } - // if tag '&addressdetails=1' is set in query - if ($this->bIncludeAddressDetails) { - // getAddressDetails() is defined in lib.php and uses the SQL function get_addressdata in functions.sql - $aResult['address'] = getAddressDetails($this->oDB, $sLanguagePrefArraySQL, $aResult['place_id'], $aResult['country_code'], $aResults[$aResult['place_id']]->iHouseNumber); - if ($aResult['extra_place'] == 'city' && !isset($aResult['address']['city'])) { - $aResult['address'] = array_merge(array('city' => array_values($aResult['address'])[0]), $aResult['address']); - } - } - $aResult['name'] = $aResult['langaddress']; if ($oCtx->hasNearPoint()) { @@ -942,6 +917,26 @@ class Geocode $aResult['lon'], $aResult['lat'] ); + + // secondary ordering (for results with same importance (the smaller the better): + // - approximate importance of address parts + if (isset($aResult['addressimportance']) && $aResult['addressimportance']) { + $aResult['foundorder'] = -$aResult['addressimportance']/10; + } else { + $aResult['foundorder'] = -$aResult['importance']; + } + // - number of exact matches from the query + $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; + // - importance of the class/type + $iClassImportance = ClassTypes\getImportance($aResult); + if (isset($iClassImportance)) { + $aResult['foundorder'] += 0.0001 * $iClassImportance; + } else { + $aResult['foundorder'] += 0.01; + } + // - rank + $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']); + // Adjust importance for the number of exact string matches in the result $iCountWords = 0; $sAddress = $aResult['langaddress']; @@ -952,21 +947,8 @@ class Geocode } } - $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right - - // secondary ordering (for results with same importance (the smaller the better): - // - approximate importance of address parts - $aResult['foundorder'] = -$aResult['addressimportance']/10; - // - number of exact matches from the query - $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; - // - importance of the class/type - if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['importance'] - ) { - $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; - } else { - $aResult['foundorder'] += 0.01; - } + // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right + $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); } $aSearchResults[$iIdx] = $aResult; } @@ -1009,7 +991,6 @@ class Geocode 'Query' => $this->sQuery, 'Structured query' => $this->aStructuredQuery, 'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder), - 'Include address' => $this->bIncludeAddressDetails, 'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs), 'Try reversed query'=> $this->bReverseInPlan, 'Limit (for searches)' => $this->iLimit,