X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/9908c93d4cec91b5109d4fbc83dde18b5076350f..301fd7f7e8d97755849e26427c12eed070f21b5f:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index e2fd7272..12f9da37 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -2,12 +2,12 @@ namespace Nominatim; -require_once(CONST_BasePath.'/lib/PlaceLookup.php'); -require_once(CONST_BasePath.'/lib/Phrase.php'); -require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); -require_once(CONST_BasePath.'/lib/SearchDescription.php'); -require_once(CONST_BasePath.'/lib/SearchContext.php'); -require_once(CONST_BasePath.'/lib/TokenList.php'); +require_once(CONST_LibDir.'/PlaceLookup.php'); +require_once(CONST_LibDir.'/Phrase.php'); +require_once(CONST_LibDir.'/ReverseGeocode.php'); +require_once(CONST_LibDir.'/SearchDescription.php'); +require_once(CONST_LibDir.'/SearchContext.php'); +require_once(CONST_LibDir.'/TokenList.php'); class Geocode { @@ -18,7 +18,7 @@ class Geocode protected $aLangPrefOrder = array(); protected $aExcludePlaceIDs = array(); - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -245,7 +245,6 @@ class Geocode } $this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType); - $this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon')); $this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false)); } @@ -348,10 +347,7 @@ class Geocode $aNewPhraseSearches = array(); $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : ''; - foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) { - // Too many permutations - too expensive - if ($iWordSet > 120) break; - + foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset @@ -527,8 +523,8 @@ class Geocode $sNormQuery = $this->normTerm($this->sQuery); Debug::printVar('Normalized query', $sNormQuery); - $sLanguagePrefArraySQL = getArraySQL( - array_map('getDBQuoted', $this->aLangPrefOrder) + $sLanguagePrefArraySQL = $this->oDB->getArraySQL( + $this->oDB->getDBQuotedList($this->aLangPrefOrder) ); $sQuery = $this->sQuery; @@ -581,8 +577,9 @@ class Geocode if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { $sSpecialTerm = pg_escape_string($sSpecialTerm); - $sToken = chksql( - $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"), + $sToken = $this->oDB->getOne( + 'SELECT make_standard_name(:term)', + array(':term' => $sSpecialTerm), 'Cannot decode query. Wrong encoding?' ); $sSQL = 'SELECT class, type FROM word '; @@ -590,7 +587,7 @@ class Geocode $sSQL .= ' AND class is not null AND class not in (\'place\')'; Debug::printSQL($sSQL); - $aSearchWords = chksql($this->oDB->getAll($sSQL)); + $aSearchWords = $this->oDB->getAll($sSQL); $aNewSearches = array(); foreach ($aSearches as $oSearch) { foreach ($aSearchWords as $aSearchTerm) { @@ -628,8 +625,9 @@ class Geocode $aTokens = array(); $aPhrases = array(); foreach ($aInPhrases as $iPhrase => $sPhrase) { - $sPhrase = chksql( - $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'), + $sPhrase = $this->oDB->getOne( + 'SELECT make_standard_name(:phrase)', + array(':phrase' => $sPhrase), 'Cannot normalize query string (is it a UTF-8 string?)' ); if (trim($sPhrase)) { @@ -639,18 +637,11 @@ class Geocode } } - Debug::printDebugTable('Phrases', $aPhrases); Debug::printVar('Tokens', $aTokens); $oValidTokens = new TokenList(); if (!empty($aTokens)) { - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; - $sSQL .= ' FROM word '; - $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')'; - - Debug::printSQL($sSQL); - $oValidTokens->addTokensFromDB( $this->oDB, $aTokens, @@ -659,6 +650,8 @@ class Geocode $this->oNormalizer ); + $oCtx->setFullNameWords($oValidTokens->getFullWordIDs()); + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { @@ -684,6 +677,11 @@ class Geocode Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo()); + foreach ($aPhrases as $oPhrase) { + $oPhrase->computeWordSets($oValidTokens); + } + Debug::printDebugTable('Phrases', $aPhrases); + Debug::newSection('Search candidates'); $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases); @@ -758,13 +756,22 @@ class Geocode $oValidTokens->debugTokenByWordIdList() ); - $aResults += $oSearch->query( + $aNewResults = $oSearch->query( $this->oDB, $this->iMinAddressRank, $this->iMaxAddressRank, $this->iLimit ); + // The same result may appear in different rounds, only + // use the one with minimal rank. + foreach ($aNewResults as $iPlace => $oRes) { + if (!isset($aResults[$iPlace]) + || $aResults[$iPlace]->iResultRank > $oRes->iResultRank) { + $aResults[$iPlace] = $oRes; + } + } + if ($iQueryLoop > 20) break; } @@ -796,9 +803,7 @@ class Geocode $sSQL .= 'WHERE place_id in ('.$sPlaceIds.') '; $sSQL .= ' AND ('; $sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; - if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) { - $sSQL .= " OR (extratags->'place') = 'city'"; - } + $sSQL .= " OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank "; if ($this->aAddressRankList) { $sSQL .= ' OR placex.rank_address in ('.join(',', $this->aAddressRankList).')'; } @@ -821,7 +826,7 @@ class Geocode if ($aFilterSql) { $sSQL = join(' UNION ', $aFilterSql); Debug::printSQL($sSQL); - $aFilteredIDs = chksql($this->oDB->getCol($sSQL)); + $aFilteredIDs = $this->oDB->getCol($sSQL); } $tempIDs = array(); @@ -878,7 +883,6 @@ class Geocode $aSearchResults = $this->oPlaceLookup->lookup($aResults); - $aClassType = ClassTypes\getListWithImportance(); $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); @@ -887,33 +891,23 @@ class Geocode Debug::printVar('Recheck words', $aRecheckWords); foreach ($aSearchResults as $iIdx => $aResult) { - // Default - $fDiameter = ClassTypes\getProperty($aResult, 'defdiameter', 0.0001); + $fRadius = ClassTypes\getDefRadius($aResult); - $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2); + $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius); if ($aOutlineResult) { $aResult = array_merge($aResult, $aOutlineResult); } - if ($aResult['extra_place'] == 'city') { - $aResult['class'] = 'place'; - $aResult['type'] = 'city'; - $aResult['rank_search'] = 16; - } - // Is there an icon set for this type of result? - $aClassInfo = ClassTypes\getInfo($aResult); - - if ($aClassInfo) { - if (isset($aClassInfo['icon'])) { - $aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassInfo['icon'].'.p.20.png'; - } - - if (isset($aClassInfo['label'])) { - $aResult['label'] = $aClassInfo['label']; - } + $sIcon = ClassTypes\getIconFile($aResult); + if (isset($sIcon)) { + $aResult['icon'] = $sIcon; } + $sLabel = ClassTypes\getLabel($aResult); + if (isset($sLabel)) { + $aResult['label'] = $sLabel; + } $aResult['name'] = $aResult['langaddress']; if ($oCtx->hasNearPoint()) { @@ -925,6 +919,26 @@ class Geocode $aResult['lon'], $aResult['lat'] ); + + // secondary ordering (for results with same importance (the smaller the better): + // - approximate importance of address parts + if (isset($aResult['addressimportance']) && $aResult['addressimportance']) { + $aResult['foundorder'] = -$aResult['addressimportance']/10; + } else { + $aResult['foundorder'] = -$aResult['importance']; + } + // - number of exact matches from the query + $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; + // - importance of the class/type + $iClassImportance = ClassTypes\getImportance($aResult); + if (isset($iClassImportance)) { + $aResult['foundorder'] += 0.0001 * $iClassImportance; + } else { + $aResult['foundorder'] += 0.01; + } + // - rank + $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']); + // Adjust importance for the number of exact string matches in the result $iCountWords = 0; $sAddress = $aResult['langaddress']; @@ -935,21 +949,8 @@ class Geocode } } - $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right - - // secondary ordering (for results with same importance (the smaller the better): - // - approximate importance of address parts - $aResult['foundorder'] = -$aResult['addressimportance']/10; - // - number of exact matches from the query - $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; - // - importance of the class/type - if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['importance'] - ) { - $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; - } else { - $aResult['foundorder'] += 0.01; - } + // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right + $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); } $aSearchResults[$iIdx] = $aResult; }