X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/25baaf530dea28b7cf3c72ba4e83f3ebed05629f..19ab0387244254800a83a0b587b7f4fa20a0a7e7:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index b9e26f03..12f9da37 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -2,12 +2,12 @@ namespace Nominatim; -require_once(CONST_BasePath.'/lib/PlaceLookup.php'); -require_once(CONST_BasePath.'/lib/Phrase.php'); -require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); -require_once(CONST_BasePath.'/lib/SearchDescription.php'); -require_once(CONST_BasePath.'/lib/SearchContext.php'); -require_once(CONST_BasePath.'/lib/TokenList.php'); +require_once(CONST_LibDir.'/PlaceLookup.php'); +require_once(CONST_LibDir.'/Phrase.php'); +require_once(CONST_LibDir.'/ReverseGeocode.php'); +require_once(CONST_LibDir.'/SearchDescription.php'); +require_once(CONST_LibDir.'/SearchContext.php'); +require_once(CONST_LibDir.'/TokenList.php'); class Geocode { @@ -18,7 +18,7 @@ class Geocode protected $aLangPrefOrder = array(); protected $aExcludePlaceIDs = array(); - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -149,6 +149,10 @@ class Geocode private function viewboxImportanceFactor($fX, $fY) { + if (!$this->aViewBox) { + return 1; + } + $fWidth = ($this->aViewBox[2] - $this->aViewBox[0])/2; $fHeight = ($this->aViewBox[3] - $this->aViewBox[1])/2; @@ -241,7 +245,6 @@ class Geocode } $this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType); - $this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon')); $this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false)); } @@ -344,10 +347,7 @@ class Geocode $aNewPhraseSearches = array(); $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : ''; - foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) { - // Too many permutations - too expensive - if ($iWordSet > 120) break; - + foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset @@ -523,8 +523,8 @@ class Geocode $sNormQuery = $this->normTerm($this->sQuery); Debug::printVar('Normalized query', $sNormQuery); - $sLanguagePrefArraySQL = getArraySQL( - array_map('getDBQuoted', $this->aLangPrefOrder) + $sLanguagePrefArraySQL = $this->oDB->getArraySQL( + $this->oDB->getDBQuotedList($this->aLangPrefOrder) ); $sQuery = $this->sQuery; @@ -542,7 +542,6 @@ class Geocode // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); - $aResults = array(); if ($sQuery || $this->aStructuredQuery) { // Start with a single blank search $aSearches = array(new SearchDescription($oCtx)); @@ -578,8 +577,9 @@ class Geocode if ($sSpecialTerm && !$aSearches[0]->hasOperator()) { $sSpecialTerm = pg_escape_string($sSpecialTerm); - $sToken = chksql( - $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"), + $sToken = $this->oDB->getOne( + 'SELECT make_standard_name(:term)', + array(':term' => $sSpecialTerm), 'Cannot decode query. Wrong encoding?' ); $sSQL = 'SELECT class, type FROM word '; @@ -587,7 +587,7 @@ class Geocode $sSQL .= ' AND class is not null AND class not in (\'place\')'; Debug::printSQL($sSQL); - $aSearchWords = chksql($this->oDB->getAll($sSQL)); + $aSearchWords = $this->oDB->getAll($sSQL); $aNewSearches = array(); foreach ($aSearches as $oSearch) { foreach ($aSearchWords as $aSearchTerm) { @@ -625,8 +625,9 @@ class Geocode $aTokens = array(); $aPhrases = array(); foreach ($aInPhrases as $iPhrase => $sPhrase) { - $sPhrase = chksql( - $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'), + $sPhrase = $this->oDB->getOne( + 'SELECT make_standard_name(:phrase)', + array(':phrase' => $sPhrase), 'Cannot normalize query string (is it a UTF-8 string?)' ); if (trim($sPhrase)) { @@ -636,18 +637,11 @@ class Geocode } } - Debug::printDebugTable('Phrases', $aPhrases); Debug::printVar('Tokens', $aTokens); $oValidTokens = new TokenList(); if (!empty($aTokens)) { - $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; - $sSQL .= ' FROM word '; - $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')'; - - Debug::printSQL($sSQL); - $oValidTokens->addTokensFromDB( $this->oDB, $aTokens, @@ -656,6 +650,8 @@ class Geocode $this->oNormalizer ); + $oCtx->setFullNameWords($oValidTokens->getFullWordIDs()); + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { @@ -681,6 +677,11 @@ class Geocode Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo()); + foreach ($aPhrases as $oPhrase) { + $oPhrase->computeWordSets($oValidTokens); + } + Debug::printDebugTable('Phrases', $aPhrases); + Debug::newSection('Search candidates'); $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases); @@ -742,8 +743,10 @@ class Geocode // Start the search process $iGroupLoop = 0; $iQueryLoop = 0; + $aNextResults = array(); foreach ($aGroupedSearches as $iGroupedRank => $aSearches) { $iGroupLoop++; + $aResults = $aNextResults; foreach ($aSearches as $oSearch) { $iQueryLoop++; @@ -753,16 +756,42 @@ class Geocode $oValidTokens->debugTokenByWordIdList() ); - $aResults += $oSearch->query( + $aNewResults = $oSearch->query( $this->oDB, $this->iMinAddressRank, $this->iMaxAddressRank, $this->iLimit ); + // The same result may appear in different rounds, only + // use the one with minimal rank. + foreach ($aNewResults as $iPlace => $oRes) { + if (!isset($aResults[$iPlace]) + || $aResults[$iPlace]->iResultRank > $oRes->iResultRank) { + $aResults[$iPlace] = $oRes; + } + } + if ($iQueryLoop > 20) break; } + if (!empty($aResults)) { + $aSplitResults = Result::splitResults($aResults); + Debug::printVar('Split results', $aSplitResults); + if ($iGroupLoop <= 4 && empty($aSplitResults['tail']) + && reset($aSplitResults['head'])->iResultRank > 0) { + // Haven't found an exact match for the query yet. + // Therefore add result from the next group level. + $aNextResults = $aSplitResults['head']; + foreach ($aNextResults as $oRes) { + $oRes->iResultRank--; + } + $aResults = array(); + } else { + $aResults = $aSplitResults['head']; + } + } + if (!empty($aResults) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) { // Need to verify passes rank limits before dropping out of the loop (yuk!) // reduces the number of place ids, like a filter @@ -774,9 +803,7 @@ class Geocode $sSQL .= 'WHERE place_id in ('.$sPlaceIds.') '; $sSQL .= ' AND ('; $sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; - if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) { - $sSQL .= " OR (extratags->'place') = 'city'"; - } + $sSQL .= " OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank "; if ($this->aAddressRankList) { $sSQL .= ' OR placex.rank_address in ('.join(',', $this->aAddressRankList).')'; } @@ -799,7 +826,7 @@ class Geocode if ($aFilterSql) { $sSQL = join(' UNION ', $aFilterSql); Debug::printSQL($sSQL); - $aFilteredIDs = chksql($this->oDB->getCol($sSQL)); + $aFilteredIDs = $this->oDB->getCol($sSQL); } $tempIDs = array(); @@ -856,7 +883,6 @@ class Geocode $aSearchResults = $this->oPlaceLookup->lookup($aResults); - $aClassType = ClassTypes\getListWithImportance(); $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); @@ -865,33 +891,23 @@ class Geocode Debug::printVar('Recheck words', $aRecheckWords); foreach ($aSearchResults as $iIdx => $aResult) { - // Default - $fDiameter = ClassTypes\getProperty($aResult, 'defdiameter', 0.0001); + $fRadius = ClassTypes\getDefRadius($aResult); - $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2); + $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius); if ($aOutlineResult) { $aResult = array_merge($aResult, $aOutlineResult); } - if ($aResult['extra_place'] == 'city') { - $aResult['class'] = 'place'; - $aResult['type'] = 'city'; - $aResult['rank_search'] = 16; - } - // Is there an icon set for this type of result? - $aClassInfo = ClassTypes\getInfo($aResult); - - if ($aClassInfo) { - if (isset($aClassInfo['icon'])) { - $aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassInfo['icon'].'.p.20.png'; - } - - if (isset($aClassInfo['label'])) { - $aResult['label'] = $aClassInfo['label']; - } + $sIcon = ClassTypes\getIconFile($aResult); + if (isset($sIcon)) { + $aResult['icon'] = $sIcon; } + $sLabel = ClassTypes\getLabel($aResult); + if (isset($sLabel)) { + $aResult['label'] = $sLabel; + } $aResult['name'] = $aResult['langaddress']; if ($oCtx->hasNearPoint()) { @@ -903,6 +919,26 @@ class Geocode $aResult['lon'], $aResult['lat'] ); + + // secondary ordering (for results with same importance (the smaller the better): + // - approximate importance of address parts + if (isset($aResult['addressimportance']) && $aResult['addressimportance']) { + $aResult['foundorder'] = -$aResult['addressimportance']/10; + } else { + $aResult['foundorder'] = -$aResult['importance']; + } + // - number of exact matches from the query + $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; + // - importance of the class/type + $iClassImportance = ClassTypes\getImportance($aResult); + if (isset($iClassImportance)) { + $aResult['foundorder'] += 0.0001 * $iClassImportance; + } else { + $aResult['foundorder'] += 0.01; + } + // - rank + $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']); + // Adjust importance for the number of exact string matches in the result $iCountWords = 0; $sAddress = $aResult['langaddress']; @@ -913,21 +949,8 @@ class Geocode } } - $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right - - // secondary ordering (for results with same importance (the smaller the better): - // - approximate importance of address parts - $aResult['foundorder'] = -$aResult['addressimportance']/10; - // - number of exact matches from the query - $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches; - // - importance of the class/type - if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['importance'] - ) { - $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; - } else { - $aResult['foundorder'] += 0.01; - } + // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right + $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); } $aSearchResults[$iIdx] = $aResult; }