]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
put a heavy penalty on full word searches in address
[nominatim.git] / lib / Geocode.php
index 5343873fb9d59bdd8987954f7f20692bc0866be3..cce85f2b31b8f78c982fe9f9674355d3a40a95e7 100644 (file)
@@ -245,7 +245,6 @@ class Geocode
         }
 
         $this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType);
-        $this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon'));
         $this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false));
     }
 
@@ -348,10 +347,7 @@ class Geocode
             $aNewPhraseSearches = array();
             $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
 
-            foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) {
-                // Too many permutations - too expensive
-                if ($iWordSet > 120) break;
-
+            foreach ($oPhrase->getWordSets() as $aWordset) {
                 $aWordsetSearches = $aSearches;
 
                 // Add all words from this wordset
@@ -527,8 +523,8 @@ class Geocode
         $sNormQuery = $this->normTerm($this->sQuery);
         Debug::printVar('Normalized query', $sNormQuery);
 
-        $sLanguagePrefArraySQL = getArraySQL(
-            array_map('getDBQuoted', $this->aLangPrefOrder)
+        $sLanguagePrefArraySQL = $this->oDB->getArraySQL(
+            $this->oDB->getDBQuotedList($this->aLangPrefOrder)
         );
 
         $sQuery = $this->sQuery;
@@ -581,8 +577,9 @@ class Geocode
 
             if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
                 $sSpecialTerm = pg_escape_string($sSpecialTerm);
-                $sToken = chksql(
-                    $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"),
+                $sToken = $this->oDB->getOne(
+                    'SELECT make_standard_name(:term)',
+                    array(':term' => $sSpecialTerm),
                     'Cannot decode query. Wrong encoding?'
                 );
                 $sSQL = 'SELECT class, type FROM word ';
@@ -590,7 +587,7 @@ class Geocode
                 $sSQL .= '   AND class is not null AND class not in (\'place\')';
 
                 Debug::printSQL($sSQL);
-                $aSearchWords = chksql($this->oDB->getAll($sSQL));
+                $aSearchWords = $this->oDB->getAll($sSQL);
                 $aNewSearches = array();
                 foreach ($aSearches as $oSearch) {
                     foreach ($aSearchWords as $aSearchTerm) {
@@ -628,8 +625,9 @@ class Geocode
             $aTokens = array();
             $aPhrases = array();
             foreach ($aInPhrases as $iPhrase => $sPhrase) {
-                $sPhrase = chksql(
-                    $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'),
+                $sPhrase = $this->oDB->getOne(
+                    'SELECT make_standard_name(:phrase)',
+                    array(':phrase' => $sPhrase),
                     'Cannot normalize query string (is it a UTF-8 string?)'
                 );
                 if (trim($sPhrase)) {
@@ -639,18 +637,11 @@ class Geocode
                 }
             }
 
-            Debug::printDebugTable('Phrases', $aPhrases);
             Debug::printVar('Tokens', $aTokens);
 
             $oValidTokens = new TokenList();
 
             if (!empty($aTokens)) {
-                $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
-                $sSQL .= ' FROM word ';
-                $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
-
-                Debug::printSQL($sSQL);
-
                 $oValidTokens->addTokensFromDB(
                     $this->oDB,
                     $aTokens,
@@ -684,6 +675,11 @@ class Geocode
 
                 Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
 
+                foreach ($aPhrases as $oPhrase) {
+                    $oPhrase->computeWordSets($oValidTokens);
+                }
+                Debug::printDebugTable('Phrases', $aPhrases);
+
                 Debug::newSection('Search candidates');
 
                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
@@ -758,13 +754,22 @@ class Geocode
                         $oValidTokens->debugTokenByWordIdList()
                     );
 
-                    $aResults += $oSearch->query(
+                    $aNewResults = $oSearch->query(
                         $this->oDB,
                         $this->iMinAddressRank,
                         $this->iMaxAddressRank,
                         $this->iLimit
                     );
 
+                    // The same result may appear in different rounds; keep only
+                    // the one with the minimal rank.
+                    foreach ($aNewResults as $iPlace => $oRes) {
+                        if (!isset($aResults[$iPlace])
+                            || $aResults[$iPlace]->iResultRank > $oRes->iResultRank) {
+                            $aResults[$iPlace] = $oRes;
+                        }
+                    }
+
                     if ($iQueryLoop > 20) break;
                 }
 
@@ -796,9 +801,7 @@ class Geocode
                         $sSQL .= 'WHERE place_id in ('.$sPlaceIds.') ';
                         $sSQL .= '  AND (';
                         $sSQL .= "         placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
-                        if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) {
-                            $sSQL .= "     OR (extratags->'place') = 'city'";
-                        }
+                        $sSQL .= "         OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank ";
                         if ($this->aAddressRankList) {
                             $sSQL .= '     OR placex.rank_address in ('.join(',', $this->aAddressRankList).')';
                         }
@@ -821,7 +824,7 @@ class Geocode
                     if ($aFilterSql) {
                         $sSQL = join(' UNION ', $aFilterSql);
                         Debug::printSQL($sSQL);
-                        $aFilteredIDs = chksql($this->oDB->getCol($sSQL));
+                        $aFilteredIDs = $this->oDB->getCol($sSQL);
                     }
 
                     $tempIDs = array();
@@ -878,7 +881,6 @@ class Geocode
 
         $aSearchResults = $this->oPlaceLookup->lookup($aResults);
 
-        $aClassType = ClassTypes\getListWithImportance();
         $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
         foreach ($aRecheckWords as $i => $sWord) {
             if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
@@ -887,33 +889,23 @@ class Geocode
         Debug::printVar('Recheck words', $aRecheckWords);
 
         foreach ($aSearchResults as $iIdx => $aResult) {
-            // Default
-            $fDiameter = ClassTypes\getProperty($aResult, 'defdiameter', 0.0001);
+            $fRadius = ClassTypes\getDefRadius($aResult);
 
-            $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2);
+            $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius);
             if ($aOutlineResult) {
                 $aResult = array_merge($aResult, $aOutlineResult);
             }
 
-            if ($aResult['extra_place'] == 'city') {
-                $aResult['class'] = 'place';
-                $aResult['type'] = 'city';
-                $aResult['rank_search'] = 16;
-            }
-
             // Is there an icon set for this type of result?
-            $aClassInfo = ClassTypes\getInfo($aResult);
-
-            if ($aClassInfo) {
-                if (isset($aClassInfo['icon'])) {
-                    $aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassInfo['icon'].'.p.20.png';
-                }
-
-                if (isset($aClassInfo['label'])) {
-                    $aResult['label'] = $aClassInfo['label'];
-                }
+            $sIcon = ClassTypes\getIconFile($aResult);
+            if (isset($sIcon)) {
+                $aResult['icon'] = $sIcon;
             }
 
+            $sLabel = ClassTypes\getLabel($aResult);
+            if (isset($sLabel)) {
+                $aResult['label'] = $sLabel;
+            }
             $aResult['name'] = $aResult['langaddress'];
 
             if ($oCtx->hasNearPoint()) {
@@ -925,6 +917,26 @@ class Geocode
                     $aResult['lon'],
                     $aResult['lat']
                 );
+
+                // secondary ordering (for results with same importance; the smaller the better):
+                // - approximate importance of address parts
+                if (isset($aResult['addressimportance']) && $aResult['addressimportance']) {
+                    $aResult['foundorder'] = -$aResult['addressimportance']/10;
+                } else {
+                    $aResult['foundorder'] = -$aResult['importance'];
+                }
+                // - number of exact matches from the query
+                $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
+                // - importance of the class/type
+                $iClassImportance = ClassTypes\getImportance($aResult);
+                if (isset($iClassImportance)) {
+                    $aResult['foundorder'] += 0.0001 * $iClassImportance;
+                } else {
+                    $aResult['foundorder'] += 0.01;
+                }
+                // - rank
+                $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
+
                 // Adjust importance for the number of exact string matches in the result
                 $iCountWords = 0;
                 $sAddress = $aResult['langaddress'];
@@ -935,21 +947,8 @@ class Geocode
                     }
                 }
 
-                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
-
-                // secondary ordering (for results with same importance (the smaller the better):
-                // - approximate importance of address parts
-                $aResult['foundorder'] = -$aResult['addressimportance']/10;
-                // - number of exact matches from the query
-                $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
-                // - importance of the class/type
-                if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance'])
-                    && $aClassType[$aResult['class'].':'.$aResult['type']]['importance']
-                ) {
-                    $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance'];
-                } else {
-                    $aResult['foundorder'] += 0.01;
-                }
+                // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
+                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1);
             }
             $aSearchResults[$iIdx] = $aResult;
         }