git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
put a heavy penalty on full word searches in address
[nominatim.git] / lib / Geocode.php
index 684e7adb3d935c70eeea1d1a05ebfcc27dc2e84e..cce85f2b31b8f78c982fe9f9674355d3a40a95e7 100644 (file)
@@ -17,10 +17,8 @@ class Geocode
 
     protected $aLangPrefOrder = array();
 
-    protected $bIncludeAddressDetails = false;
-
     protected $aExcludePlaceIDs = array();
-    protected $bReverseInPlan = false;
+    protected $bReverseInPlan = true;
 
     protected $iLimit = 20;
     protected $iFinalLimit = 10;
@@ -87,7 +85,6 @@ class Geocode
             $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
         }
 
-        if ($this->bIncludeAddressDetails) $aParams['addressdetails'] = '1';
         if ($this->bBoundedSearch) $aParams['bounded'] = '1';
 
         if ($this->aCountryCodes) {
@@ -152,6 +149,10 @@ class Geocode
 
     private function viewboxImportanceFactor($fX, $fY)
     {
+        if (!$this->aViewBox) {
+            return 1;
+        }
+
         $fWidth = ($this->aViewBox[2] - $this->aViewBox[0])/2;
         $fHeight = ($this->aViewBox[3] - $this->aViewBox[1])/2;
 
@@ -183,9 +184,6 @@ class Geocode
 
     public function loadParamArray($oParams, $sForceGeometryType = null)
     {
-        $this->bIncludeAddressDetails
-         = $oParams->getBool('addressdetails', $this->bIncludeAddressDetails);
-
         $this->bBoundedSearch = $oParams->getBool('bounded', $this->bBoundedSearch);
 
         $this->setLimit($oParams->getInt('limit', $this->iFinalLimit));
@@ -247,14 +245,7 @@ class Geocode
         }
 
         $this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType);
-        $this->oPlaceLookup->setIncludeAddressDetails(false);
-        $this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon'));
-
-        if ($this->bIncludeAddressDetails
-            && $oParams->getString('format', '') == 'geocodejson'
-           ) {
-            $this->oPlaceLookup->setAddressAdminLevels(true);
-        }
+        $this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false));
     }
 
     public function setQueryFromParams($oParams)
@@ -356,10 +347,7 @@ class Geocode
             $aNewPhraseSearches = array();
             $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
 
-            foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) {
-                // Too many permutations - too expensive
-                if ($iWordSet > 120) break;
-
+            foreach ($oPhrase->getWordSets() as $aWordset) {
                 $aWordsetSearches = $aSearches;
 
                 // Add all words from this wordset
@@ -535,8 +523,8 @@ class Geocode
         $sNormQuery = $this->normTerm($this->sQuery);
         Debug::printVar('Normalized query', $sNormQuery);
 
-        $sLanguagePrefArraySQL = getArraySQL(
-            array_map('getDBQuoted', $this->aLangPrefOrder)
+        $sLanguagePrefArraySQL = $this->oDB->getArraySQL(
+            $this->oDB->getDBQuotedList($this->aLangPrefOrder)
         );
 
         $sQuery = $this->sQuery;
@@ -554,7 +542,6 @@ class Geocode
         // Do we have anything that looks like a lat/lon pair?
         $sQuery = $oCtx->setNearPointFromQuery($sQuery);
 
-        $aResults = array();
         if ($sQuery || $this->aStructuredQuery) {
             // Start with a single blank search
             $aSearches = array(new SearchDescription($oCtx));
@@ -590,8 +577,9 @@ class Geocode
 
             if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
                 $sSpecialTerm = pg_escape_string($sSpecialTerm);
-                $sToken = chksql(
-                    $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"),
+                $sToken = $this->oDB->getOne(
+                    'SELECT make_standard_name(:term)',
+                    array(':term' => $sSpecialTerm),
                     'Cannot decode query. Wrong encoding?'
                 );
                 $sSQL = 'SELECT class, type FROM word ';
@@ -599,7 +587,7 @@ class Geocode
                 $sSQL .= '   AND class is not null AND class not in (\'place\')';
 
                 Debug::printSQL($sSQL);
-                $aSearchWords = chksql($this->oDB->getAll($sSQL));
+                $aSearchWords = $this->oDB->getAll($sSQL);
                 $aNewSearches = array();
                 foreach ($aSearches as $oSearch) {
                     foreach ($aSearchWords as $aSearchTerm) {
@@ -637,8 +625,9 @@ class Geocode
             $aTokens = array();
             $aPhrases = array();
             foreach ($aInPhrases as $iPhrase => $sPhrase) {
-                $sPhrase = chksql(
-                    $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'),
+                $sPhrase = $this->oDB->getOne(
+                    'SELECT make_standard_name(:phrase)',
+                    array(':phrase' => $sPhrase),
                     'Cannot normalize query string (is it a UTF-8 string?)'
                 );
                 if (trim($sPhrase)) {
@@ -648,18 +637,11 @@ class Geocode
                 }
             }
 
-            Debug::printDebugTable('Phrases', $aPhrases);
             Debug::printVar('Tokens', $aTokens);
 
             $oValidTokens = new TokenList();
 
             if (!empty($aTokens)) {
-                $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
-                $sSQL .= ' FROM word ';
-                $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
-
-                Debug::printSQL($sSQL);
-
                 $oValidTokens->addTokensFromDB(
                     $this->oDB,
                     $aTokens,
@@ -693,6 +675,11 @@ class Geocode
 
                 Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
 
+                foreach ($aPhrases as $oPhrase) {
+                    $oPhrase->computeWordSets($oValidTokens);
+                }
+                Debug::printDebugTable('Phrases', $aPhrases);
+
                 Debug::newSection('Search candidates');
 
                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
@@ -754,8 +741,10 @@ class Geocode
             // Start the search process
             $iGroupLoop = 0;
             $iQueryLoop = 0;
+            $aNextResults = array();
             foreach ($aGroupedSearches as $iGroupedRank => $aSearches) {
                 $iGroupLoop++;
+                $aResults = $aNextResults;
                 foreach ($aSearches as $oSearch) {
                     $iQueryLoop++;
 
@@ -765,16 +754,42 @@ class Geocode
                         $oValidTokens->debugTokenByWordIdList()
                     );
 
-                    $aResults += $oSearch->query(
+                    $aNewResults = $oSearch->query(
                         $this->oDB,
                         $this->iMinAddressRank,
                         $this->iMaxAddressRank,
                         $this->iLimit
                     );
 
+                    // The same result may appear in different rounds, only
+                    // use the one with minimal rank.
+                    foreach ($aNewResults as $iPlace => $oRes) {
+                        if (!isset($aResults[$iPlace])
+                            || $aResults[$iPlace]->iResultRank > $oRes->iResultRank) {
+                            $aResults[$iPlace] = $oRes;
+                        }
+                    }
+
                     if ($iQueryLoop > 20) break;
                 }
 
+                if (!empty($aResults)) {
+                    $aSplitResults = Result::splitResults($aResults);
+                    Debug::printVar('Split results', $aSplitResults);
+                    if ($iGroupLoop <= 4 && empty($aSplitResults['tail'])
+                        && reset($aSplitResults['head'])->iResultRank > 0) {
+                        // Haven't found an exact match for the query yet.
+                        // Therefore add result from the next group level.
+                        $aNextResults = $aSplitResults['head'];
+                        foreach ($aNextResults as $oRes) {
+                            $oRes->iResultRank--;
+                        }
+                        $aResults = array();
+                    } else {
+                        $aResults = $aSplitResults['head'];
+                    }
+                }
+
                 if (!empty($aResults) && ($this->iMinAddressRank != 0 || $this->iMaxAddressRank != 30)) {
                     // Need to verify passes rank limits before dropping out of the loop (yuk!)
                     // reduces the number of place ids, like a filter
@@ -786,9 +801,7 @@ class Geocode
                         $sSQL .= 'WHERE place_id in ('.$sPlaceIds.') ';
                         $sSQL .= '  AND (';
                         $sSQL .= "         placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
-                        if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) {
-                            $sSQL .= "     OR (extratags->'place') = 'city'";
-                        }
+                        $sSQL .= "         OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank ";
                         if ($this->aAddressRankList) {
                             $sSQL .= '     OR placex.rank_address in ('.join(',', $this->aAddressRankList).')';
                         }
@@ -811,7 +824,7 @@ class Geocode
                     if ($aFilterSql) {
                         $sSQL = join(' UNION ', $aFilterSql);
                         Debug::printSQL($sSQL);
-                        $aFilteredIDs = chksql($this->oDB->getCol($sSQL));
+                        $aFilteredIDs = $this->oDB->getCol($sSQL);
                     }
 
                     $tempIDs = array();
@@ -868,7 +881,6 @@ class Geocode
 
         $aSearchResults = $this->oPlaceLookup->lookup($aResults);
 
-        $aClassType = ClassTypes\getListWithImportance();
         $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
         foreach ($aRecheckWords as $i => $sWord) {
             if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
@@ -877,41 +889,23 @@ class Geocode
         Debug::printVar('Recheck words', $aRecheckWords);
 
         foreach ($aSearchResults as $iIdx => $aResult) {
-            // Default
-            $fDiameter = ClassTypes\getProperty($aResult, 'defdiameter', 0.0001);
+            $fRadius = ClassTypes\getDefRadius($aResult);
 
-            $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2);
+            $aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius);
             if ($aOutlineResult) {
                 $aResult = array_merge($aResult, $aOutlineResult);
             }
 
-            if ($aResult['extra_place'] == 'city') {
-                $aResult['class'] = 'place';
-                $aResult['type'] = 'city';
-                $aResult['rank_search'] = 16;
-            }
-
             // Is there an icon set for this type of result?
-            $aClassInfo = ClassTypes\getInfo($aResult);
-
-            if ($aClassInfo) {
-                if (isset($aClassInfo['icon'])) {
-                    $aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassInfo['icon'].'.p.20.png';
-                }
-
-                if (isset($aClassInfo['label'])) {
-                    $aResult['label'] = $aClassInfo['label'];
-                }
-            }
-            // if tag '&addressdetails=1' is set in query
-            if ($this->bIncludeAddressDetails) {
-                // getAddressDetails() is defined in lib.php and uses the SQL function get_addressdata in functions.sql
-                $aResult['address'] = getAddressDetails($this->oDB, $sLanguagePrefArraySQL, $aResult['place_id'], $aResult['country_code'], $aResults[$aResult['place_id']]->iHouseNumber);
-                if ($aResult['extra_place'] == 'city' && !isset($aResult['address']['city'])) {
-                    $aResult['address'] = array_merge(array('city' => array_values($aResult['address'])[0]), $aResult['address']);
-                }
+            $sIcon = ClassTypes\getIconFile($aResult);
+            if (isset($sIcon)) {
+                $aResult['icon'] = $sIcon;
             }
 
+            $sLabel = ClassTypes\getLabel($aResult);
+            if (isset($sLabel)) {
+                $aResult['label'] = $sLabel;
+            }
             $aResult['name'] = $aResult['langaddress'];
 
             if ($oCtx->hasNearPoint()) {
@@ -923,6 +917,26 @@ class Geocode
                     $aResult['lon'],
                     $aResult['lat']
                 );
+
+                // secondary ordering (for results with same importance (the smaller the better):
+                // - approximate importance of address parts
+                if (isset($aResult['addressimportance']) && $aResult['addressimportance']) {
+                    $aResult['foundorder'] = -$aResult['addressimportance']/10;
+                } else {
+                    $aResult['foundorder'] = -$aResult['importance'];
+                }
+                // - number of exact matches from the query
+                $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
+                // - importance of the class/type
+                $iClassImportance = ClassTypes\getImportance($aResult);
+                if (isset($iClassImportance)) {
+                    $aResult['foundorder'] += 0.0001 * $iClassImportance;
+                } else {
+                    $aResult['foundorder'] += 0.01;
+                }
+                // - rank
+                $aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
+
                 // Adjust importance for the number of exact string matches in the result
                 $iCountWords = 0;
                 $sAddress = $aResult['langaddress'];
@@ -933,21 +947,8 @@ class Geocode
                     }
                 }
 
-                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
-
-                // secondary ordering (for results with same importance (the smaller the better):
-                // - approximate importance of address parts
-                $aResult['foundorder'] = -$aResult['addressimportance']/10;
-                // - number of exact matches from the query
-                $aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
-                // - importance of the class/type
-                if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance'])
-                    && $aClassType[$aResult['class'].':'.$aResult['type']]['importance']
-                ) {
-                    $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance'];
-                } else {
-                    $aResult['foundorder'] += 0.01;
-                }
+                // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
+                $aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1);
             }
             $aSearchResults[$iIdx] = $aResult;
         }
@@ -990,7 +991,6 @@ class Geocode
                 'Query' => $this->sQuery,
                 'Structured query' => $this->aStructuredQuery,
                 'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
-                'Include address' => $this->bIncludeAddressDetails,
                 'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
                 'Try reversed query'=> $this->bReverseInPlan,
                 'Limit (for searches)' => $this->iLimit,