]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / lib / Geocode.php
index 314d20bc88f0cf75e691a646ba8e472a99a7a8d3..ac92257f7782dc3fb9c9d8a612c8157f603446f4 100644 (file)
@@ -25,7 +25,7 @@ class Geocode
 
     protected $aExcludePlaceIDs = array();
     protected $bDeDupe = true;
-    protected $bReverseInPlan = false;
+    protected $bReverseInPlan = true;
 
     protected $iLimit = 20;
     protected $iFinalLimit = 10;
@@ -63,7 +63,7 @@ class Geocode
     private function normTerm($sTerm)
     {
         if ($this->oNormalizer === null) {
-            return null;
+            return $sTerm;
         }
 
         return $this->oNormalizer->transliterate($sTerm);
@@ -313,12 +313,7 @@ class Geocode
                 if (count($aViewbox) != 4) {
                     userError("Bad parmater 'viewbox'. Expected 4 coordinates.");
                 }
-                $this->setViewBox(array(
-                                   $aViewbox[0],
-                                   $aViewbox[3],
-                                   $aViewbox[2],
-                                   $aViewbox[1]
-                                  ));
+                $this->setViewBox($aViewbox);
             } else {
                 $aRoute = $oParams->getStringList('route');
                 $fRouteWidth = $oParams->getFloat('routewidth');
@@ -737,6 +732,8 @@ class Geocode
 
                         // If the token is valid
                         if (isset($aValidTokens[' '.$sToken])) {
+                            // TODO variable should go into aCurrentSearch
+                            $bHavePostcode = false;
                             foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
                                 $aSearch = $aCurrentSearch;
                                 $aSearch['iSearchRank']++;
@@ -749,18 +746,19 @@ class Geocode
                                         }
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
-                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
+                                } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                    if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
-                                        isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
+                                    if (!$bHavePostcode && $aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                        isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                         // If we have structured search or this is the first term,
                                         // make the postcode the primary search element.
                                         if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                             $aNewSearch = $aSearch;
                                             $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word'];
+                                            $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
+                                            $bHavePostcode = true;
                                         }
 
                                         // If we have a structured search or this is not the first term,
@@ -790,7 +788,7 @@ class Geocode
                                     // require a normalized exact match of the term
                                     // if we have the normalizer version of the query
                                     // available
-                                    if ($aSearch['sClass'] === ''
+                                    if ($aSearch['sOperator'] === ''
                                         && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                         $aSearch['sClass'] = $aSearchTerm['class'];
                                         $aSearch['sType'] = $aSearchTerm['type'];
@@ -915,6 +913,19 @@ class Geocode
 
             //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
         }
+
+        // Revisit searches, giving penalty to unlikely combinations
+        $aGroupedSearches = array();
+        foreach ($aSearches as $aSearch) {
+            if (!$aSearch['aName']) {
+                if ($aSearch['sHouseNumber']) {
+                    continue;
+                }
+            }
+            $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
+        }
+        ksort($aGroupedSearches);
+
         return $aGroupedSearches;
     }
 
@@ -1050,7 +1061,7 @@ class Geocode
                     continue;
                 }
 
-                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string"));
+                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string"));
                 $sSQL = 'SELECT * ';
                 $sSQL .= 'FROM ( ';
                 $sSQL .= '   SELECT word_id, word_token, word, class, type, country_code, operator';
@@ -1116,14 +1127,10 @@ class Geocode
                 if (CONST_Debug) var_Dump($sSQL);
 
                 $aValidTokens = array();
-                if (sizeof($aTokens)) {
-                    $aDatabaseWords = chksql(
-                        $this->oDB->getAll($sSQL),
-                        "Could not get word tokens."
-                    );
-                } else {
-                    $aDatabaseWords = array();
-                }
+                $aDatabaseWords = chksql(
+                    $this->oDB->getAll($sSQL),
+                    "Could not get word tokens."
+                );
                 $aPossibleMainWordIDs = array();
                 $aWordFrequencyScores = array();
                 foreach ($aDatabaseWords as $aToken) {
@@ -1274,8 +1281,8 @@ class Geocode
                     }
 
                     // No location term?
-                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
-                        if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
+                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) {
+                        if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
                                 $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
@@ -1295,39 +1302,32 @@ class Geocode
                             if (chksql($this->oDB->getOne($sSQL))) {
                                 $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
                                 if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)";
-                                $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)";
+                                if ($aSearch['oNear']) {
+                                    $sSQL .= " WHERE ".$aSearch['oNear']->withinSQL('ct.centroid');
+                                } else {
+                                    $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)";
+                                }
                                 if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if (sizeof($this->aExcludePlaceIDs)) {
                                     $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")";
                                 }
-                                if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC";
+                                if ($this->sViewboxCentreSQL) {
+                                    $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC";
+                                } elseif ($aSearch['oNear']) {
+                                    $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('ct.centroid').' ASC';
+                                }
                                 $sSQL .= " limit $this->iLimit";
                                 if (CONST_Debug) var_dump($sSQL);
                                 $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
-
-                                // If excluded place IDs are given, it is fair to assume that
-                                // there have been results in the small box, so no further
-                                // expansion in that case.
-                                // Also don't expand if bounded results were requested.
-                                if (!sizeof($aPlaceIDs) && !sizeof($this->aExcludePlaceIDs) && !$this->bBoundedSearch) {
-                                    $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
-                                    if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
-                                    $sSQL .= " WHERE ST_Contains($this->sViewboxLargeSQL, ct.centroid)";
-                                    if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
-                                    if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC";
-                                    $sSQL .= " LIMIT $this->iLimit";
-                                    if (CONST_Debug) var_dump($sSQL);
-                                    $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
-                                }
-                            } else {
+                            } else if ($aSearch['oNear']) {
                                 $sSQL = "SELECT place_id ";
                                 $sSQL .= "FROM placex ";
                                 $sSQL .= "WHERE class='".$aSearch['sClass']."' ";
                                 $sSQL .= "  AND type='".$aSearch['sType']."'";
-                                $sSQL .= "  AND ST_Contains($this->sViewboxSmallSQL, geometry) ";
+                                $sSQL .= "  AND ".$aSearch['oNear']->withinSQL('geometry');
                                 $sSQL .= "  AND linked_place_id is null";
                                 if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
-                                if ($this->sViewboxCentreSQL)   $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, centroid) ASC";
+                                $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('centroid')." ASC";
                                 $sSQL .= " LIMIT $this->iLimit";
                                 if (CONST_Debug) var_dump($sSQL);
                                 $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
@@ -1390,19 +1390,20 @@ class Geocode
                         // TODO: filter out the pointless search terms (2 letter name tokens and less)
                         // they might be right - but they are just too darned expensive to run
                         if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
-                        if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
+                        //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
                         if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
                             // For infrequent name terms disable index usage for address
                             if (CONST_Search_NameOnlySearchFrequencyThreshold
                                 && sizeof($aSearch['aName']) == 1
                                 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
                             ) {
-                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                             } else {
                                 $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
-                                if (sizeof($aSearch['aAddressNonSearch'])) {
+                                /*if (sizeof($aSearch['aAddressNonSearch'])) {
                                     $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
-                                }
+                                }*/
                             }
                         }
                         if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
@@ -1575,7 +1576,8 @@ class Geocode
                             }
 
                             // Fallback to the road (if no housenumber was found)
-                            if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])) {
+                            if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])
+                                && ($aSearch['aAddress'] || $aSearch['sCountryCode'])) {
                                 $aPlaceIDs = $aRoadPlaceIDs;
                                 //set to -1, if no housenumbers were found
                                 $searchedHousenumber = -1;
@@ -1740,9 +1742,11 @@ class Geocode
                     // Need to verify passes rank limits before dropping out of the loop (yuk!)
                     // reduces the number of place ids, like a filter
                     // rank_address is 30 for interpolated housenumbers
+                    $sWherePlaceId = 'WHERE place_id in (';
+                    $sWherePlaceId .= join(',', array_keys($aResultPlaceIDs)).') ';
+
                     $sSQL = "SELECT place_id ";
-                    $sSQL .= "FROM placex ";
-                    $sSQL .= "WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") ";
+                    $sSQL .= "FROM placex ".$sWherePlaceId;
                     $sSQL .= "  AND (";
                     $sSQL .= "         placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
                     if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) {
@@ -1751,20 +1755,23 @@ class Geocode
                     if ($this->aAddressRankList) {
                         $sSQL .= "     OR placex.rank_address in (".join(',', $this->aAddressRankList).")";
                     }
-                    if (CONST_Use_US_Tiger_Data) {
-                        $sSQL .= "  ) ";
+                    $sSQL .= "  ) UNION ";
+                    $sSQL .= " SELECT place_id FROM location_postcode lp ".$sWherePlaceId;
+                    $sSQL .= "  AND (lp.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
+                    if ($this->aAddressRankList) {
+                        $sSQL .= "     OR lp.rank_address in (".join(',', $this->aAddressRankList).")";
+                    }
+                    $sSQL .= ") ";
+                    if (CONST_Use_US_Tiger_Data && $this->iMaxAddressRank == 30) {
+                        $sSQL .= "UNION ";
+                        $sSQL .= "  SELECT place_id ";
+                        $sSQL .= "  FROM location_property_tiger ".$sWherePlaceId;
+                    }
+                    if ($this->iMaxAddressRank == 30) {
                         $sSQL .= "UNION ";
                         $sSQL .= "  SELECT place_id ";
-                        $sSQL .= "  FROM location_property_tiger ";
-                        $sSQL .= "  WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") ";
-                        $sSQL .= "    AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank ";
-                        if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',', $this->aAddressRankList).")";
+                        $sSQL .= "  FROM location_property_osmline ".$sWherePlaceId;
                     }
-                    $sSQL .= ") UNION ";
-                    $sSQL .= "  SELECT place_id ";
-                    $sSQL .= "  FROM location_property_osmline ";
-                    $sSQL .= "  WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")";
-                    $sSQL .= "    AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)";
                     if (CONST_Debug) var_dump($sSQL);
                     $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     $tempIDs = array();