]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / lib / Geocode.php
index f07c51045fb120563799539c295235cbc4f06c91..eb6152aa3470cc5962c5eaa5c3cdba29aaecf9cd 100644 (file)
@@ -25,7 +25,7 @@ class Geocode
 
     protected $aExcludePlaceIDs = array();
     protected $bDeDupe = true;
-    protected $bReverseInPlan = false;
+    protected $bReverseInPlan = true;
 
     protected $iLimit = 20;
     protected $iFinalLimit = 10;
@@ -51,10 +51,22 @@ class Geocode
     protected $sQuery = false;
     protected $aStructuredQuery = false;
 
+    protected $oNormalizer = null;
+
 
     public function __construct(&$oDB)
     {
         $this->oDB =& $oDB;
+        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); // built once per instance; normTerm() guards against a null result
+    }
+
+    private function normTerm($sTerm) // normalize $sTerm with the transliterator built in the constructor
+    {
+        if ($this->oNormalizer === null) {
+            return null; // no normalizer available: callers receive null instead of a string
+        }
+        // transliterate() applies the rules the normalizer was created from
+        return $this->oNormalizer->transliterate($sTerm);
     }
 
     public function setReverseInPlan($bReverse)
@@ -410,8 +422,15 @@ class Geocode
         $sPlaceIDs = join(',', array_keys($aPlaceIDs));
 
         $sImportanceSQL = '';
-        if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
-        if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
+        $sImportanceSQLGeom = '';
+        if ($this->sViewboxSmallSQL) {
+            $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
+            $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * ";
+        }
+        if ($this->sViewboxLargeSQL) {
+            $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
+            $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * ";
+        }
 
         $sSQL  = "SELECT ";
         $sSQL .= "    osm_type,";
@@ -476,6 +495,35 @@ class Geocode
         if ($this->bIncludeNameDetails) $sSQL .= "name, ";
         $sSQL .= "     extratags->'place' ";
 
+        // postcode table
+        $sSQL .= "UNION ";
+        $sSQL .= "SELECT";
+        $sSQL .= "  'P' as osm_type,";
+        $sSQL .= "  (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,";
+        $sSQL .= "  'place' as class, 'postcode' as type,";
+        $sSQL .= "  null as admin_level, rank_search, rank_address,";
+        $sSQL .= "  place_id, parent_place_id, country_code,";
+        $sSQL .= "  get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,";
+        $sSQL .= "  postcode as placename,";
+        $sSQL .= "  postcode as ref,";
+        if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,";
+        if ($this->bIncludeNameDetails) $sSQL .= "null AS names,";
+        $sSQL .= "  ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,";
+        $sSQL .=    $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, ";
+        $sSQL .= "  (";
+        $sSQL .= "     SELECT max(p.importance*(p.rank_address+2))";
+        $sSQL .= "     FROM ";
+        $sSQL .= "       place_addressline s, ";
+        $sSQL .= "       placex p";
+        $sSQL .= "     WHERE s.place_id = lp.parent_place_id";
+        $sSQL .= "       AND p.place_id = s.address_place_id ";
+        $sSQL .= "       AND s.isaddress";
+        $sSQL .= "       AND p.importance is not null";
+        $sSQL .= "  ) AS addressimportance, ";
+        $sSQL .= "  null AS extra_place ";
+        $sSQL .= "FROM location_postcode lp";
+        $sSQL .= " WHERE place_id in ($sPlaceIDs) ";
+
         if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) {
             // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines
             // with start- and endnumber, the common osm housenumbers are usually saved as points
@@ -701,37 +749,29 @@ class Geocode
                                         }
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
-                                } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) {
-                                    if ($aSearch['oNear'] === false) {
-                                        $aSearch['oNear'] = new NearPoint(
-                                            $aSearchTerm['lat'],
-                                            $aSearchTerm['lon'],
-                                            $aSearchTerm['radius']
-                                        );
-                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
-                                    }
                                 } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                    if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) {
+                                    if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                        isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                         // If we have structured search or this is the first term,
                                         // make the postcode the primary search element.
-                                        if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0) {
+                                        if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                             $aNewSearch = $aSearch;
                                             $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_token'];
+                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word'];
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
                                         }
 
                                         // If we have a structured search or this is not the first term,
                                         // add the postcode as an addendum.
-                                        if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) {
-                                            $aSearch['sPostcode'] = $aSearchTerm['word_token'];
+                                        if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) {
+                                            $aSearch['sPostcode'] = $aSearchTerm['word'];
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         }
                                     }
                                 } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') {
-                                    if ($aSearch['sHouseNumber'] === '') {
+                                    if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') {
                                         $aSearch['sHouseNumber'] = $sToken;
                                         // sanity check: if the housenumber is not mainly made
                                         // up of numbers, add a penalty
@@ -914,13 +954,7 @@ class Geocode
     {
         if (!$this->sQuery && !$this->aStructuredQuery) return array();
 
-        $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
-        if ($oNormalizer !== null) {
-            $sNormQuery = $oNormalizer->transliterate($this->sQuery);
-        } else {
-            $sNormQuery = null;
-        }
-
+        $sNormQuery = $this->normTerm($this->sQuery);
         $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
         $sCountryCodesSQL = false;
         if ($this->aCountryCodes) {
@@ -1105,21 +1139,9 @@ class Geocode
                 }
                 if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
-                // Try and calculate GB postcodes we might be missing
+                // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
                 foreach ($aTokens as $sToken) {
-                    // Source of gb postcodes is now definitive - always use
-                    if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) {
-                        if (substr($aData[1], -2, 1) != ' ') {
-                            $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1);
-                            $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1);
-                        }
-                        $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB);
-                        if ($aGBPostcodeLocation) {
-                            $aValidTokens[$sToken] = $aGBPostcodeLocation;
-                        }
-                    } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                        // US ZIP+4 codes - if there is no token,
-                        // merge in the 5-digit ZIP code
+                    if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                         if (isset($aValidTokens[$aData[1]])) {
                             foreach ($aValidTokens[$aData[1]] as $aToken) {
                                 if (!$aToken['class']) {
@@ -1186,7 +1208,6 @@ class Geocode
                 ksort($aGroupedSearches);
             }
 
-            if (CONST_Debug) var_Dump($aGroupedSearches);
             if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) {
                 $aCopyGroupedSearches = $aGroupedSearches;
                 foreach ($aCopyGroupedSearches as $iGroup => $aSearches) {
@@ -1243,13 +1264,16 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
+                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                        continue;
+                    }
+
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
                                 $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
-                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if ($bBoundingBoxSearch)
                                     $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)";
                                 $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1";
@@ -1308,6 +1332,24 @@ class Geocode
                         // If a coordinate is given, the search must either
                         // be for a name or a special search. Ignore everythin else.
                         $aPlaceIDs = array();
+                    } elseif ($aSearch['sOperator'] == 'postcode') {
+                        // Postcode is the primary search term: look it up directly in location_postcode.
+                        $sSQL  = "SELECT p.place_id FROM location_postcode p ";
+                        if (sizeof($aSearch['aAddress'])) { // parent place must carry every address term
+                            $sSQL .= ", search_name s WHERE s.place_id = p.parent_place_id ";
+                            $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join(',', $aSearch['aAddress'])."] AND ";
+                        } else {
+                            $sSQL .= " WHERE ";
+                        }
+                        $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'";
+                        if ($aSearch['sCountryCode']) {
+                            $sSQL .= " AND p.country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
+                        } elseif ($sCountryCodesSQL) {
+                            $sSQL .= " AND p.country_code in ($sCountryCodesSQL)";
+                        }
+                        $sSQL .= " LIMIT $this->iLimit";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     } else {
                         $aPlaceIDs = array();
 
@@ -1343,19 +1385,20 @@ class Geocode
                         // TODO: filter out the pointless search terms (2 letter name tokens and less)
                         // they might be right - but they are just too darned expensive to run
                         if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
-                        if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
+                        //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
                         if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
                             // For infrequent name terms disable index usage for address
                             if (CONST_Search_NameOnlySearchFrequencyThreshold
                                 && sizeof($aSearch['aName']) == 1
                                 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
                             ) {
-                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                             } else {
                                 $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
-                                if (sizeof($aSearch['aAddressNonSearch'])) {
+                                /*if (sizeof($aSearch['aAddressNonSearch'])) {
                                     $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
-                                }
+                                }*/
                             }
                         }
                         if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
@@ -1373,6 +1416,12 @@ class Geocode
                             $aTerms[] = $aSearch['oNear']->withinSQL('centroid');
 
                             $aOrder[] = $aSearch['oNear']->distanceSQL('centroid');
+                        } elseif ($aSearch['sPostcode']) {
+                            if (!sizeof($aSearch['aAddress'])) { // no address terms: require a matching postcode near the centroid (postcode is SQL-escaped)
+                                $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".pg_escape_string($aSearch['sPostcode'])."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))";
+                            } else { // address terms present: only order by distance to the closest matching postcode
+                                $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".pg_escape_string($aSearch['sPostcode'])."')";
+                            }
                         }
                         if (sizeof($this->aExcludePlaceIDs)) {
                             $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
@@ -1661,6 +1710,21 @@ class Geocode
                         var_Dump($aPlaceIDs);
                     }
 
+                    if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) {
+                        $sSQL = 'SELECT place_id FROM placex';
+                        $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')';
+                        $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
+                        if ($aFilteredPlaceIDs) {
+                            $aPlaceIDs = $aFilteredPlaceIDs;
+                            if (CONST_Debug) {
+                                echo "<br><b>Place IDs after postcode filtering:</b> ";
+                                var_Dump($aPlaceIDs);
+                            }
+                        }
+                    }
+
                     foreach ($aPlaceIDs as $iPlaceID) {
                         // array for placeID => -1 | Tiger housenumber
                         $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber;