]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
include GB CodePoint data into location_postcode table
[nominatim.git] / lib / Geocode.php
index 80449cb631d350797d1ab2679aee2c22c4343368..0546983f19e33beefbdf6db29916eba57d6a1221 100644 (file)
@@ -476,6 +476,35 @@ class Geocode
         if ($this->bIncludeNameDetails) $sSQL .= "name, ";
         $sSQL .= "     extratags->'place' ";
 
+        // postcode table
+        $sSQL .= "UNION ";
+        $sSQL .= "SELECT";
+        $sSQL .= "  'P' as osm_type,";
+        $sSQL .= "  (SELECT osm_id from placex p WHERE p.place_id = parent_place_id) as osm_id,";
+        $sSQL .= "  'place' as class, 'postcode' as type,";
+        $sSQL .= "  null as admin_level, rank_search, rank_address,";
+        $sSQL .= "  place_id, parent_place_id, country_code,";
+        $sSQL .= "  get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,";
+        $sSQL .= "  postcode as placename,";
+        $sSQL .= "  postcode as ref,";
+        if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,";
+        if ($this->bIncludeNameDetails) $sSQL .= "null AS names,";
+        $sSQL .= "  ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,";
+        $sSQL .=    $sImportanceSQL."(0.75-(rank_search::float/40)) AS importance, ";
+        $sSQL .= "  (";
+        $sSQL .= "     SELECT max(p.importance*(p.rank_address+2))";
+        $sSQL .= "     FROM ";
+        $sSQL .= "       place_addressline s, ";
+        $sSQL .= "       placex p";
+        $sSQL .= "     WHERE s.place_id = parent_place_id";
+        $sSQL .= "       AND p.place_id = s.address_place_id ";
+        $sSQL .= "       AND s.isaddress";
+        $sSQL .= "       AND p.importance is not null";
+        $sSQL .= "  ) AS addressimportance, ";
+        $sSQL .= "  null AS extra_place ";
+        $sSQL .= "FROM location_postcode";
+        $sSQL .= " WHERE place_id in ($sPlaceIDs) ";
+
         if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) {
             // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines
             // with start- and endnumber, the common osm housenumbers are usually saved as points
@@ -666,12 +695,12 @@ class Geocode
 
              Score how good the search is so they can be ordered
          */
-        foreach ($aPhrases as $iPhrase => $sPhrase) {
+        foreach ($aPhrases as $iPhrase => $aPhrase) {
             $aNewPhraseSearches = array();
             if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
             else $sPhraseType = '';
 
-            foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) {
+            foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) {
                 // Too many permutations - too expensive
                 if ($iWordSet > 120) break;
 
@@ -710,30 +739,25 @@ class Geocode
                                         );
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
-                                } elseif ($sPhraseType == 'postalcode') {
+                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
                                     if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) {
-                                        // If we already have a name try putting the postcode first
-                                        if (sizeof($aSearch['aName'])) {
+                                        // If we have structured search or this is the first term,
+                                        // make the postcode the primary search element.
+                                        if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) {
                                             $aNewSearch = $aSearch;
+                                            $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'] = array();
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = substr($aSearchTerm['word_token'], 1);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
                                         }
 
-                                        if (sizeof($aSearch['aName'])) {
-                                            if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) {
-                                                $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                            } else {
-                                                $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                                $aSearch['iSearchRank'] += 1000; // skip;
-                                            }
-                                        } else {
-                                            $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                            //$aSearch['iNamePhrase'] = $iPhrase;
+                                        // If we have a structured search or this is not the first term,
+                                        // add the postcode as an addendum.
+                                        if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) {
+                                            $aSearch['sPostcode'] = substr($aSearchTerm['word_token'], 1);
+                                            if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         }
-                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') {
                                     if ($aSearch['sHouseNumber'] === '') {
@@ -985,6 +1009,7 @@ class Geocode
                            'sClass' => '',
                            'sType' => '',
                            'sHouseNumber' => '',
+                           'sPostcode' => '',
                            'oNear' => $oNearPoint
                           )
                          );
@@ -1109,21 +1134,9 @@ class Geocode
                 }
                 if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
-                // Try and calculate GB postcodes we might be missing
+                // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
                 foreach ($aTokens as $sToken) {
-                    // Source of gb postcodes is now definitive - always use
-                    if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) {
-                        if (substr($aData[1], -2, 1) != ' ') {
-                            $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1);
-                            $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1);
-                        }
-                        $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB);
-                        if ($aGBPostcodeLocation) {
-                            $aValidTokens[$sToken] = $aGBPostcodeLocation;
-                        }
-                    } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                        // US ZIP+4 codes - if there is no token,
-                        // merge in the 5-digit ZIP code
+                    if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                         if (isset($aValidTokens[$aData[1]])) {
                             foreach ($aValidTokens[$aData[1]] as $aToken) {
                                 if (!$aToken['class']) {
@@ -1247,13 +1260,16 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
+                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                        continue;
+                    }
+
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
                                 $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
-                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if ($bBoundingBoxSearch)
                                     $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)";
                                 $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1";
@@ -1312,6 +1328,24 @@ class Geocode
                         // If a coordinate is given, the search must either
                         // be for a name or a special search. Ignore everythin else.
                         $aPlaceIDs = array();
+                    } elseif ($aSearch['sOperator'] == 'postcode') {
+                        $sSQL  = "SELECT p.place_id FROM location_postcode p ";
+                        if (sizeof($aSearch['aAddress'])) {
+                            $sSQL .= ", search_name s ";
+                            $sSQL .= "WHERE s.place_id = p.parent_place_id ";
+                            $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND ";
+                        } else {
+                            $sSQL .= " WHERE ";
+                        }
+                        $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'";
+                        if ($aSearch['sCountryCode']) {
+                            $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'";
+                        } elseif ($sCountryCodesSQL) {
+                            $sSQL .= " AND p.country_code in ($sCountryCodesSQL)";
+                        }
+                        $sSQL .= " LIMIT $this->iLimit";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     } else {
                         $aPlaceIDs = array();
 
@@ -1377,6 +1411,8 @@ class Geocode
                             $aTerms[] = $aSearch['oNear']->withinSQL('centroid');
 
                             $aOrder[] = $aSearch['oNear']->distanceSQL('centroid');
+                        } elseif ($aSearch['sPostcode']) {
+                            $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')";
                         }
                         if (sizeof($this->aExcludePlaceIDs)) {
                             $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
@@ -1665,6 +1701,21 @@ class Geocode
                         var_Dump($aPlaceIDs);
                     }
 
+                    if ($aSearch['sPostcode']) {
+                        $sSQL = 'SELECT place_id FROM placex';
+                        $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')';
+                        $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
+                        if ($aFilteredPlaceIDs) {
+                            $aPlaceIDs = $aFilteredPlaceIDs;
+                            if (CONST_Debug) {
+                                echo "<br><b>Place IDs after postcode filtering:</b> ";
+                                var_Dump($aPlaceIDs);
+                            }
+                        }
+                    }
+
                     foreach ($aPlaceIDs as $iPlaceID) {
                         // array for placeID => -1 | Tiger housenumber
                         $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber;