git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
include GB CodePoint data into location_postcode table
[nominatim.git] / lib / Geocode.php
index ec8eb3489e6daaa15f11ba3d6aa0029284abb114..0546983f19e33beefbdf6db29916eba57d6a1221 100644 (file)
@@ -476,6 +476,35 @@ class Geocode
         if ($this->bIncludeNameDetails) $sSQL .= "name, ";
         $sSQL .= "     extratags->'place' ";
 
+        // postcode table
+        $sSQL .= "UNION ";
+        $sSQL .= "SELECT";
+        $sSQL .= "  'P' as osm_type,";
+        $sSQL .= "  (SELECT osm_id from placex p WHERE p.place_id = parent_place_id) as osm_id,";
+        $sSQL .= "  'place' as class, 'postcode' as type,";
+        $sSQL .= "  null as admin_level, rank_search, rank_address,";
+        $sSQL .= "  place_id, parent_place_id, country_code,";
+        $sSQL .= "  get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,";
+        $sSQL .= "  postcode as placename,";
+        $sSQL .= "  postcode as ref,";
+        if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,";
+        if ($this->bIncludeNameDetails) $sSQL .= "null AS names,";
+        $sSQL .= "  ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,";
+        $sSQL .=    $sImportanceSQL."(0.75-(rank_search::float/40)) AS importance, ";
+        $sSQL .= "  (";
+        $sSQL .= "     SELECT max(p.importance*(p.rank_address+2))";
+        $sSQL .= "     FROM ";
+        $sSQL .= "       place_addressline s, ";
+        $sSQL .= "       placex p";
+        $sSQL .= "     WHERE s.place_id = parent_place_id";
+        $sSQL .= "       AND p.place_id = s.address_place_id ";
+        $sSQL .= "       AND s.isaddress";
+        $sSQL .= "       AND p.importance is not null";
+        $sSQL .= "  ) AS addressimportance, ";
+        $sSQL .= "  null AS extra_place ";
+        $sSQL .= "FROM location_postcode";
+        $sSQL .= " WHERE place_id in ($sPlaceIDs) ";
+
         if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) {
             // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines
             // with start- and endnumber, the common osm housenumbers are usually saved as points
@@ -653,7 +682,7 @@ class Geocode
         return $aSearchResults;
     }
 
-    public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+    public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
     {
         /*
              Calculate all searches using aValidTokens i.e.
@@ -666,12 +695,12 @@ class Geocode
 
              Score how good the search is so they can be ordered
          */
-        foreach ($aPhrases as $iPhrase => $sPhrase) {
+        foreach ($aPhrases as $iPhrase => $aPhrase) {
             $aNewPhraseSearches = array();
             if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
             else $sPhraseType = '';
 
-            foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) {
+            foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) {
                 // Too many permutations - too expensive
                 if ($iWordSet > 120) break;
 
@@ -710,30 +739,25 @@ class Geocode
                                         );
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
-                                } elseif ($sPhraseType == 'postalcode') {
+                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
                                     if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) {
-                                        // If we already have a name try putting the postcode first
-                                        if (sizeof($aSearch['aName'])) {
+                                        // If we have structured search or this is the first term,
+                                        // make the postcode the primary search element.
+                                        if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) {
                                             $aNewSearch = $aSearch;
+                                            $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'] = array();
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = substr($aSearchTerm['word_token'], 1);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
                                         }
 
-                                        if (sizeof($aSearch['aName'])) {
-                                            if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) {
-                                                $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                            } else {
-                                                $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                                $aSearch['iSearchRank'] += 1000; // skip;
-                                            }
-                                        } else {
-                                            $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                            //$aSearch['iNamePhrase'] = $iPhrase;
+                                        // If we have a structured search or this is not the first term,
+                                        // add the postcode as an addendum.
+                                        if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) {
+                                            $aSearch['sPostcode'] = substr($aSearchTerm['word_token'], 1);
+                                            if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         }
-                                        if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') {
                                     if ($aSearch['sHouseNumber'] === '') {
@@ -752,13 +776,19 @@ class Geocode
                                          */
                                     }
                                 } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
-                                    if ($aSearch['sClass'] === '') {
-                                        $aSearch['sOperator'] = $aSearchTerm['operator'];
+                                    // If a normalized version of the query is available,
+                                    // only accept the term when its word occurs verbatim
+                                    // in that normalized query.
+                                    if ($aSearch['sClass'] === ''
+                                        && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                         $aSearch['sClass'] = $aSearchTerm['class'];
                                         $aSearch['sType'] = $aSearchTerm['type'];
-                                        if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
-                                        else $aSearch['sOperator'] = 'near'; // near = in for the moment
-                                        if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+                                        if ($aSearchTerm['operator'] == '') {
+                                            $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' :  'near';
+                                            $aSearch['iSearchRank'] += 2;
+                                        } else {
+                                            $aSearch['sOperator'] = 'near'; // near = in for the moment
+                                        }
 
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
@@ -913,6 +943,13 @@ class Geocode
     {
         if (!$this->sQuery && !$this->aStructuredQuery) return array();
 
+        $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+        if ($oNormalizer !== null) {
+            $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+        } else {
+            $sNormQuery = null;
+        }
+
         $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
         $sCountryCodesSQL = false;
         if ($this->aCountryCodes) {
@@ -972,6 +1009,7 @@ class Geocode
                            'sClass' => '',
                            'sType' => '',
                            'sHouseNumber' => '',
+                           'sPostcode' => '',
                            'oNear' => $oNearPoint
                           )
                          );
@@ -1096,21 +1134,9 @@ class Geocode
                 }
                 if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
-                // Try and calculate GB postcodes we might be missing
+                // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
                 foreach ($aTokens as $sToken) {
-                    // Source of gb postcodes is now definitive - always use
-                    if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) {
-                        if (substr($aData[1], -2, 1) != ' ') {
-                            $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1);
-                            $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1);
-                        }
-                        $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB);
-                        if ($aGBPostcodeLocation) {
-                            $aValidTokens[$sToken] = $aGBPostcodeLocation;
-                        }
-                    } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                        // US ZIP+4 codes - if there is no token,
-                        // merge in the 5-digit ZIP code
+                    if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
                         if (isset($aValidTokens[$aData[1]])) {
                             foreach ($aValidTokens[$aData[1]] as $aToken) {
                                 if (!$aToken['class']) {
@@ -1139,7 +1165,7 @@ class Geocode
                 // array with: placeid => -1 | tiger-housenumber
                 $aResultPlaceIDs = array();
 
-                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
 
                 if ($this->bReverseInPlan) {
                     // Reverse phrase array and also reverse the order of the wordsets in
@@ -1151,7 +1177,7 @@ class Geocode
                         $aFinalPhrase = end($aPhrases);
                         $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
                     }
-                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
 
                     foreach ($aGroupedSearches as $aSearches) {
                         foreach ($aSearches as $aSearch) {
@@ -1234,13 +1260,16 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
+                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                        continue;
+                    }
+
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
                                 $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
-                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if ($bBoundingBoxSearch)
                                     $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)";
                                 $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1";
@@ -1299,6 +1328,24 @@ class Geocode
                         // If a coordinate is given, the search must either
                         // be for a name or a special search. Ignore everythin else.
                         $aPlaceIDs = array();
+                    } elseif ($aSearch['sOperator'] == 'postcode') {
+                        $sSQL  = "SELECT p.place_id FROM location_postcode p ";
+                        if (sizeof($aSearch['aAddress'])) {
+                            $sSQL .= ", search_name s ";
+                            $sSQL .= "WHERE s.place_id = p.parent_place_id ";
+                            $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND ";
+                        } else {
+                            $sSQL .= " WHERE ";
+                        }
+                        $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'";
+                        if ($aSearch['sCountryCode']) {
+                            $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'";
+                        } elseif ($sCountryCodesSQL) {
+                            $sSQL .= " AND p.country_code in ($sCountryCodesSQL)";
+                        }
+                        $sSQL .= " LIMIT $this->iLimit";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     } else {
                         $aPlaceIDs = array();
 
@@ -1364,6 +1411,8 @@ class Geocode
                             $aTerms[] = $aSearch['oNear']->withinSQL('centroid');
 
                             $aOrder[] = $aSearch['oNear']->distanceSQL('centroid');
+                        } elseif ($aSearch['sPostcode']) {
+                            $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')";
                         }
                         if (sizeof($this->aExcludePlaceIDs)) {
                             $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
@@ -1542,8 +1591,9 @@ class Geocode
                             }
 
                             if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'near') { // & in
+                                $sClassTable = 'place_classtype_'.$aSearch['sClass'].'_'.$aSearch['sType'];
                                 $sSQL = "SELECT count(*) FROM pg_tables ";
-                                $sSQL .= "WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'";
+                                $sSQL .= "WHERE tablename = '$sClassTable'";
                                 $bCacheTable = chksql($this->oDB->getOne($sSQL));
 
                                 $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)";
@@ -1591,7 +1641,8 @@ class Geocode
                                             $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)";
                                         }
 
-                                        $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l";
+                                        $sSQL = "select distinct i.place_id".($sOrderBySQL?', i.order_term':'')." from (";
+                                        $sSQL .= "select l.place_id".($sOrderBySQL?','.$sOrderBySQL.' as order_term':'')." from ".$sClassTable." as l";
                                         if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)";
                                         if ($sPlaceIDs) {
                                             $sSQL .= ",placex as f where ";
@@ -1605,7 +1656,8 @@ class Geocode
                                             $sSQL .= " and l.place_id not in (".join(',', $this->aExcludePlaceIDs).")";
                                         }
                                         if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)";
-                                        if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc";
+                                        $sSQL .= 'limit 300) i ';
+                                        if ($sOrderBySQL) $sSQL .= "order by order_term asc";
                                         if ($this->iOffset) $sSQL .= " offset $this->iOffset";
                                         $sSQL .= " limit $this->iLimit";
                                         if (CONST_Debug) var_dump($sSQL);
@@ -1649,6 +1701,21 @@ class Geocode
                         var_Dump($aPlaceIDs);
                     }
 
+                    if ($aSearch['sPostcode']) {
+                        $sSQL = 'SELECT place_id FROM placex';
+                        $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')';
+                        $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'";
+                        if (CONST_Debug) var_dump($sSQL);
+                        $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
+                        if ($aFilteredPlaceIDs) {
+                            $aPlaceIDs = $aFilteredPlaceIDs;
+                            if (CONST_Debug) {
+                                echo "<br><b>Place IDs after postcode filtering:</b> ";
+                                var_Dump($aPlaceIDs);
+                            }
+                        }
+                    }
+
                     foreach ($aPlaceIDs as $iPlaceID) {
                         // array for placeID => -1 | Tiger housenumber
                         $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber;