git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / lib / Geocode.php
index a8e4083b2c4883238e9d55cf7de0026c23619a17..62e7a7cfed120da8735b509740cf78f3339d50be 100644 (file)
@@ -2,6 +2,7 @@
 
 namespace Nominatim;
 
+require_once(CONST_BasePath.'/lib/NearPoint.php');
 require_once(CONST_BasePath.'/lib/PlaceLookup.php');
 require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
 
@@ -24,7 +25,7 @@ class Geocode
 
     protected $aExcludePlaceIDs = array();
     protected $bDeDupe = true;
-    protected $bReverseInPlan = false;
+    protected $bReverseInPlan = true;
 
     protected $iLimit = 20;
     protected $iFinalLimit = 10;
@@ -32,7 +33,6 @@ class Geocode
     protected $bFallback = false;
 
     protected $aCountryCodes = false;
-    protected $aNearPoint = false;
 
     protected $bBoundedSearch = false;
     protected $aViewBox = false;
@@ -126,6 +126,12 @@ class Geocode
         return $this->aExcludePlaceIDs;
     }
 
+
+    public function getCountryCodes()
+    {
+        return $this->aCountryCodes;
+    }
+
     public function getViewBoxString()
     {
         if (!$this->aViewBox) return null;
@@ -215,11 +221,6 @@ class Geocode
         );
     }
 
-    public function setNearPoint($aNearPoint, $fRadiusDeg = 0.1)
-    {
-        $this->aNearPoint = array((float)$aNearPoint[0], (float)$aNearPoint[1], (float)$fRadiusDeg);
-    }
-
     public function setQuery($sQueryString)
     {
         $this->sQuery = $sQueryString;
@@ -477,12 +478,13 @@ class Geocode
                 $sHousenumbers .= "(".$placeID.", ".$housenumber.")";
                 if ($i<$length) $sHousenumbers .= ", ";
             }
+
             if (CONST_Use_US_Tiger_Data) {
                 // Tiger search only if a housenumber was searched and if it was found (i.e. aPlaceIDs[placeID] = housenumber != -1) (realized through a join)
                 $sSQL .= " union";
                 $sSQL .= " SELECT ";
                 $sSQL .= "     'T' AS osm_type, ";
-                $sSQL .= "     place_id AS osm_id, ";
+                $sSQL .= "     (SELECT osm_id from placex p WHERE p.place_id=min(blub.parent_place_id)) as osm_id, ";
                 $sSQL .= "     'place' AS class, ";
                 $sSQL .= "     'house' AS type, ";
                 $sSQL .= "     null AS admin_level, ";
@@ -691,10 +693,12 @@ class Geocode
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) {
-                                    if ($aSearch['fLat'] === '') {
-                                        $aSearch['fLat'] = $aSearchTerm['lat'];
-                                        $aSearch['fLon'] = $aSearchTerm['lon'];
-                                        $aSearch['fRadius'] = $aSearchTerm['radius'];
+                                    if ($aSearch['oNear'] === false) {
+                                        $aSearch['oNear'] = new NearPoint(
+                                            $aSearchTerm['lat'],
+                                            $aSearchTerm['lon'],
+                                            $aSearchTerm['radius']
+                                        );
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif ($sPhraseType == 'postalcode') {
@@ -907,6 +911,9 @@ class Geocode
         }
 
         $sQuery = $this->sQuery;
+        if (!preg_match('//u', $sQuery)) {
+            userError("Query string is not UTF-8 encoded.");
+        }
 
         // Conflicts between US state abbreviations and various words for 'the' in different languages
         if (isset($this->aLangPrefOrder['name:en'])) {
@@ -932,8 +939,9 @@ class Geocode
         }
 
         // Do we have anything that looks like a lat/lon pair?
-        if ($aLooksLike = looksLikeLatLonPair($sQuery)) {
-            $this->setNearPoint(array($aLooksLike['lat'], $aLooksLike['lon']));
+        $oNearPoint = false;
+        if ($aLooksLike = NearPoint::extractFromQuery($sQuery)) {
+            $oNearPoint = $aLooksLike['pt'];
             $sQuery = $aLooksLike['query'];
         }
 
@@ -955,21 +963,10 @@ class Geocode
                            'sClass' => '',
                            'sType' => '',
                            'sHouseNumber' => '',
-                           'fLat' => '',
-                           'fLon' => '',
-                           'fRadius' => ''
+                           'oNear' => $oNearPoint
                           )
                          );
 
-            // Do we have a radius search?
-            $sNearPointSQL = false;
-            if ($this->aNearPoint) {
-                $sNearPointSQL = "ST_SetSRID(ST_Point(".(float)$this->aNearPoint[1].",".(float)$this->aNearPoint[0]."),4326)";
-                $aSearches[0]['fLat'] = (float)$this->aNearPoint[0];
-                $aSearches[0]['fLon'] = (float)$this->aNearPoint[1];
-                $aSearches[0]['fRadius'] = (float)$this->aNearPoint[2];
-            }
-
             // Any 'special' terms in the search?
             $bSpecialTerms = false;
             preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
@@ -1229,7 +1226,7 @@ class Geocode
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
                     // No location term?
-                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['fLon']) {
+                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
@@ -1244,7 +1241,7 @@ class Geocode
                                 $aPlaceIDs = array();
                             }
                         } else {
-                            if (!$bBoundingBoxSearch && !$aSearch['fLon']) continue;
+                            if (!$bBoundingBoxSearch && !$aSearch['oNear']) continue;
                             if (!$aSearch['sClass']) continue;
 
                             $sSQL = "SELECT COUNT(*) FROM pg_tables WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'";
@@ -1289,7 +1286,7 @@ class Geocode
                                 $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
                             }
                         }
-                    } elseif ($aSearch['fLon'] && !sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['sClass']) {
+                    } elseif ($aSearch['oNear'] && !sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['sClass']) {
                         // If a coordinate is given, the search must either
                         // be for a name or a special search. Ignore everything else.
                         $aPlaceIDs = array();
@@ -1316,6 +1313,7 @@ class Geocode
                             $aOrder[0] .= "     SELECT place_id ";
                             $aOrder[0] .= "     FROM location_property_osmline ";
                             $aOrder[0] .= "     WHERE parent_place_id = search_name.place_id";
+                            $aOrder[0] .= "       AND startnumber is not NULL";
                             $aOrder[0] .= "       AND ".intval($aSearch['sHouseNumber']).">=startnumber ";
                             $aOrder[0] .= "       AND ".intval($aSearch['sHouseNumber'])."<=endnumber ";
                             $aOrder[0] .= "     LIMIT 1";
@@ -1327,19 +1325,20 @@ class Geocode
                         // TODO: filter out the pointless search terms (2 letter name tokens and less)
                         // they might be right - but they are just too darned expensive to run
                         if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
-                        if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
+                        //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
                         if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
                             // For infrequent name terms disable index usage for address
                             if (CONST_Search_NameOnlySearchFrequencyThreshold
                                 && sizeof($aSearch['aName']) == 1
                                 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
                             ) {
-                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
+                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                             } else {
                                 $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
-                                if (sizeof($aSearch['aAddressNonSearch'])) {
+                                /*if (sizeof($aSearch['aAddressNonSearch'])) {
                                     $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
-                                }
+                                }*/
                             }
                         }
                         if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
@@ -1353,15 +1352,10 @@ class Geocode
                                 $aTerms[] = "address_rank <= ".$this->iMaxAddressRank;
                             }
                         }
-                        if ($aSearch['fLon'] && $aSearch['fLat']) {
-                            $aTerms[] = sprintf(
-                                'ST_DWithin(centroid, ST_SetSRID(ST_Point(%F,%F),4326), %F)',
-                                $aSearch['fLon'],
-                                $aSearch['fLat'],
-                                $aSearch['fRadius']
-                            );
+                        if ($aSearch['oNear']) {
+                            $aTerms[] = $aSearch['oNear']->withinSQL('centroid');
 
-                            $aOrder[] = "ST_Distance(centroid, ST_SetSRID(ST_Point(".$aSearch['fLon'].",".$aSearch['fLat']."),4326)) ASC";
+                            $aOrder[] = $aSearch['oNear']->distanceSQL('centroid');
                         }
                         if (sizeof($this->aExcludePlaceIDs)) {
                             $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
@@ -1371,7 +1365,9 @@ class Geocode
                         }
 
                         if ($bBoundingBoxSearch) $aTerms[] = "centroid && $this->sViewboxSmallSQL";
-                        if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) ASC";
+                        if ($oNearPoint) {
+                            $aOrder[] = $oNearPoint->distanceSQL('centroid');
+                        }
 
                         if ($aSearch['sHouseNumber']) {
                             $sImportanceSQL = '- abs(26 - address_rank) + 3';
@@ -1452,7 +1448,7 @@ class Geocode
                                 // do we need to use transliteration and the regex for housenumbers???
                                 //new query for lines, not housenumbers anymore
                                 $sSQL = "SELECT distinct place_id FROM location_property_osmline";
-                                $sSQL .= " WHERE parent_place_id in (".$sPlaceIDs.") and (";
+                                $sSQL .= " WHERE startnumber is not NULL and parent_place_id in (".$sPlaceIDs.") and (";
                                 if ($searchedHousenumber%2 == 0) {
                                     //if housenumber is even, look for housenumber in streets with interpolationtype even or all
                                     $sSQL .= "interpolationtype='even'";
@@ -1579,9 +1575,13 @@ class Geocode
                                         $fRange = 0.05;
 
                                         $sOrderBySQL = '';
-                                        if ($sNearPointSQL) $sOrderBySQL = "ST_Distance($sNearPointSQL, l.centroid)";
-                                        elseif ($sPlaceIDs) $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)";
-                                        elseif ($sPlaceGeom) $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)";
+                                        if ($oNearPoint) {
+                                            $sOrderBySQL = $oNearPoint->distanceSQL('l.centroid');
+                                        } elseif ($sPlaceIDs) {
+                                            $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)";
+                                        } elseif ($sPlaceGeom) {
+                                            $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)";
+                                        }
 
                                         $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l";
                                         if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)";
@@ -1603,11 +1603,16 @@ class Geocode
                                         if (CONST_Debug) var_dump($sSQL);
                                         $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL)));
                                     } else {
-                                        if (isset($aSearch['fRadius']) && $aSearch['fRadius']) $fRange = $aSearch['fRadius'];
+                                        if ($aSearch['oNear']) {
+                                            $fRange = $aSearch['oNear']->radius();
+                                        }
 
                                         $sOrderBySQL = '';
-                                        if ($sNearPointSQL) $sOrderBySQL = "ST_Distance($sNearPointSQL, l.geometry)";
-                                        else $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)";
+                                        if ($oNearPoint) {
+                                            $sOrderBySQL = $oNearPoint->distanceSQL('l.geometry');
+                                        } else {
+                                            $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)";
+                                        }
 
                                         $sSQL = "SELECT distinct l.place_id".($sOrderBysSQL?','.$sOrderBysSQL:'');
                                         $sSQL .= " FROM placex as l, placex as f ";
@@ -1671,7 +1676,7 @@ class Geocode
                     $sSQL .= "  SELECT place_id ";
                     $sSQL .= "  FROM location_property_osmline ";
                     $sSQL .= "  WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")";
-                    $sSQL .= "    AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)";
+                    $sSQL .= "    AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)";
                     if (CONST_Debug) var_dump($sSQL);
                     $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     $tempIDs = array();
@@ -1697,8 +1702,8 @@ class Geocode
             $oReverse->setZoom(18);
 
             $aLookup = $oReverse->lookup(
-                (float)$this->aNearPoint[0],
-                (float)$this->aNearPoint[1],
+                $oNearPoint->lat(),
+                $oNearPoint->lon(),
                 false
             );
 
@@ -1726,7 +1731,7 @@ class Geocode
         $aClassType = getClassTypesWithImportance();
         $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
         foreach ($aRecheckWords as $i => $sWord) {
-            if (!preg_match('/\pL/', $sWord)) unset($aRecheckWords[$i]);
+            if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
         }
 
         if (CONST_Debug) {