]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
Merge pull request #742 from lonvia/compare-normalized
[nominatim.git] / lib / Geocode.php
index bb8ae65151094d65e42dc4741cc75e1844f22664..17aaf826e2963e2f9405561bafd04604a2fad651 100644 (file)
@@ -67,19 +67,45 @@ class Geocode
         $this->aLangPrefOrder = $aLangPref;
     }
 
-    public function getIncludeAddressDetails()
+    public function getMoreUrlParams()
     {
-        return $this->bIncludeAddressDetails;
-    }
+        if ($this->aStructuredQuery) {
+            $aParams = $this->aStructuredQuery;
+        } else {
+            $aParams = array('q' => $this->sQuery);
+        }
 
-    public function getIncludeExtraTags()
-    {
-        return $this->bIncludeExtraTags;
-    }
+        if ($this->aExcludePlaceIDs) {
+            $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
+        }
 
-    public function getIncludeNameDetails()
-    {
-        return $this->bIncludeNameDetails;
+        if ($this->bIncludeAddressDetails) $aParams['addressdetails'] = '1';
+        if ($this->bIncludeExtraTags) $aParams['extratags'] = '1';
+        if ($this->bIncludeNameDetails) $aParams['namedetails'] = '1';
+
+        if ($this->bIncludePolygonAsPoints) $aParams['polygon'] = '1';
+        if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
+        if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
+        if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
+        if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1';
+
+        if ($this->fPolygonSimplificationThreshold > 0.0) {
+            $aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold;
+        }
+
+        if ($this->bBoundedSearch) $aParams['bounded'] = '1';
+        if (!$this->bDeDupe) $aParams['dedupe'] = '0';
+
+        if ($this->aCountryCodes) {
+            $aParams['countrycodes'] = implode(',', $this->aCountryCodes);
+        }
+
+        if ($this->aViewBox) {
+            $aParams['viewbox'] = $this->aViewBox[0].','.$this->aViewBox[3]
+                                  .','.$this->aViewBox[2].','.$this->aViewBox[1];
+        }
+
+        return $aParams;
     }
 
     public function setIncludePolygonAsPoints($b = true)
@@ -121,23 +147,6 @@ class Geocode
         $this->iLimit = $iLimit + min($iLimit, 10);
     }
 
-    public function getExcludedPlaceIDs()
-    {
-        return $this->aExcludePlaceIDs;
-    }
-
-
-    public function getCountryCodes()
-    {
-        return $this->aCountryCodes;
-    }
-
-    public function getViewBoxString()
-    {
-        if (!$this->aViewBox) return null;
-        return $this->aViewBox[0].','.$this->aViewBox[3].','.$this->aViewBox[2].','.$this->aViewBox[1];
-    }
-
     public function setFeatureType($sFeatureType)
     {
         switch ($sFeatureType) {
@@ -341,7 +350,7 @@ class Geocode
         return true;
     }
 
-    public function setStructuredQuery($sAmentiy = false, $sStreet = false, $sCity = false, $sCounty = false, $sState = false, $sCountry = false, $sPostalCode = false)
+    public function setStructuredQuery($sAmenity = false, $sStreet = false, $sCity = false, $sCounty = false, $sState = false, $sCountry = false, $sPostalCode = false)
     {
         $this->sQuery = false;
 
@@ -353,7 +362,7 @@ class Geocode
         $this->aStructuredQuery = array();
         $this->sAllowedTypesSQLList = '';
 
-        $this->loadStructuredAddressElement($sAmentiy, 'amenity', 26, 30, false);
+        $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false);
         $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false);
         $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false);
         $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false);
@@ -414,7 +423,7 @@ class Geocode
         $sSQL .= "    rank_address,";
         $sSQL .= "    min(place_id) AS place_id, ";
         $sSQL .= "    min(parent_place_id) AS parent_place_id, ";
-        $sSQL .= "    calculated_country_code AS country_code, ";
+        $sSQL .= "    country_code, ";
         $sSQL .= "    get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,";
         $sSQL .= "    get_name_by_language(name, $sLanguagePrefArraySQL) AS placename,";
         $sSQL .= "    get_name_by_language(name, ARRAY['ref']) AS ref,";
@@ -457,7 +466,7 @@ class Geocode
         $sSQL .= "     admin_level, ";
         $sSQL .= "     rank_search, ";
         $sSQL .= "     rank_address, ";
-        $sSQL .= "     calculated_country_code, ";
+        $sSQL .= "     country_code, ";
         $sSQL .= "     importance, ";
         if (!$this->bDeDupe) $sSQL .= "place_id,";
         $sSQL .= "     langaddress, ";
@@ -542,7 +551,7 @@ class Geocode
             $sSQL .= "  30 AS rank_address, ";
             $sSQL .= "  min(place_id) as place_id, ";
             $sSQL .= "  min(parent_place_id) AS parent_place_id, ";
-            $sSQL .= "  calculated_country_code AS country_code, ";
+            $sSQL .= "  country_code, ";
             $sSQL .= "  get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) AS langaddress, ";
             $sSQL .= "  null AS placename, ";
             $sSQL .= "  null AS ref, ";
@@ -567,7 +576,7 @@ class Geocode
             $sSQL .= "     SELECT ";
             $sSQL .= "         osm_id, ";
             $sSQL .= "         place_id, ";
-            $sSQL .= "         calculated_country_code, ";
+            $sSQL .= "         country_code, ";
             $sSQL .= "         CASE ";             // interpolate the housenumbers here
             $sSQL .= "           WHEN startnumber != endnumber ";
             $sSQL .= "           THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) ";
@@ -586,7 +595,7 @@ class Geocode
             $sSQL .= "    osm_id, ";
             $sSQL .= "    place_id, ";
             $sSQL .= "    housenumber_for_place, ";
-            $sSQL .= "    calculated_country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique
+            $sSQL .= "    country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique
             if (!$this->bDeDupe) $sSQL .= ", place_id ";
 
             if (CONST_Use_Aux_Location_data) {
@@ -644,7 +653,7 @@ class Geocode
         return $aSearchResults;
     }
 
-    public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+    public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
     {
         /*
              Calculate all searches using aValidTokens i.e.
@@ -693,10 +702,12 @@ class Geocode
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) {
-                                    if ($aSearch['fLat'] === '') {
-                                        $aSearch['fLat'] = $aSearchTerm['lat'];
-                                        $aSearch['fLon'] = $aSearchTerm['lon'];
-                                        $aSearch['fRadius'] = $aSearchTerm['radius'];
+                                    if ($aSearch['oNear'] === false) {
+                                        $aSearch['oNear'] = new NearPoint(
+                                            $aSearchTerm['lat'],
+                                            $aSearchTerm['lon'],
+                                            $aSearchTerm['radius']
+                                        );
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
                                 } elseif ($sPhraseType == 'postalcode') {
@@ -741,13 +752,19 @@ class Geocode
                                          */
                                     }
                                 } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) {
-                                    if ($aSearch['sClass'] === '') {
-                                        $aSearch['sOperator'] = $aSearchTerm['operator'];
+                                    // require a normalized exact match of the term
+                                    // if we have the normalizer version of the query
+                                    // available
+                                    if ($aSearch['sClass'] === ''
+                                        && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                         $aSearch['sClass'] = $aSearchTerm['class'];
                                         $aSearch['sType'] = $aSearchTerm['type'];
-                                        if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
-                                        else $aSearch['sOperator'] = 'near'; // near = in for the moment
-                                        if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+                                        if ($aSearchTerm['operator'] == '') {
+                                            $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' :  'near';
+                                            $aSearch['iSearchRank'] += 2;
+                                        } else {
+                                            $aSearch['sOperator'] = 'near'; // near = in for the moment
+                                        }
 
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                     }
@@ -900,7 +917,14 @@ class Geocode
 
     public function lookup()
     {
-        if (!$this->sQuery && !$this->aStructuredQuery) return false;
+        if (!$this->sQuery && !$this->aStructuredQuery) return array();
+
+        $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
+        if ($oNormalizer !== null) {
+            $sNormQuery = $oNormalizer->transliterate($this->sQuery);
+        } else {
+            $sNormQuery = null;
+        }
 
         $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]";
         $sCountryCodesSQL = false;
@@ -961,19 +985,10 @@ class Geocode
                            'sClass' => '',
                            'sType' => '',
                            'sHouseNumber' => '',
-                           'fLat' => '',
-                           'fLon' => '',
-                           'fRadius' => ''
+                           'oNear' => $oNearPoint
                           )
                          );
 
-            // Do we have a radius search?
-            if ($oNearPoint) {
-                $aSearches[0]['fLat'] = $oNearPoint->lat();
-                $aSearches[0]['fLon'] = $oNearPoint->lon();
-                $aSearches[0]['fRadius'] = $oNearPoint->radius();
-            }
-
             // Any 'special' terms in the search?
             $bSpecialTerms = false;
             preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
@@ -1137,7 +1152,7 @@ class Geocode
                 // array with: placeid => -1 | tiger-housenumber
                 $aResultPlaceIDs = array();
 
-                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
+                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
 
                 if ($this->bReverseInPlan) {
                     // Reverse phrase array and also reverse the order of the wordsets in
@@ -1149,7 +1164,7 @@ class Geocode
                         $aFinalPhrase = end($aPhrases);
                         $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
                     }
-                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
+                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
 
                     foreach ($aGroupedSearches as $aSearches) {
                         foreach ($aSearches as $aSearch) {
@@ -1233,12 +1248,12 @@ class Geocode
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
                     // No location term?
-                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['fLon']) {
+                    if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) {
                             // Just looking for a country by code - look it up
                             if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) {
-                                $sSQL = "SELECT place_id FROM placex WHERE calculated_country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
-                                if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)";
+                                $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4";
+                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if ($bBoundingBoxSearch)
                                     $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)";
                                 $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1";
@@ -1248,7 +1263,7 @@ class Geocode
                                 $aPlaceIDs = array();
                             }
                         } else {
-                            if (!$bBoundingBoxSearch && !$aSearch['fLon']) continue;
+                            if (!$bBoundingBoxSearch && !$aSearch['oNear']) continue;
                             if (!$aSearch['sClass']) continue;
 
                             $sSQL = "SELECT COUNT(*) FROM pg_tables WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'";
@@ -1256,7 +1271,7 @@ class Geocode
                                 $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
                                 if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)";
                                 $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)";
-                                if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)";
+                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if (sizeof($this->aExcludePlaceIDs)) {
                                     $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")";
                                 }
@@ -1273,7 +1288,7 @@ class Geocode
                                     $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
                                     if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
                                     $sSQL .= " WHERE ST_Contains($this->sViewboxLargeSQL, ct.centroid)";
-                                    if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)";
+                                    if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                     if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC";
                                     $sSQL .= " LIMIT $this->iLimit";
                                     if (CONST_Debug) var_dump($sSQL);
@@ -1286,14 +1301,14 @@ class Geocode
                                 $sSQL .= "  AND type='".$aSearch['sType']."'";
                                 $sSQL .= "  AND ST_Contains($this->sViewboxSmallSQL, geometry) ";
                                 $sSQL .= "  AND linked_place_id is null";
-                                if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)";
+                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 if ($this->sViewboxCentreSQL)   $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, centroid) ASC";
                                 $sSQL .= " LIMIT $this->iLimit";
                                 if (CONST_Debug) var_dump($sSQL);
                                 $aPlaceIDs = chksql($this->oDB->getCol($sSQL));
                             }
                         }
-                    } elseif ($aSearch['fLon'] && !sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['sClass']) {
+                    } elseif ($aSearch['oNear'] && !sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['sClass']) {
                         // If a coordinate is given, the search must either
                         // be for a name or a special search. Ignore everything else.
                         $aPlaceIDs = array();
@@ -1358,15 +1373,10 @@ class Geocode
                                 $aTerms[] = "address_rank <= ".$this->iMaxAddressRank;
                             }
                         }
-                        if ($aSearch['fLon'] && $aSearch['fLat']) {
-                            $aTerms[] = sprintf(
-                                'ST_DWithin(centroid, ST_SetSRID(ST_Point(%F,%F),4326), %F)',
-                                $aSearch['fLon'],
-                                $aSearch['fLat'],
-                                $aSearch['fRadius']
-                            );
+                        if ($aSearch['oNear']) {
+                            $aTerms[] = $aSearch['oNear']->withinSQL('centroid');
 
-                            $aOrder[] = "ST_Distance(centroid, ST_SetSRID(ST_Point(".$aSearch['fLon'].",".$aSearch['fLat']."),4326)) ASC";
+                            $aOrder[] = $aSearch['oNear']->distanceSQL('centroid');
                         }
                         if (sizeof($this->aExcludePlaceIDs)) {
                             $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")";
@@ -1537,7 +1547,7 @@ class Geocode
                                 $sSQL .= "   AND class='".$aSearch['sClass']."' ";
                                 $sSQL .= "   AND type='".$aSearch['sType']."'";
                                 $sSQL .= "   AND linked_place_id is null";
-                                if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)";
+                                if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)";
                                 $sSQL .= " ORDER BY rank_search ASC ";
                                 $sSQL .= " LIMIT $this->iLimit";
                                 if (CONST_Debug) var_dump($sSQL);
@@ -1587,7 +1597,7 @@ class Geocode
 
                                         $sOrderBySQL = '';
                                         if ($oNearPoint) {
-                                           $sOrderBySQL = $oNearPoint->distanceSQL('l.centroid');
+                                            $sOrderBySQL = $oNearPoint->distanceSQL('l.centroid');
                                         } elseif ($sPlaceIDs) {
                                             $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)";
                                         } elseif ($sPlaceGeom) {
@@ -1607,14 +1617,16 @@ class Geocode
                                         if (sizeof($this->aExcludePlaceIDs)) {
                                             $sSQL .= " and l.place_id not in (".join(',', $this->aExcludePlaceIDs).")";
                                         }
-                                        if ($sCountryCodesSQL) $sSQL .= " and lp.calculated_country_code in ($sCountryCodesSQL)";
+                                        if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)";
                                         if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc";
                                         if ($this->iOffset) $sSQL .= " offset $this->iOffset";
                                         $sSQL .= " limit $this->iLimit";
                                         if (CONST_Debug) var_dump($sSQL);
                                         $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL)));
                                     } else {
-                                        if (isset($aSearch['fRadius']) && $aSearch['fRadius']) $fRange = $aSearch['fRadius'];
+                                        if ($aSearch['oNear']) {
+                                            $fRange = $aSearch['oNear']->radius();
+                                        }
 
                                         $sOrderBySQL = '';
                                         if ($oNearPoint) {
@@ -1632,7 +1644,7 @@ class Geocode
                                         if (sizeof($this->aExcludePlaceIDs)) {
                                             $sSQL .= " AND l.place_id not in (".join(',', $this->aExcludePlaceIDs).")";
                                         }
-                                        if ($sCountryCodesSQL) $sSQL .= " AND l.calculated_country_code in ($sCountryCodesSQL)";
+                                        if ($sCountryCodesSQL) $sSQL .= " AND l.country_code in ($sCountryCodesSQL)";
                                         if ($sOrderBy) $sSQL .= "ORDER BY ".$OrderBysSQL." ASC";
                                         if ($this->iOffset) $sSQL .= " OFFSET $this->iOffset";
                                         $sSQL .= " limit $this->iLimit";