git.openstreetmap.org Git - nominatim.git/blobdiff - lib-php/Geocode.php
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / lib-php / Geocode.php
index 82892eae6e78bd176bd4724be34de111374474a7..3529d8356e774c61603a2e5cb941b0c076ea0a93 100644 (file)
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
 
 namespace Nominatim;
 
@@ -95,7 +103,7 @@ class Geocode
         }
 
         $this->iFinalLimit = $iLimit;
-        $this->iLimit = $iLimit + min($iLimit, 10);
+        $this->iLimit = $iLimit + max($iLimit, 10);
     }
 
     public function setFeatureType($sFeatureType)
@@ -182,7 +190,7 @@ class Geocode
 
         $this->bFallback = $oParams->getBool('fallback', $this->bFallback);
 
-        // List of excluded Place IDs - used for more acurate pageing
+        // List of excluded Place IDs - used for more accurate pageing
         $sExcluded = $oParams->getStringList('exclude_place_ids');
         if ($sExcluded) {
             foreach ($sExcluded as $iExcludedPlaceID) {
@@ -249,19 +257,21 @@ class Geocode
     public function setQueryFromParams($oParams)
     {
         // Search query
-        $sQuery = $oParams->getString('q');
-        if (!$sQuery) {
-            $this->setStructuredQuery(
-                $oParams->getString('amenity'),
-                $oParams->getString('street'),
-                $oParams->getString('city'),
-                $oParams->getString('county'),
-                $oParams->getString('state'),
-                $oParams->getString('country'),
-                $oParams->getString('postalcode')
-            );
-        } else {
-            $this->setQuery($sQuery);
+        $this->setStructuredQuery(
+            $oParams->getString('amenity'),
+            $oParams->getString('street'),
+            $oParams->getString('city'),
+            $oParams->getString('county'),
+            $oParams->getString('state'),
+            $oParams->getString('country'),
+            $oParams->getString('postalcode')
+        );
+        if (!$this->sQuery) {
+            $sQuery = $oParams->getString('q');
+
+            if ($sQuery) {
+                $this->setQuery($sQuery);
+            }
         }
     }
 
@@ -286,26 +296,28 @@ class Geocode
     {
         $this->sQuery = false;
 
-        // Reset
-        $this->iMinAddressRank = 0;
-        $this->iMaxAddressRank = 30;
-        $this->aAddressRankList = array();
-
-        $this->aStructuredQuery = array();
-        $this->sAllowedTypesSQLList = false;
-
-        $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false);
-        $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false);
-        $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false);
-        $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false);
-        $this->loadStructuredAddressElement($sState, 'state', 8, 8, false);
-        $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11));
-        $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false);
-
-        if (!empty($this->aStructuredQuery)) {
-            $this->sQuery = join(', ', $this->aStructuredQuery);
-            if ($this->iMaxAddressRank < 30) {
-                $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')';
+        if ($sAmenity || $sStreet || $sCity || $sCounty || $sState || $sCountry || $sPostalCode) {
+            // Reset
+            $this->iMinAddressRank = 0;
+            $this->iMaxAddressRank = 30;
+            $this->aAddressRankList = array();
+
+            $this->aStructuredQuery = array();
+            $this->sAllowedTypesSQLList = false;
+
+            $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false);
+            $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false);
+            $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false);
+            $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false);
+            $this->loadStructuredAddressElement($sState, 'state', 8, 8, false);
+            $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11));
+            $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false);
+
+            if (!empty($this->aStructuredQuery)) {
+                $this->sQuery = join(', ', $this->aStructuredQuery);
+                if ($this->iMaxAddressRank < 30) {
+                    $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')';
+                }
             }
         }
     }
@@ -498,7 +510,6 @@ class Geocode
         if ($this->aCountryCodes) {
             $oCtx->setCountryList($this->aCountryCodes);
         }
-        $this->oTokenizer->setCountryRestriction($this->aCountryCodes);
 
         Debug::newSection('Query Preprocessing');
 
@@ -507,13 +518,6 @@ class Geocode
             userError('Query string is not UTF-8 encoded.');
         }
 
-        // Conflicts between US state abreviations and various words for 'the' in different languages
-        if (isset($this->aLangPrefOrder['name:en'])) {
-            $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery);
-            $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery);
-            $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery);
-        }
-
         // Do we have anything that looks like a lat/lon pair?
         $sQuery = $oCtx->setNearPointFromQuery($sQuery);
 
@@ -555,15 +559,15 @@ class Geocode
 
                 if (!empty($aTokens)) {
                     $aNewSearches = array();
+                    $oPosition = new SearchPosition('', 0, 1);
+                    $oPosition->setTokenPosition(0, 1);
+
                     foreach ($aSearches as $oSearch) {
                         foreach ($aTokens as $oToken) {
-                            $oNewSearch = clone $oSearch;
-                            $oNewSearch->setPoiSearch(
-                                $oToken->iOperator,
-                                $oToken->sClass,
-                                $oToken->sType
+                            $aNewSearches = array_merge(
+                                $aNewSearches,
+                                $oToken->extendSearch($oSearch, $oPosition)
                             );
-                            $aNewSearches[] = $oNewSearch;
                         }
                     }
                     $aSearches = $aNewSearches;
@@ -617,16 +621,15 @@ class Geocode
                     }
                     $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
 
-                    foreach ($aGroupedSearches as $aSearches) {
+                    foreach ($aReverseGroupedSearches as $aSearches) {
                         foreach ($aSearches as $aSearch) {
-                            if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
-                                $aReverseGroupedSearches[$aSearch->getRank()] = array();
+                            if (!isset($aGroupedSearches[$aSearch->getRank()])) {
+                                $aGroupedSearches[$aSearch->getRank()] = array();
                             }
-                            $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
+                            $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                         }
                     }
 
-                    $aGroupedSearches = $aReverseGroupedSearches;
                     ksort($aGroupedSearches);
                 }
             } else {
@@ -696,7 +699,7 @@ class Geocode
                         }
                     }
 
-                    if ($iQueryLoop > 20) {
+                    if ($iQueryLoop > 30) {
                         break;
                     }
                 }
@@ -773,7 +776,7 @@ class Geocode
                     $aResults = $tempIDs;
                 }
 
-                if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) {
+                if (!empty($aResults) || $iGroupLoop > 6 || $iQueryLoop > 40) {
                     break;
                 }
             }
@@ -844,7 +847,9 @@ class Geocode
                 $aResult['importance'] = 0.001;
                 $aResult['foundorder'] = $aResult['addressimportance'];
             } else {
-                $aResult['importance'] = max(0.001, $aResult['importance']);
+                if ($aResult['importance'] == 0) {
+                    $aResult['importance'] = 0.0001;
+                }
                 $aResult['importance'] *= $this->viewboxImportanceFactor(
                     $aResult['lon'],
                     $aResult['lat']
@@ -873,7 +878,7 @@ class Geocode
                 $iCountWords = 0;
                 $sAddress = $aResult['langaddress'];
                 foreach ($aRecheckWords as $i => $sWord) {
-                    if (stripos($sAddress, $sWord)!==false) {
+                    if (grapheme_stripos($sAddress, $sWord)!==false) {
                         $iCountWords++;
                         if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
                             $iCountWords += 0.1;