X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/c9984669a7a385489c072a7245486b0ec319f111..f71478e49c1a9462ca3b94a72d280581d98b8fff:/lib-php/Geocode.php?ds=inline diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index a3883b25..20edd9a5 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -1,4 +1,12 @@ aExcludePlaceIDs); } - if ($this->bBoundedSearch) $aParams['bounded'] = '1'; + if ($this->bBoundedSearch) { + $aParams['bounded'] = '1'; + } if ($this->aCountryCodes) { $aParams['countrycodes'] = implode(',', $this->aCountryCodes); @@ -85,11 +96,14 @@ class Geocode public function setLimit($iLimit = 10) { - if ($iLimit > 50) $iLimit = 50; - if ($iLimit < 1) $iLimit = 1; + if ($iLimit > 50) { + $iLimit = 50; + } elseif ($iLimit < 1) { + $iLimit = 1; + } $this->iFinalLimit = $iLimit; - $this->iLimit = $iLimit + min($iLimit, 10); + $this->iLimit = $iLimit + max($iLimit, 10); } public function setFeatureType($sFeatureType) @@ -176,23 +190,29 @@ class Geocode $this->bFallback = $oParams->getBool('fallback', $this->bFallback); - // List of excluded Place IDs - used for more acurate pageing + // List of excluded Place IDs - used for more accurate pageing $sExcluded = $oParams->getStringList('exclude_place_ids'); if ($sExcluded) { foreach ($sExcluded as $iExcludedPlaceID) { $iExcludedPlaceID = (int)$iExcludedPlaceID; - if ($iExcludedPlaceID) + if ($iExcludedPlaceID) { $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + } } - if (isset($aExcludePlaceIDs)) + if (isset($aExcludePlaceIDs)) { $this->aExcludePlaceIDs = $aExcludePlaceIDs; + } } // Only certain ranks of feature $sFeatureType = $oParams->getString('featureType'); - if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype'); - if ($sFeatureType) $this->setFeatureType($sFeatureType); + if (!$sFeatureType) { + $sFeatureType = $oParams->getString('featuretype'); + } + if ($sFeatureType) { + $this->setFeatureType($sFeatureType); + } // Country code list $sCountries = $oParams->getStringList('countrycodes'); @@ -202,8 +222,9 @@ class Geocode $aCountries[] = strtolower($sCountryCode); } } - if (isset($aCountries)) + if (isset($aCountries)) { $this->aCountryCodes = $aCountries; + } } $aViewbox = $oParams->getStringList('viewboxlbrt'); @@ -255,13 +276,17 @@ class Geocode public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues) { $sValue = trim($sValue); - if (!$sValue) return false; + if (!$sValue) { + return false; + } $this->aStructuredQuery[$sKey] = $sValue; if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) { $this->iMinAddressRank = $iNewMinAddressRank; $this->iMaxAddressRank = $iNewMaxAddressRank; } - if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + if ($aItemListValues) { + $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + } return true; } @@ -295,11 +320,11 @@ class Geocode public function fallbackStructuredQuery() { - if (!$this->aStructuredQuery) return false; - $aParams = $this->aStructuredQuery; - if (count($aParams) == 1) return false; + if (!$aParams || count($aParams) == 1) { + return false; + } $aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state'); @@ -329,50 +354,26 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $oPhrase->getPhraseType(); + $oPosition = new SearchPosition( + $oPhrase->getPhraseType(), + $iPhrase, + count($aPhrases) + ); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset foreach ($aWordset as $iToken => $sToken) { - //echo "
$sToken"; $aNewWordsetSearches = array(); + $oPosition->setTokenPosition($iToken, count($aWordset)); foreach ($aWordsetSearches as $oCurrentSearch) { - //echo ""; - //var_dump($oCurrentSearch); - //echo ""; - - // Tokens with full name matches. - foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $oSearchTerm, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } - } - } - // Look for partial matches. - // Note that there is no point in adding country terms here - // because country is omitted in the address. - if ($sPhraseType != 'country') { - // Allow searching for a word - but at extra cost - foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $sToken, - $oSearchTerm, - (bool) $sPhraseType, - $iPhrase, - $oValidTokens->get(' '.$sToken) + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { + if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) { + $aNewSearches = $oSearchTerm->extendSearch( + $oCurrentSearch, + $oPosition ); foreach ($aNewSearches as $oSearch) { @@ -387,7 +388,6 @@ class Geocode usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); } - //var_Dump('
',count($aWordsetSearches)); exit; $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank')); @@ -395,8 +395,11 @@ class Geocode $aSearchHash = array(); foreach ($aNewPhraseSearches as $iSearch => $aSearch) { $sHash = serialize($aSearch); - if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); - else $aSearchHash[$sHash] = 1; + if (isset($aSearchHash[$sHash])) { + unset($aNewPhraseSearches[$iSearch]); + } else { + $aSearchHash[$sHash] = 1; + } } $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); @@ -417,10 +420,12 @@ class Geocode $iSearchCount = 0; $aSearches = array(); - foreach ($aGroupedSearches as $iScore => $aNewSearches) { + foreach ($aGroupedSearches as $aNewSearches) { $iSearchCount += count($aNewSearches); $aSearches = array_merge($aSearches, $aNewSearches); - if ($iSearchCount > 50) break; + if ($iSearchCount > 50) { + break; + } } } @@ -477,7 +482,9 @@ class Geocode public function lookup() { Debug::newFunction('Geocode::lookup'); - if (!$this->sQuery && !$this->aStructuredQuery) return array(); + if (!$this->sQuery && !$this->aStructuredQuery) { + return array(); + } Debug::printDebugArray('Geocode', $this); @@ -499,26 +506,14 @@ class Geocode if ($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } - $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); - $sLanguagePrefArraySQL = $this->oDB->getArraySQL( - $this->oDB->getDBQuotedList($this->aLangPrefOrder) - ); - $sQuery = $this->sQuery; if (!preg_match('//u', $sQuery)) { userError('Query string is not UTF-8 encoded.'); } - // Conflicts between US state abreviations and various words for 'the' in different languages - if (isset($this->aLangPrefOrder['name:en'])) { - $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery); - } - // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); @@ -560,15 +555,15 @@ class Geocode if (!empty($aTokens)) { $aNewSearches = array(); + $oPosition = new SearchPosition('', 0, 1); + $oPosition->setTokenPosition(0, 1); + foreach ($aSearches as $oSearch) { foreach ($aTokens as $oToken) { - $oNewSearch = clone $oSearch; - $oNewSearch->setPoiSearch( - $oToken->iOperator, - $oToken->sClass, - $oToken->sType + $aNewSearches = array_merge( + $aNewSearches, + $oToken->extendSearch($oSearch, $oPosition) ); - $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -622,16 +617,15 @@ class Geocode } $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens); - foreach ($aGroupedSearches as $aSearches) { + foreach ($aReverseGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { - if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) { - $aReverseGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); } - $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch; + $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } - $aGroupedSearches = $aReverseGroupedSearches; ksort($aGroupedSearches); } } else { @@ -639,7 +633,9 @@ class Geocode $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { if ($aSearch->getRank() < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); + } $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } @@ -653,7 +649,9 @@ class Geocode $sHash = serialize($aSearch); if (isset($aSearchHash[$sHash])) { unset($aGroupedSearches[$iGroup][$iSearch]); - if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]); + if (empty($aGroupedSearches[$iGroup])) { + unset($aGroupedSearches[$iGroup]); + } } else { $aSearchHash[$sHash] = 1; } @@ -697,7 +695,9 @@ class Geocode } } - if ($iQueryLoop > 20) break; + if ($iQueryLoop > 30) { + break; + } } if (!empty($aResults)) { @@ -772,9 +772,9 @@ class Geocode $aResults = $tempIDs; } - if (!empty($aResults)) break; - if ($iGroupLoop > 4) break; - if ($iQueryLoop > 30) break; + if (!empty($aResults) || $iGroupLoop > 6 || $iQueryLoop > 40) { + break; + } } } else { // Just interpret as a reverse geocode @@ -792,10 +792,8 @@ class Geocode // No results? Done if (empty($aResults)) { - if ($this->bFallback) { - if ($this->fallbackStructuredQuery()) { - return $this->lookup(); - } + if ($this->bFallback && $this->fallbackStructuredQuery()) { + return $this->lookup(); } return array(); @@ -814,7 +812,9 @@ class Geocode $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { - if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); + if (!preg_match('/[\pL\pN]/', $sWord)) { + unset($aRecheckWords[$i]); + } } Debug::printVar('Recheck words', $aRecheckWords); @@ -843,7 +843,9 @@ class Geocode $aResult['importance'] = 0.001; $aResult['foundorder'] = $aResult['addressimportance']; } else { - $aResult['importance'] = max(0.001, $aResult['importance']); + if ($aResult['importance'] == 0) { + $aResult['importance'] = 0.0001; + } $aResult['importance'] *= $this->viewboxImportanceFactor( $aResult['lon'], $aResult['lat'] @@ -872,9 +874,11 @@ class Geocode $iCountWords = 0; $sAddress = $aResult['langaddress']; foreach ($aRecheckWords as $i => $sWord) { - if (stripos($sAddress, $sWord)!==false) { + if (grapheme_stripos($sAddress, $sWord)!==false) { $iCountWords++; - if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1; + if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) { + $iCountWords += 0.1; + } } } @@ -891,15 +895,8 @@ class Geocode $aToFilter = $aSearchResults; $aSearchResults = array(); - $bFirst = true; foreach ($aToFilter as $aResult) { $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id']; - if ($bFirst) { - $fLat = $aResult['lat']; - $fLon = $aResult['lon']; - if (isset($aResult['zoom'])) $iZoom = $aResult['zoom']; - $bFirst = false; - } if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']]) && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']])) ) { @@ -909,7 +906,9 @@ class Geocode } // Absolute limit on number of results - if (count($aSearchResults) >= $this->iFinalLimit) break; + if (count($aSearchResults) >= $this->iFinalLimit) { + break; + } } Debug::printVar('Post-filter results', $aSearchResults);