X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d933ead2b503bd0377b1c8f7c0984f29b23ed908..0278ab7f41bf9bc96a87084c04bfec263c6acc40:/lib-php/Geocode.php diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index 14a3315e..3529d835 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -1,4 +1,12 @@ iFinalLimit = $iLimit; - $this->iLimit = $iLimit + min($iLimit, 10); + $this->iLimit = $iLimit + max($iLimit, 10); } public function setFeatureType($sFeatureType) @@ -181,7 +190,7 @@ class Geocode $this->bFallback = $oParams->getBool('fallback', $this->bFallback); - // List of excluded Place IDs - used for more acurate pageing + // List of excluded Place IDs - used for more accurate pageing $sExcluded = $oParams->getStringList('exclude_place_ids'); if ($sExcluded) { foreach ($sExcluded as $iExcludedPlaceID) { @@ -248,19 +257,21 @@ class Geocode public function setQueryFromParams($oParams) { // Search query - $sQuery = $oParams->getString('q'); - if (!$sQuery) { - $this->setStructuredQuery( - $oParams->getString('amenity'), - $oParams->getString('street'), - $oParams->getString('city'), - $oParams->getString('county'), - $oParams->getString('state'), - $oParams->getString('country'), - $oParams->getString('postalcode') - ); - } else { - $this->setQuery($sQuery); + $this->setStructuredQuery( + $oParams->getString('amenity'), + $oParams->getString('street'), + $oParams->getString('city'), + $oParams->getString('county'), + $oParams->getString('state'), + $oParams->getString('country'), + $oParams->getString('postalcode') + ); + if (!$this->sQuery) { + $sQuery = $oParams->getString('q'); + + if ($sQuery) { + $this->setQuery($sQuery); + } } } @@ -285,26 +296,28 @@ class Geocode { $this->sQuery = false; - // Reset - $this->iMinAddressRank = 0; - $this->iMaxAddressRank = 30; - $this->aAddressRankList = array(); - - $this->aStructuredQuery = array(); - $this->sAllowedTypesSQLList = false; - - $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); - $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); - $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false); - $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false); - $this->loadStructuredAddressElement($sState, 'state', 8, 8, false); - $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11)); - $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false); - - if (!empty($this->aStructuredQuery)) { - $this->sQuery = join(', ', $this->aStructuredQuery); - if ($this->iMaxAddressRank < 30) { - $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')'; + if ($sAmenity || $sStreet || $sCity || $sCounty || $sState || $sCountry || $sPostalCode) { + // Reset + $this->iMinAddressRank = 0; + $this->iMaxAddressRank = 30; + $this->aAddressRankList = array(); + + $this->aStructuredQuery = array(); + $this->sAllowedTypesSQLList = false; + + $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); + $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); + $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false); + $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false); + $this->loadStructuredAddressElement($sState, 'state', 8, 8, false); + $this->loadStructuredAddressElement($sPostalCode, 'postalcode', 5, 11, array(5, 11)); + $this->loadStructuredAddressElement($sCountry, 'country', 4, 4, false); + + if (!empty($this->aStructuredQuery)) { + $this->sQuery = join(', ', $this->aStructuredQuery); + if ($this->iMaxAddressRank < 30) { + $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')'; + } } } } @@ -345,50 +358,26 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $oPhrase->getPhraseType(); + $oPosition = new SearchPosition( + $oPhrase->getPhraseType(), + $iPhrase, + count($aPhrases) + ); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset foreach ($aWordset as $iToken => $sToken) { - //echo "
$sToken"; $aNewWordsetSearches = array(); + $oPosition->setTokenPosition($iToken, count($aWordset)); foreach ($aWordsetSearches as $oCurrentSearch) { - //echo ""; - //var_dump($oCurrentSearch); - //echo ""; - - // Tokens with full name matches. - foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $oSearchTerm, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } - } - } - // Look for partial matches. - // Note that there is no point in adding country terms here - // because country is omitted in the address. - if ($sPhraseType != 'country') { - // Allow searching for a word - but at extra cost - foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $sToken, - $oSearchTerm, - (bool) $sPhraseType, - $iPhrase, - $oValidTokens->get(' '.$sToken) + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { + if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) { + $aNewSearches = $oSearchTerm->extendSearch( + $oCurrentSearch, + $oPosition ); foreach ($aNewSearches as $oSearch) { @@ -403,7 +392,6 @@ class Geocode usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); } - //var_Dump('
',count($aWordsetSearches)); exit; $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank')); @@ -522,7 +510,6 @@ class Geocode if ($this->aCountryCodes) { $oCtx->setCountryList($this->aCountryCodes); } - $this->oTokenizer->setCountryRestriction($this->aCountryCodes); Debug::newSection('Query Preprocessing'); @@ -531,13 +518,6 @@ class Geocode userError('Query string is not UTF-8 encoded.'); } - // Conflicts between US state abreviations and various words for 'the' in different languages - if (isset($this->aLangPrefOrder['name:en'])) { - $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery); - $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery); - } - // Do we have anything that looks like a lat/lon pair? $sQuery = $oCtx->setNearPointFromQuery($sQuery); @@ -579,15 +559,15 @@ class Geocode if (!empty($aTokens)) { $aNewSearches = array(); + $oPosition = new SearchPosition('', 0, 1); + $oPosition->setTokenPosition(0, 1); + foreach ($aSearches as $oSearch) { foreach ($aTokens as $oToken) { - $oNewSearch = clone $oSearch; - $oNewSearch->setPoiSearch( - $oToken->iOperator, - $oToken->sClass, - $oToken->sType + $aNewSearches = array_merge( + $aNewSearches, + $oToken->extendSearch($oSearch, $oPosition) ); - $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -641,16 +621,15 @@ class Geocode } $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens); - foreach ($aGroupedSearches as $aSearches) { + foreach ($aReverseGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { - if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) { - $aReverseGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); } - $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch; + $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } - $aGroupedSearches = $aReverseGroupedSearches; ksort($aGroupedSearches); } } else { @@ -720,7 +699,7 @@ class Geocode } } - if ($iQueryLoop > 20) { + if ($iQueryLoop > 30) { break; } } @@ -797,7 +776,7 @@ class Geocode $aResults = $tempIDs; } - if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) { + if (!empty($aResults) || $iGroupLoop > 6 || $iQueryLoop > 40) { break; } } @@ -868,7 +847,9 @@ class Geocode $aResult['importance'] = 0.001; $aResult['foundorder'] = $aResult['addressimportance']; } else { - $aResult['importance'] = max(0.001, $aResult['importance']); + if ($aResult['importance'] == 0) { + $aResult['importance'] = 0.0001; + } $aResult['importance'] *= $this->viewboxImportanceFactor( $aResult['lon'], $aResult['lat'] @@ -897,7 +878,7 @@ class Geocode $iCountWords = 0; $sAddress = $aResult['langaddress']; foreach ($aRecheckWords as $i => $sWord) { - if (stripos($sAddress, $sWord)!==false) { + if (grapheme_stripos($sAddress, $sWord)!==false) { $iCountWords++; if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) { $iCountWords += 0.1;