X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/0d2cdb5c2f2cb6836e64d99c04a51c561c0ab7ac..b2871268cde63d2dd4d92c1313c396afb2807933:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index adae9f77..16fba050 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -25,7 +25,7 @@ class Geocode protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -709,6 +709,8 @@ class Geocode Score how good the search is so they can be ordered */ + $iGlobalRank = 0; + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; @@ -745,6 +747,11 @@ class Geocode $aSearch['iSearchRank'] += 5; } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + // If it is at the beginning, we can be almost sure that this is the wrong order + // Increase score for all searches. + if ($iToken == 0 && $iPhrase == 0) { + $iGlobalRank++; + } } } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both @@ -774,8 +781,8 @@ class Geocode // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; - // also housenumbers should appear in the first or second phrase - if ($iPhrase > 1) $aSearch['iSearchRank'] += 1; + // also must not appear in the middle of the address + if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; /* // Fall back to not searching for this item (better than nothing) @@ -852,7 +859,8 @@ class Geocode } } - if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) { + if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch']) + && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank'] += 1; if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; @@ -922,6 +930,7 @@ class Geocode continue; } } + $aSearch['iSearchRank'] += $iGlobalRank; $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; } ksort($aGroupedSearches); @@ -1390,19 +1399,20 @@ class Geocode // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { + /*if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } + }*/ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";