X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/77e55fade242d9212b342fa5f6f6e02223c5db90..a383f511ada424fb15eb37fbfdf52347e5225d36:/lib/Geocode.php?ds=inline diff --git a/lib/Geocode.php b/lib/Geocode.php index 971eb85d..b0f0c764 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -25,7 +25,7 @@ class Geocode protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -423,7 +423,7 @@ class Geocode $sSQL .= " rank_address,"; $sSQL .= " min(place_id) AS place_id, "; $sSQL .= " min(parent_place_id) AS parent_place_id, "; - $sSQL .= " calculated_country_code AS country_code, "; + $sSQL .= " country_code, "; $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; $sSQL .= " get_name_by_language(name, $sLanguagePrefArraySQL) AS placename,"; $sSQL .= " get_name_by_language(name, ARRAY['ref']) AS ref,"; @@ -466,7 +466,7 @@ class Geocode $sSQL .= " admin_level, "; $sSQL .= " rank_search, "; $sSQL .= " rank_address, "; - $sSQL .= " calculated_country_code, "; + $sSQL .= " country_code, "; $sSQL .= " importance, "; if (!$this->bDeDupe) $sSQL .= "place_id,"; $sSQL .= " langaddress, "; @@ -551,7 +551,7 @@ class Geocode $sSQL .= " 30 AS rank_address, "; $sSQL .= " min(place_id) as place_id, "; $sSQL .= " min(parent_place_id) AS parent_place_id, "; - $sSQL .= " calculated_country_code AS country_code, "; + $sSQL .= " country_code, "; $sSQL .= " get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) AS langaddress, "; $sSQL .= " null AS placename, "; $sSQL .= " null AS ref, "; @@ -576,7 +576,7 @@ class Geocode $sSQL .= " SELECT "; $sSQL .= " osm_id, "; $sSQL .= " place_id, "; - $sSQL .= " calculated_country_code, "; + $sSQL .= " country_code, "; $sSQL .= " CASE "; // interpolate the housenumbers here $sSQL .= " WHEN startnumber != endnumber "; $sSQL .= " THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) "; @@ -595,7 +595,7 @@ class Geocode $sSQL .= " osm_id, "; $sSQL .= " place_id, "; $sSQL .= " housenumber_for_place, "; - $sSQL .= " calculated_country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique + $sSQL .= " country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique if (!$this->bDeDupe) $sSQL .= ", place_id "; if (CONST_Use_Aux_Location_data) { @@ -653,7 +653,7 @@ class Geocode return $aSearchResults; } - public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases) + public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery) { /* Calculate all searches using aValidTokens i.e. @@ -752,13 +752,19 @@ class Geocode */ } } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) { - if ($aSearch['sClass'] === '') { - $aSearch['sOperator'] = $aSearchTerm['operator']; + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($aSearch['sClass'] === '' + && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { $aSearch['sClass'] = $aSearchTerm['class']; $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; + if ($aSearchTerm['operator'] == '') { + $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near'; + $aSearch['iSearchRank'] += 2; + } else { + $aSearch['sOperator'] = 'near'; // near = in for the moment + } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } @@ -913,6 +919,13 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); + $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + if ($oNormalizer !== null) { + $sNormQuery = $oNormalizer->transliterate($this->sQuery); + } else { + $sNormQuery = null; + } + $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@ -1139,7 +1152,7 @@ class Geocode // array with: placeid => -1 | tiger-housenumber $aResultPlaceIDs = array(); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@ -1151,7 +1164,7 @@ class Geocode $aFinalPhrase = end($aPhrases); $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@ -1239,8 +1252,8 @@ class Geocode if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { - $sSQL = "SELECT place_id FROM placex WHERE calculated_country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)"; + $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; + if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -1258,7 +1271,7 @@ class Geocode $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)"; $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; - if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)"; + if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } @@ -1275,7 +1288,7 @@ class Geocode $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; $sSQL .= " WHERE ST_Contains($this->sViewboxLargeSQL, ct.centroid)"; - if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)"; + if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1288,7 +1301,7 @@ class Geocode $sSQL .= " AND type='".$aSearch['sType']."'"; $sSQL .= " AND ST_Contains($this->sViewboxSmallSQL, geometry) "; $sSQL .= " AND linked_place_id is null"; - if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)"; + if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, centroid) ASC"; $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1334,19 +1347,20 @@ class Geocode // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { + /*if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } + }*/ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@ -1534,7 +1548,7 @@ class Geocode $sSQL .= " AND class='".$aSearch['sClass']."' "; $sSQL .= " AND type='".$aSearch['sType']."'"; $sSQL .= " AND linked_place_id is null"; - if ($sCountryCodesSQL) $sSQL .= " AND calculated_country_code in ($sCountryCodesSQL)"; + if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; $sSQL .= " ORDER BY rank_search ASC "; $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1542,8 +1556,9 @@ class Geocode } if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'near') { // & in + $sClassTable = 'place_classtype_'.$aSearch['sClass'].'_'.$aSearch['sType']; $sSQL = "SELECT count(*) FROM pg_tables "; - $sSQL .= "WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'"; + $sSQL .= "WHERE tablename = '$sClassTable'"; $bCacheTable = chksql($this->oDB->getOne($sSQL)); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; @@ -1591,7 +1606,8 @@ class Geocode $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; } - $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l"; + $sSQL = "select distinct i.place_id".($sOrderBySQL?', i.order_term':'')." from ("; + $sSQL .= "select l.place_id".($sOrderBySQL?','.$sOrderBySQL.' as order_term':'')." from ".$sClassTable." as l"; if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)"; if ($sPlaceIDs) { $sSQL .= ",placex as f where "; @@ -1604,8 +1620,9 @@ class Geocode if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " and l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } - if ($sCountryCodesSQL) $sSQL .= " and lp.calculated_country_code in ($sCountryCodesSQL)"; - if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc"; + if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)"; + $sSQL .= 'limit 300) i '; + if ($sOrderBySQL) $sSQL .= "order by order_term asc"; if ($this->iOffset) $sSQL .= " offset $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1631,7 +1648,7 @@ class Geocode if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " AND l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } - if ($sCountryCodesSQL) $sSQL .= " AND l.calculated_country_code in ($sCountryCodesSQL)"; + if ($sCountryCodesSQL) $sSQL .= " AND l.country_code in ($sCountryCodesSQL)"; if ($sOrderBy) $sSQL .= "ORDER BY ".$OrderBysSQL." ASC"; if ($this->iOffset) $sSQL .= " OFFSET $this->iOffset"; $sSQL .= " limit $this->iLimit";