X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4f49ef07e653e91fae3a75976074833905c79aa8..550523df1bfc8f4d58871aeb7aa7ed0ae2593431:/website/search.php?ds=sidebyside
diff --git a/website/search.php b/website/search.php
index 9941907d..aee77345 100755
--- a/website/search.php
+++ b/website/search.php
@@ -1,8 +1,11 @@
50) $iFinalLimit = 50;
- $iLimit = $iFinalLimit + min($iFinalLimit, 10);
+ $iLimit = $iFinalLimit + min($iFinalLimit, 10);
$iMinAddressRank = 0;
$iMaxAddressRank = 30;
+ $sAllowedTypesSQLList = false;
// Format for output
if (isset($_GET['format']) && ($_GET['format'] == 'html' || $_GET['format'] == 'xml' || $_GET['format'] == 'json' || $_GET['format'] == 'jsonv2'))
@@ -152,12 +156,17 @@
array('postalcode', 16, 25),
);
$aStructuredQuery = array();
+ $sAllowedTypesSQLList = '';
foreach($aStructuredOptions as $aStructuredOption)
{
loadStructuredAddressElement($aStructuredQuery, $iMinAddressRank, $iMaxAddressRank, $_GET, $aStructuredOption[0], $aStructuredOption[1], $aStructuredOption[2]);
}
if (sizeof($aStructuredQuery) > 0) {
$sQuery = join(', ', $aStructuredQuery);
+ if ($iMaxAddressRank < 30)
+ {
+ $sAllowedTypesSQLList = '(\'place\',\'boundary\')';
+ }
}
if ($sQuery)
@@ -318,7 +327,7 @@
{
foreach($aSearchWords as $aSearchTerm)
{
- $aNewSearch = $aSearch;
+ $aNewSearch = $aSearch;
if ($aSearchTerm['country_code'])
{
$aNewSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
@@ -386,7 +395,7 @@
{
// Check which tokens we have, get the ID numbers
- $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator';
+ $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator, search_name_count';
$sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')';
$sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
// $sSQL .= ' group by word_token, word, class, type, location, country_code';
@@ -413,7 +422,7 @@
{
$aValidTokens[$aToken['word_token']] = array($aToken);
}
- if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1;
+ if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1 + $aToken['search_name_count'];
}
if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
@@ -521,13 +530,18 @@
{
$aSearch = $aCurrentSearch;
$aSearch['iSearchRank']++;
- if (($sPhraseType == '' || $sPhraseType == 'country') && $aSearchTerm['country_code'] !== null && $aSearchTerm['country_code'] != '0')
+ if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
{
if ($aSearch['sCountryCode'] === false)
{
$aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
// Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
- if ($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) $aSearch['iSearchRank'] += 5;
+ // If reverse order is enabled, it may appear at the beginning as well.
+ if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) &&
+ (!$bReverseInPlan || $iToken > 0 || $iPhrase > 0))
+ {
+ $aSearch['iSearchRank'] += 5;
+ }
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
}
@@ -728,6 +742,52 @@
}
}
+ // For structured queries, additionally queue copies of every search
+ // with address terms removed; each removed term adds 5 to the rank.
+ if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0)
+ {
+     $aCopyGroupedSearches = $aGroupedSearches;
+     foreach($aCopyGroupedSearches as $iGroup => $aSearches)
+     {
+         foreach($aSearches as $iSearch => $aSearch)
+         {
+             $aReductionsList = array($aSearch['aAddress']);
+             $iSearchRank = $aSearch['iSearchRank'];
+             while(sizeof($aReductionsList) > 0)
+             {
+                 $iSearchRank += 5;
+                 // The limit is the variable $iMaxRank; the bare word iMaxRank
+                 // was read by PHP as an undefined constant.
+                 // NOTE(review): break 3 also abandons all remaining searches,
+                 // not only this one - confirm that is intended.
+                 if ($iSearchRank > $iMaxRank) break 3;
+                 $aNewReductionsList = array();
+                 foreach($aReductionsList as $aReductionsWordList)
+                 {
+                     for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++)
+                     {
+                         // Drop the term at position $iReductionWord.
+                         $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1));
+                         // Change the copy, not $aSearch: the reduced address and
+                         // the new rank belong to the search that gets queued.
+                         $aReverseSearch = $aSearch;
+                         $aReverseSearch['aAddress'] = $aReductionsWordListResult;
+                         $aReverseSearch['iSearchRank'] = $iSearchRank;
+                         $aGroupedSearches[$iSearchRank][] = $aReverseSearch;
+                         if (sizeof($aReductionsWordListResult) > 0)
+                         {
+                             $aNewReductionsList[] = $aReductionsWordListResult;
+                         }
+                     }
+                 }
+                 $aReductionsList = $aNewReductionsList;
+             }
+         }
+     }
+     // Entries were added under new rank keys; sort the groups by their rank key.
+     ksort($aGroupedSearches);
+ }
+
// Filter out duplicate searches
$aSearchHash = array();
foreach($aGroupedSearches as $iGroup => $aSearches)
@@ -784,12 +834,19 @@
if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
$sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid)";
if ($sCountryCodesSQL) $sSQL .= " and country_code in ($sCountryCodesSQL)";
+ if (sizeof($aExcludePlaceIDs))
+ {
+ $sSQL .= " and place_id not in (".join(',',$aExcludePlaceIDs).")";
+ }
if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc";
$sSQL .= " limit $iLimit";
if (CONST_Debug) var_dump($sSQL);
$aPlaceIDs = $oDB->getCol($sSQL);
- if (!sizeof($aPlaceIDs))
+ // If excluded place IDs are given, it is fair to assume that
+ // there have been results in the small box, so no further
+ // expansion in that case.
+ if (!sizeof($aPlaceIDs) && !sizeof($aExcludePlaceIDs))
{
$sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
@@ -818,7 +875,7 @@
if (CONST_Debug) var_dump('
',$aSearch);
if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
$aPlaceIDs = array();
-
+
// First we need a position, either aName or fLat or both
$aTerms = array();
$aOrder = array();
@@ -826,7 +883,22 @@
// TODO: filter out the pointless search terms (2 letter name tokens and less)
// they might be right - but they are just too darned expensive to run
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
- if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
+ if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
+ {
+     // For infrequent name terms disable index usage for address
+     // (done by wrapping the column in array_cat() below).
+     $bSkipAddressIndex = false;
+     if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1)
+     {
+         $iNameWordID = $aSearch['aName'][reset($aSearch['aName'])];
+         // Only word IDs present in $aPossibleMainWordIDs have a recorded
+         // frequency; a missing entry must not be treated as "infrequent".
+         $bSkipAddressIndex = isset($aPossibleMainWordIDs[$iNameWordID]) &&
+             $aPossibleMainWordIDs[$iNameWordID] < CONST_Search_NameOnlySearchFrequencyThreshold;
+     }
+     $sAddressVectorSQL = $bSkipAddressIndex ? "array_cat(nameaddress_vector,ARRAY[]::integer[])" : "nameaddress_vector";
+     $aTerms[] = $sAddressVectorSQL." @> ARRAY[".join($aSearch['aAddress'],",")."]";
+ }
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
if ($aSearch['sHouseNumber']) $aTerms[] = "address_rank in (26,27)";
if ($aSearch['fLon'] && $aSearch['fLat'])
@@ -846,7 +916,7 @@
if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL";
if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc";
- $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.92-(search_rank::float/33) else importance end';
+ $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end';
if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
@@ -999,13 +1069,13 @@
if ($sNearPointSQL) $sOrderBySQL = "ST_Distance($sNearPointSQL, l.centroid)";
else if ($sPlaceIDs) $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)";
- else if ($sPlaceGeom) $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)";
+ else if ($sPlaceGeom) $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; // was $sOrderBysSQL (typo); the select built next reads $sOrderBySQL
-
+
$sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l";
if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)";
if ($sPlaceIDs)
{
$sSQL .= ",placex as f where ";
- $sSQL .= "f.place_id in ($sPlaceIDs) and ST_DWithin(l.centroid, st_centroid(f.geometry), $fRange) ";
+ $sSQL .= "f.place_id in ($sPlaceIDs) and ST_DWithin(l.centroid, f.centroid, $fRange) ";
}
if ($sPlaceGeom)
{
@@ -1032,7 +1102,7 @@
else $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)";
- $sSQL = "select distinct l.place_id".($sOrderBysSQL?','.$sOrderBysSQL:'')." from placex as l,placex as f where ";
+ $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from placex as l,placex as f where "; // was $sOrderBysSQL (typo); the preceding line assigns $sOrderBySQL
- $sSQL .= "f.place_id in ( $sPlaceIDs) and ST_DWithin(l.geometry, st_centroid(f.geometry), $fRange) ";
+ $sSQL .= "f.place_id in ( $sPlaceIDs) and ST_DWithin(l.geometry, f.centroid, $fRange) ";
$sSQL .= "and l.class='".$aSearch['sClass']."' and l.type='".$aSearch['sType']."' ";
if (sizeof($aExcludePlaceIDs))
{
@@ -1073,7 +1143,7 @@
if ($iGroupLoop > 4) break;
if ($iQueryLoop > 30) break;
}
-//exit;
+
// Did we find anything?
if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs))
{
@@ -1090,11 +1160,12 @@
$sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
$sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,";
$sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
- $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
+ $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
// $sSQL .= $sOrderSQL." as porder, ";
- $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+ $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
$sSQL .= "from placex where place_id in ($sPlaceIDs) ";
$sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
+ if ($sAllowedTypesSQLList) $sSQL .= "and placex.class in $sAllowedTypesSQLList ";
$sSQL .= "and linked_place_id is null ";
$sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
if (!$bDeDupe) $sSQL .= ",place_id";
@@ -1159,9 +1230,9 @@
$sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
$sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,";
$sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
- $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
+ $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
// $sSQL .= $sOrderSQL." as porder, ";
- $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+ $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
$sSQL .= "from placex where place_id in ($sPlaceIDs) ";
$sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
$sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
@@ -1391,8 +1462,6 @@
}
uasort($aSearchResults, 'byImportance');
-//var_dump($aSearchResults);exit;
-
$aOSMIDDone = array();
$aClassTypeNameDone = array();
$aToFilter = $aSearchResults;
@@ -1434,7 +1503,7 @@
logEnd($oDB, $hLog, sizeof($aToFilter));
}
$sMoreURL = CONST_Website_BaseURL.'search?format='.urlencode($sOutputFormat).'&exclude_place_ids='.join(',',$aExcludePlaceIDs);
- $sMoreURL .= '&accept-language='.$_SERVER["HTTP_ACCEPT_LANGUAGE"];
+ if (isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])) $sMoreURL .= '&accept-language='.urlencode($_SERVER["HTTP_ACCEPT_LANGUAGE"]); // client-supplied header: encode it like the other URL parameters
if ($bShowPolygons) $sMoreURL .= '&polygon=1';
if ($bShowAddressDetails) $sMoreURL .= '&addressdetails=1';
if (isset($_GET['viewbox']) && $_GET['viewbox']) $sMoreURL .= '&viewbox='.urlencode($_GET['viewbox']);