]> git.openstreetmap.org Git - nominatim.git/blobdiff - website/search.php
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / website / search.php
index 541d5ec450e9678622fd171ba0d4ad51fb03474f..24f21e12f31d1690aca25e4d3064d3e9b7c136a2 100755 (executable)
@@ -1,8 +1,11 @@
 <?php
+       @define('CONST_ConnectionBucket_PageType', 'Search');
+
        require_once(dirname(dirname(__FILE__)).'/lib/init-website.php');
        require_once(CONST_BasePath.'/lib/log.php');
 
        ini_set('memory_limit', '200M');
+
        $oDB =& getDB();
 
        // Display defaults
        $sSuggestion = $sSuggestionURL = false;
        $bDeDupe = isset($_GET['dedupe'])?(bool)$_GET['dedupe']:true;
        $bReverseInPlan = false;
-       $iLimit = isset($_GET['limit'])?(int)$_GET['limit']:10;
+       // Number of results handed back to the caller. Clamped to 1..50 because the
+       // derived $iLimit is interpolated into SQL "limit" clauses, which reject negatives.
+       $iFinalLimit = isset($_GET['limit'])?(int)$_GET['limit']:10;
        $iOffset = isset($_GET['offset'])?(int)$_GET['offset']:0;
+       // A negative offset would likewise produce an invalid "offset" clause.
+       if ($iOffset < 0) $iOffset = 0;
        $iMaxRank = 20;
-       if ($iLimit > 100) $iLimit = 100;
+       if ($iFinalLimit > 50) $iFinalLimit = 50;
+       if ($iFinalLimit < 1) $iFinalLimit = 1;
+       // Query for a few more rows than requested; the result loop stops at $iFinalLimit.
+       $iLimit = $iFinalLimit + min($iFinalLimit, 10);
        $iMinAddressRank = 0;
        $iMaxAddressRank = 30;
+       $sAllowedTypesSQLList = false;
 
        // Format for output
        if (isset($_GET['format']) && ($_GET['format'] == 'html' || $_GET['format'] == 'xml' || $_GET['format'] == 'json' ||  $_GET['format'] == 'jsonv2'))
 
        // Show / use polygons
        $bShowPolygons = (boolean)isset($_GET['polygon']) && $_GET['polygon'];
-       $bAsGeoJSON = (boolean)isset($_GET['polygon_geojson']) && $_GET['polygon_geojson'];
-       $bAsKML = (boolean)isset($_GET['polygon_kml']) && $_GET['polygon_kml'];
-       $bAsSVG = (boolean)isset($_GET['polygon_svg']) && $_GET['polygon_svg'];
-       $bAsText = (boolean)isset($_GET['polygon_text']) && $_GET['polygon_text'];
-       if ((($bShowPolygons?1:0)  
-          + ($bAsGeoJSON?1:0) 
-          + ($bAsKML?1:0) 
-          + ($bAsSVG?1:0) 
-          + ($bAsTEXT?1:0) 
-               ) > CONST_PolygonOutput_MaximumTypes) {
-               if (CONST_PolygonOutput_MaximumTypes) {
-                       userError("Select only ".CONST_PolygonOutput_MaximumTypes." polgyon output option");
-               } else {
-                       userError("Polygon output is disabled");
+    if ($sOutputFormat == 'html') {
+               $bAsText = $bShowPolygons;
+               $bShowPolygons = false;
+               $bAsGeoJSON = false;
+               $bAsKML = false;
+               $bAsSVG = false;
+       } else {
+               $bAsGeoJSON = (boolean)isset($_GET['polygon_geojson']) && $_GET['polygon_geojson'];
+               $bAsKML = (boolean)isset($_GET['polygon_kml']) && $_GET['polygon_kml'];
+               $bAsSVG = (boolean)isset($_GET['polygon_svg']) && $_GET['polygon_svg'];
+               $bAsText = (boolean)isset($_GET['polygon_text']) && $_GET['polygon_text'];
+               if ((($bShowPolygons?1:0)
+                  + ($bAsGeoJSON?1:0)
+                  + ($bAsKML?1:0)
+                  + ($bAsSVG?1:0)
+                  + ($bAsText?1:0)
+                       ) > CONST_PolygonOutput_MaximumTypes) {
+                       // More polygon output types were requested than the configured maximum;
+                       // a maximum of 0 means polygon output is switched off altogether.
+                       if (CONST_PolygonOutput_MaximumTypes) {
+                               userError("Select only ".CONST_PolygonOutput_MaximumTypes." polygon output option");
+                       } else {
+                               userError("Polygon output is disabled");
+                       }
+                       exit;
                }
-               exit;
        }
 
        // Show address breakdown
@@ -58,6 +71,7 @@
        if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
+       if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true;
 
        $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
 
                                array('postalcode', 16, 25),
                                );
        $aStructuredQuery = array();
+       $sAllowedTypesSQLList = '';
        foreach($aStructuredOptions as $aStructuredOption)
        {
                loadStructuredAddressElement($aStructuredQuery, $iMinAddressRank, $iMaxAddressRank, $_GET, $aStructuredOption[0], $aStructuredOption[1], $aStructuredOption[2]);
        }
        if (sizeof($aStructuredQuery) > 0) {
                $sQuery = join(', ', $aStructuredQuery);
+               $sAllowedTypesSQLList = '(\'place\',\'boundary\')';
        }
 
        if ($sQuery)
                                failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL);
                        }
                        $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry";
+                       $bBoundingBoxSearch = true;
                }
 
                // Do we have anything that looks like a lat/lon pair?
                                        }
                                }
 
+                               // Structured queries only: additionally try every search with address terms
+                               // dropped one at a time, adding 5 to the search rank per dropped term.
+                               if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0)
+                               {
+                                       // Iterate over a copy: new searches are appended to $aGroupedSearches below.
+                                       $aCopyGroupedSearches = $aGroupedSearches;
+                                       foreach($aCopyGroupedSearches as $iGroup => $aSearches)
+                                       {
+                                               foreach($aSearches as $iSearch => $aSearch)
+                                               {
+                                                       // Each entry is one address-term list still to be reduced further.
+                                                       $aReductionsList = array($aSearch['aAddress']);
+                                                       $iSearchRank = $aSearch['iSearchRank'];
+                                                       while(sizeof($aReductionsList) > 0)
+                                                       {
+                                                               $iSearchRank += 5;
+                                                               // Compare against the $iMaxRank variable; the bare word would be an undefined constant.
+                                                               if ($iSearchRank > $iMaxRank) break 3;
+                                                               $aNewReductionsList = array();
+                                                               foreach($aReductionsList as $aReductionsWordList)
+                                                               {
+                                                                       for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++)
+                                                                       {
+                                                                               // Term list with the word at position $iReductionWord removed.
+                                                                               $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1));
+                                                                               // Modify the copy, not $aSearch, so the stored search carries the
+                                                                               // reduced terms and new rank while later iterations start from the original.
+                                                                               $aReverseSearch = $aSearch;
+                                                                               $aReverseSearch['aAddress'] = $aReductionsWordListResult;
+                                                                               $aReverseSearch['iSearchRank'] = $iSearchRank;
+                                                                               $aGroupedSearches[$iSearchRank][] = $aReverseSearch;
+                                                                               if (sizeof($aReductionsWordListResult) > 0)
+                                                                               {
+                                                                                       $aNewReductionsList[] = $aReductionsWordListResult;
+                                                                               }
+                                                                       }
+                                                               }
+                                                               $aReductionsList = $aNewReductionsList;
+                                                       }
+                                               }
+                                       }
+                                       // New rank groups were appended out of order; restore ascending rank order.
+                                       ksort($aGroupedSearches);
+                               }
+
                                // Filter out duplicate searches
                                $aSearchHash = array();
                                foreach($aGroupedSearches as $iGroup => $aSearches)
                                                // Must have a location term
                                                if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['fLon'])
                                                {
-                                                       if ($aSearch['sCountryCode'] && !$aSearch['sClass'])
+                                                       if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'])
                                                        {
                                                                if (4 >= $iMinAddressRank && 4 <= $iMaxAddressRank)
                                                                {
                                                                if (!$aSearch['sClass']) continue;
                                                                if (CONST_Debug) var_dump('<hr>',$aSearch);
                                                                if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);     
-
                                                                $sSQL = "select count(*) from pg_tables where tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'";
                                                                if ($oDB->getOne($sSQL))
                                                                {
                                                                $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
                                                                if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
-                                                               $sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid) and linked_place_id is null";
+                                                               $sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid)";
                                                                if ($sCountryCodesSQL) $sSQL .= " and country_code in ($sCountryCodesSQL)";                                                             
                                                                if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc";
                                                                $sSQL .= " limit $iLimit";
                                                                {
                                                                        $sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
                                                                        if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
-                                                                       $sSQL .= " where st_contains($sViewboxLargeSQL, ct.centroid) and linked_place_id is null";
+                                                                       $sSQL .= " where st_contains($sViewboxLargeSQL, ct.centroid)";
                                                                        if ($sCountryCodesSQL) $sSQL .= " and country_code in ($sCountryCodesSQL)";                                                             
                                                                        if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc";
                                                                        $sSQL .= " limit $iLimit";
                                                        if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL";
                                                        if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc";
 
-                                                       $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.92-(search_rank::float/33) else importance end';
+                                                       $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end';
 
                                                        if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
                                                        if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
                                                                        $sSQL .= " limit ".$iLimit;
 
                                                                if (CONST_Debug) var_dump($sSQL);
+                                                               $iStartTime = time();
                                                                $aViewBoxPlaceIDs = $oDB->getAll($sSQL);
                                                                if (PEAR::IsError($aViewBoxPlaceIDs))
                                                                {
                                                                        failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
                                                                }
+                                                               if (time() - $iStartTime > 60) {
+                                                                       file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND);
+                                                               }
+
 //var_dump($aViewBoxPlaceIDs);
                                                                // Did we have an viewbox matches?
                                                                $aPlaceIDs = array();
                                                        {
                                                                $sPlaceIDs = join(',',$aPlaceIDs);
 
+                                                               $aClassPlaceIDs = array();
+
                                                                if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'name')
                                                                {
                                                                        // If they were searching for a named class (i.e. 'Kings Head pub') then we might have an extra match
                                                                        if ($sCountryCodesSQL) $sSQL .= " and country_code in ($sCountryCodesSQL)";                                                             
                                                                        $sSQL .= " order by rank_search asc limit $iLimit";
                                                                        if (CONST_Debug) var_dump($sSQL);
-                                                                       $aPlaceIDs = $oDB->getCol($sSQL);
+                                                                       $aClassPlaceIDs = $oDB->getCol($sSQL);
                                                                }
                                                                
                                                                if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'near') // & in
                                                                        if ($iMaxRank < 9 && $bCacheTable)
                                                                        {
                                                                                // Try and get a polygon to search in instead
-       $sSQL = "select geometry from placex where place_id in ($sPlaceIDs) and rank_search < $iMaxRank + 5 and st_geometrytype(geometry) in ('ST_Polygon','ST_MultiPolygon') order by rank_search asc limit 1";
-       if (CONST_Debug) var_dump($sSQL);
-       $sPlaceGeom = $oDB->getOne($sSQL);
+                                                                               $sSQL = "select geometry from placex where place_id in ($sPlaceIDs) and rank_search < $iMaxRank + 5 and st_geometrytype(geometry) in ('ST_Polygon','ST_MultiPolygon') order by rank_search asc limit 1";
+                                                                               if (CONST_Debug) var_dump($sSQL);
+                                                                               $sPlaceGeom = $oDB->getOne($sSQL);
                                                                        }
                                                                        
                                                                        if ($sPlaceGeom)
                                                                        else
                                                                        {
                                                                                $iMaxRank += 5;
-                                                                       $sSQL = "select place_id from placex where place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
-                                                                       if (CONST_Debug) var_dump($sSQL);
-                                                                       $aPlaceIDs = $oDB->getCol($sSQL);
-                                                                       $sPlaceIDs = join(',',$aPlaceIDs);
+                                                                               $sSQL = "select place_id from placex where place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
+                                                                               if (CONST_Debug) var_dump($sSQL);
+                                                                               $aPlaceIDs = $oDB->getCol($sSQL);
+                                                                               $sPlaceIDs = join(',',$aPlaceIDs);
                                                                        }
 
                                                                        if ($sPlaceIDs || $sPlaceGeom)
                                                                                else if ($sPlaceIDs) $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)";
                                                                                else if ($sPlaceGeom) $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)";
                                                                                
-                                                                               $sSQL = "select distinct l.place_id".($sOrderBysSQL?','.$sOrderBysSQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l";
+                                                                               $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l";
                                                                                if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)";
                                                                                if ($sPlaceIDs)
                                                                                {
                                                                                        $sSQL .= " and l.place_id not in (".join(',',$aExcludePlaceIDs).")";
                                                                                }
                                                                                if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)";
-                                                                               if ($sOrderBy) $sSQL .= "order by ".$OrderBysSQL." asc";
+                                                                               if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc";
                                                                                if ($iOffset) $sSQL .= " offset $iOffset";
                                                                                $sSQL .= " limit $iLimit";
                                                                                if (CONST_Debug) var_dump($sSQL);
-                                                                               $aPlaceIDs = $oDB->getCol($sSQL);
+                                                                               $aClassPlaceIDs = array_merge($aClassPlaceIDs, $oDB->getCol($sSQL));
                                                                        }
                                                                        else
                                                                        {
                                                                                if ($iOffset) $sSQL .= " offset $iOffset";
                                                                                $sSQL .= " limit $iLimit";
                                                                                if (CONST_Debug) var_dump($sSQL);
-                                                                               $aPlaceIDs = $oDB->getCol($sSQL);
+                                                                               $aClassPlaceIDs = array_merge($aClassPlaceIDs, $oDB->getCol($sSQL));
                                                                        }
                                                                        }
                                                                }
+
+                                                               $aPlaceIDs = $aClassPlaceIDs;
+
                                                        }
                                                
                                                }
                                                }
                                                if ($iQueryLoop > 20) break;
                                        }
+
                                        //exit;
-                                       if (sizeof($aResultPlaceIDs)) break;
+                                       if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs)) break;
                                        if ($iGroupLoop > 4) break;
                                        if ($iQueryLoop > 30) break;
                                }
 //exit;
                                // Did we find anything?        
-                               if (sizeof($aResultPlaceIDs))
+                               if (isset($aResultPlaceIDs) && sizeof($aResultPlaceIDs))
                                {
 //var_Dump($aResultPlaceIDs);exit;
                                        // Get the details for display (is this a redundant extra step?)
                                        $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
                                        $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
 //                                     $sSQL .= $sOrderSQL." as porder, ";
-                                       $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+                                       $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
                                        $sSQL .= "from placex where place_id in ($sPlaceIDs) ";
                                        $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
+                                       if ($sAllowedTypesSQLList) $sSQL .= "and placex.class in $sAllowedTypesSQLList ";
                                        $sSQL .= "and linked_place_id is null ";
                                        $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
                                        if (!$bDeDupe) $sSQL .= ",place_id";
                                        $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
                                        $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
 //                                     $sSQL .= $sOrderSQL." as porder, ";
-                                       $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+                                       $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
                                        $sSQL .= "from placex where place_id in ($sPlaceIDs) ";
                                        $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
                                        $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
                        if ($bAsGeoJSON) $sSQL .= ",ST_AsGeoJSON(geometry) as asgeojson";
                        if ($bAsKML) $sSQL .= ",ST_AsKML(geometry) as askml";
                        if ($bAsSVG) $sSQL .= ",ST_AsSVG(geometry) as assvg";
-                       if ($bAsText) $sSQL .= ",ST_AsText(geometry) as astext";
-                       if ($bShowPolygons) $sSQL .= ",ST_AsText(geometry) as outlinestring";
+                       if ($bAsText || $bShowPolygons) $sSQL .= ",ST_AsText(geometry) as astext";
                        $sSQL .= " from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(Box2D(geometry)) = \'ST_Polygon\'';
                        $aPointPolygon = $oDB->getRow($sSQL);
                        if (PEAR::IsError($aPointPolygon))
                                if ($bShowPolygons) 
                                {
                                        // Translate geometary string to point array
-                                       if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['outlinestring'],$aMatch))
+                                       if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch))
                                        {
                                                preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
                                        }
-                                       elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['outlinestring'],$aMatch))
+                                       elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch))
                                        {
                                                preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
                                        }
-                                       elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['outlinestring'],$aMatch))
+                                       elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch))
                                        {
                                                $fRadius = 0.01;
                                                $iSteps = ($fRadius * 40000)^2;
        }
        uasort($aSearchResults, 'byImportance');
 
-//var_dump($aSearchResults);exit;
-       
        $aOSMIDDone = array();
        $aClassTypeNameDone = array();
        $aToFilter = $aSearchResults;
                }
 
                // Absolute limit on number of results
-               if (sizeof($aSearchResults) >= $iLimit) break;
+               if (sizeof($aSearchResults) >= $iFinalLimit) break;
        }
 
        $sDataDate = $oDB->getOne("select TO_CHAR(lastimportdate - '1 day'::interval,'YYYY/MM/DD') from import_status limit 1");
                logEnd($oDB, $hLog, sizeof($aToFilter));
        }
        $sMoreURL = CONST_Website_BaseURL.'search?format='.urlencode($sOutputFormat).'&exclude_place_ids='.join(',',$aExcludePlaceIDs);
-       $sMoreURL .= '&accept-language='.$_SERVER["HTTP_ACCEPT_LANGUAGE"];
+       // The header is client-supplied and normally contains ',', ';' and '=', so encode it like the other URL parameters.
+       if (isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])) $sMoreURL .= '&accept-language='.urlencode($_SERVER["HTTP_ACCEPT_LANGUAGE"]);
        if ($bShowPolygons) $sMoreURL .= '&polygon=1';
        if ($bShowAddressDetails) $sMoreURL .= '&addressdetails=1';
        if (isset($_GET['viewbox']) && $_GET['viewbox']) $sMoreURL .= '&viewbox='.urlencode($_GET['viewbox']);