X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/cf5ced6618c0c18eed4cafc2287e8c3813c9131f..17475b99bbf94479e54c7b480031eb71536a1cec:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 2c736475..2d5d3394 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -12,10 +12,11 @@ protected $bIncludePolygonAsGeoJSON = false; protected $bIncludePolygonAsKML = false; protected $bIncludePolygonAsSVG = false; + protected $fPolygonSimplificationThreshold = 0.0; protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -102,6 +103,11 @@ $this->bIncludePolygonAsSVG = $b; } + function setPolygonSimplificationThreshold($f) + { + $this->fPolygonSimplificationThreshold = $f; + } + function setDeDupe($bDeDupe = true) { $this->bDeDupe = (bool)$bDeDupe; @@ -222,9 +228,12 @@ foreach(explode(',',$aParams['exclude_place_ids']) as $iExcludedPlaceID) { $iExcludedPlaceID = (int)$iExcludedPlaceID; - if ($iExcludedPlaceID) $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + if ($iExcludedPlaceID) + $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; } - $this->aExcludePlaceIDs = $aExcludePlaceIDs; + + if (isset($aExcludePlaceIDs)) + $this->aExcludePlaceIDs = $aExcludePlaceIDs; } // Only certain ranks of feature @@ -413,6 +422,7 @@ $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id "; + /* $sSQL .= " union "; $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; @@ -427,6 +437,7 @@ $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; + */ } $sSQL .= " order by importance desc"; @@ -526,7 +537,7 @@ if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; } @@ -575,18 +586,6 @@ else $aSearch['sOperator'] = 'near'; // near = in for the moment if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; - // Do we have a shortcut id? - if ($aSearch['sOperator'] == 'name') - { - $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')"; - if ($iAmenityID = $this->oDB->getOne($sSQL)) - { - $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID); - $aSearch['aName'][$iAmenityID] = $iAmenityID; - $aSearch['sClass'] = ''; - $aSearch['sType'] = ''; - } - } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } @@ -594,7 +593,7 @@ { if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; } @@ -632,7 +631,7 @@ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version? + elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? { $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; $aSearch['iSearchRank'] += 1; @@ -765,7 +764,6 @@ if (!$this->sQuery && !$this->aStructuredQuery) return false; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$this->aLangPrefOrder))."]"; - $sCountryCodesSQL = false; if ($this->aCountryCodes && sizeof($this->aCountryCodes)) { @@ -868,10 +866,10 @@ preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); $aSpecialTerms = array(); - if (isset($aStructuredQuery['amenity']) && $aStructuredQuery['amenity']) + if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { - $aSpecialTermsRaw[] = array('['.$aStructuredQuery['amenity'].']', $aStructuredQuery['amenity']); - unset($aStructuredQuery['amenity']); + $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); + unset($this->aStructuredQuery['amenity']); } foreach($aSpecialTermsRaw as $aSpecialTerm) { @@ -969,8 +967,8 @@ foreach($aDatabaseWords as $aToken) { // Very special case - require 2 letter country param to match the country code found - if ($bStructuredPhrases && $aToken['country_code'] && !empty($aStructuredQuery['country']) - && strlen($aStructuredQuery['country']) == 2 && strtolower($aStructuredQuery['country']) != $aToken['country_code']) + if ($bStructuredPhrases && $aToken['country_code'] && !empty($this->aStructuredQuery['country']) + && strlen($this->aStructuredQuery['country']) == 2 && strtolower($this->aStructuredQuery['country']) != $aToken['country_code']) { continue; } @@ -1094,7 +1092,7 @@ if (CONST_Debug) var_Dump($aGroupedSearches); - if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0) + if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach($aCopyGroupedSearches as $iGroup => $aSearches) @@ -1240,7 +1238,7 @@ $aTerms = array(); $aOrder = array(); - if ($aSearch['sHouseNumber']) + if ($aSearch['sHouseNumber'] && sizeof($aSearch['aAddress'])) { $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; $aOrder[] = "exists(select place_id from placex where parent_place_id = search_name.place_id and transliteration(housenumber) ~* E'".$sHouseNumberRegex."' limit 1) desc"; @@ -1249,7 +1247,7 @@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address @@ -1257,12 +1255,13 @@ sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; + //if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@ -1372,6 +1371,7 @@ $aPlaceIDs = $this->oDB->getCol($sSQL); // If not try the aux fallback table + /* if (!sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; @@ -1383,6 +1383,7 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); } + */ if (!sizeof($aPlaceIDs)) { @@ -1591,9 +1592,11 @@ $aRecheckWords = preg_split('/\b[\s,\\-]*/u',$sQuery); foreach($aRecheckWords as $i => $sWord) { - if (!$sWord) unset($aRecheckWords[$i]); + if (!preg_match('/\pL/', $sWord)) unset($aRecheckWords[$i]); } + if (CONST_Debug) { echo 'Recheck words:<\i>'; var_dump($aRecheckWords); } + foreach($aSearchResults as $iResNum => $aResult) { if (CONST_Search_AreaPolygons) @@ -1601,13 +1604,22 @@ // Get the bounding box and outline polygon $sSQL = "select place_id,0 as numfeatures,st_area(geometry) as area,"; $sSQL .= "ST_Y(centroid) as centrelat,ST_X(centroid) as centrelon,"; - $sSQL .= "ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),4)) as minlat,ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),2)) as maxlat,"; - $sSQL .= "ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),1)) as minlon,ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),3)) as maxlon"; + $sSQL .= "ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,"; + $sSQL .= "ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon"; if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ",ST_AsGeoJSON(geometry) as asgeojson"; if ($this->bIncludePolygonAsKML) $sSQL .= ",ST_AsKML(geometry) as askml"; if ($this->bIncludePolygonAsSVG) $sSQL .= ",ST_AsSVG(geometry) as assvg"; if ($this->bIncludePolygonAsText || $this->bIncludePolygonAsPoints) $sSQL .= ",ST_AsText(geometry) as astext"; - $sSQL .= " from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(Box2D(geometry)) = \'ST_Polygon\''; + $sFrom = " from placex where place_id = ".$aResult['place_id']; + if ($this->fPolygonSimplificationThreshold > 0) + { + $sSQL .= " from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,".$this->fPolygonSimplificationThreshold.") as geometry".$sFrom.") as plx"; + } + else + { + $sSQL .= $sFrom; + } + $aPointPolygon = $this->oDB->getRow($sSQL); if (PEAR::IsError($aPointPolygon)) { @@ -1629,15 +1641,17 @@ if ($this->bIncludePolygonAsPoints) { - // Translate geometary string to point array + // Translate geometry string to point array if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + /* elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + */ elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) { $fRadius = 0.01; @@ -1752,7 +1766,7 @@ if (stripos($sAddress, $sWord)!==false) { $iCountWords++; - if (preg_match("/(^|,)\s*$sWord\s*(,|$)/", $sAddress)) $iCountWords += 0.1; + if (preg_match("/(^|,)\s*".preg_quote($sWord, '/')."\s*(,|$)/", $sAddress)) $iCountWords += 0.1; } } @@ -1771,11 +1785,11 @@ if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) && $aClassType[$aResult['class'].':'.$aResult['type']]['importance']) { - $aResult['foundorder'] = $aResult['foundorder'] + 0.000001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; + $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; } else { - $aResult['foundorder'] = $aResult['foundorder'] + 0.001; + $aResult['foundorder'] += 0.01; } $aSearchResults[$iResNum] = $aResult; }