X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/c798577f70867cd863cdecca90a18dc3b7fd9aa7..5c187182a3119640a65ed1545daef10c7f991f93:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 6632cefb..80561d27 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -6,12 +6,15 @@ protected $aLangPrefOrder = array(); protected $bIncludeAddressDetails = false; + protected $bIncludeExtraTags = false; + protected $bIncludeNameDetails = false; protected $bIncludePolygonAsPoints = false; protected $bIncludePolygonAsText = false; protected $bIncludePolygonAsGeoJSON = false; protected $bIncludePolygonAsKML = false; protected $bIncludePolygonAsSVG = false; + protected $fPolygonSimplificationThreshold = 0.0; protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; @@ -67,6 +70,16 @@ return $this->bIncludeAddressDetails; } + function getIncludeExtraTags() + { + return $this->bIncludeExtraTags; + } + + function getIncludeNameDetails() + { + return $this->bIncludeNameDetails; + } + function setIncludePolygonAsPoints($b = true) { $this->bIncludePolygonAsPoints = $b; @@ -102,6 +115,11 @@ $this->bIncludePolygonAsSVG = $b; } + function setPolygonSimplificationThreshold($f) + { + $this->fPolygonSimplificationThreshold = $f; + } + function setDeDupe($bDeDupe = true) { $this->bDeDupe = (bool)$bDeDupe; @@ -208,6 +226,11 @@ function loadParamArray($aParams) { if (isset($aParams['addressdetails'])) $this->bIncludeAddressDetails = (bool)$aParams['addressdetails']; + if ((float) CONST_Postgresql_Version > 9.2) + { + if (isset($aParams['extratags'])) $this->bIncludeExtraTags = (bool)$aParams['extratags']; + if (isset($aParams['namedetails'])) $this->bIncludeNameDetails = (bool)$aParams['namedetails']; + } if (isset($aParams['bounded'])) $this->bBoundedSearch = (bool)$aParams['bounded']; if (isset($aParams['dedupe'])) $this->bDeDupe = (bool)$aParams['dedupe']; @@ -222,9 +245,12 @@ foreach(explode(',',$aParams['exclude_place_ids']) as $iExcludedPlaceID) { $iExcludedPlaceID = (int)$iExcludedPlaceID; - if ($iExcludedPlaceID) $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + if ($iExcludedPlaceID) + $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; } - $this->aExcludePlaceIDs = $aExcludePlaceIDs; + + if (isset($aExcludePlaceIDs)) + $this->aExcludePlaceIDs = $aExcludePlaceIDs; } // Only certain ranks of feature @@ -380,6 +406,8 @@ $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,"; $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "hstore_to_json(extratags)::text as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "hstore_to_json(name)::text as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; $sSQL .= $sImportanceSQL."coalesce(importance,0.75-(rank_search::float/40)) as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(CASE WHEN placex.rank_search < 28 THEN placex.place_id ELSE placex.parent_place_id END) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; @@ -396,6 +424,8 @@ $sSQL .= ",langaddress "; $sSQL .= ",placename "; $sSQL .= ",ref "; + if ($this->bIncludeExtraTags) $sSQL .= ",extratags"; + if ($this->bIncludeNameDetails) $sSQL .= ",name"; $sSQL .= ",extratags->'place' "; if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) @@ -405,6 +435,8 @@ $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; $sSQL .= $sImportanceSQL."-1.15 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; @@ -418,6 +450,8 @@ $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; $sSQL .= "null as placename,"; $sSQL .= "null as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null as names,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; $sSQL .= $sImportanceSQL."-1.10 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; @@ -526,7 +560,7 @@ if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; } @@ -553,6 +587,8 @@ // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; + // also housenumbers should appear in the first or second phrase + if ($iPhrase > 1) $aSearch['iSearchRank'] += 1; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; /* // Fall back to not searching for this item (better than nothing) @@ -573,18 +609,6 @@ else $aSearch['sOperator'] = 'near'; // near = in for the moment if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; - // Do we have a shortcut id? - if ($aSearch['sOperator'] == 'name') - { - $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')"; - if ($iAmenityID = $this->oDB->getOne($sSQL)) - { - $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID); - $aSearch['aName'][$iAmenityID] = $iAmenityID; - $aSearch['sClass'] = ''; - $aSearch['sType'] = ''; - } - } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } @@ -592,7 +616,7 @@ { if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) + if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; } @@ -630,7 +654,7 @@ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version? + elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? { $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; $aSearch['iSearchRank'] += 1; @@ -763,7 +787,6 @@ if (!$this->sQuery && !$this->aStructuredQuery) return false; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$this->aLangPrefOrder))."]"; - $sCountryCodesSQL = false; if ($this->aCountryCodes && sizeof($this->aCountryCodes)) { @@ -866,10 +889,10 @@ preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); $aSpecialTerms = array(); - if (isset($aStructuredQuery['amenity']) && $aStructuredQuery['amenity']) + if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { - $aSpecialTermsRaw[] = array('['.$aStructuredQuery['amenity'].']', $aStructuredQuery['amenity']); - unset($aStructuredQuery['amenity']); + $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); + unset($this->aStructuredQuery['amenity']); } foreach($aSpecialTermsRaw as $aSpecialTerm) { @@ -967,8 +990,8 @@ foreach($aDatabaseWords as $aToken) { // Very special case - require 2 letter country param to match the country code found - if ($bStructuredPhrases && $aToken['country_code'] && !empty($aStructuredQuery['country']) - && strlen($aStructuredQuery['country']) == 2 && strtolower($aStructuredQuery['country']) != $aToken['country_code']) + if ($bStructuredPhrases && $aToken['country_code'] && !empty($this->aStructuredQuery['country']) + && strlen($this->aStructuredQuery['country']) == 2 && strtolower($this->aStructuredQuery['country']) != $aToken['country_code']) { continue; } @@ -1054,7 +1077,7 @@ if (sizeof($aPhrases) > 1) { $aFinalPhrase = end($aPhrases); - $aFinalPhrase['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); + $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); } $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); @@ -1092,7 +1115,7 @@ if (CONST_Debug) var_Dump($aGroupedSearches); - if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0) + if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach($aCopyGroupedSearches as $iGroup => $aSearches) @@ -1172,7 +1195,7 @@ $sSQL = "select place_id from placex where calculated_country_code='".$aSearch['sCountryCode']."' and rank_search = 4"; if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) - $sSQL .= " and st_overlaps($this->sViewboxSmallSQL, geometry)"; + $sSQL .= " and _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " order by st_area(geometry) desc limit 1"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); @@ -1238,6 +1261,12 @@ $aTerms = array(); $aOrder = array(); + if ($aSearch['sHouseNumber'] && sizeof($aSearch['aAddress'])) + { + $sHouseNumberRegex = '\\\\m'.$aSearch['sHouseNumber'].'\\\\M'; + $aOrder[] = "exists(select place_id from placex where parent_place_id = search_name.place_id and transliteration(housenumber) ~* E'".$sHouseNumberRegex."' limit 1) desc"; + } + // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; @@ -1318,7 +1347,7 @@ $sSQL .= " where ".join(' and ',$aTerms); $sSQL .= " order by ".join(', ',$aOrder); if ($aSearch['sHouseNumber'] || $aSearch['sClass']) - $sSQL .= " limit 50"; + $sSQL .= " limit 20"; elseif (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && $aSearch['sClass']) $sSQL .= " limit 1"; else @@ -1583,23 +1612,49 @@ $aRecheckWords = preg_split('/\b[\s,\\-]*/u',$sQuery); foreach($aRecheckWords as $i => $sWord) { - if (!$sWord) unset($aRecheckWords[$i]); + if (!preg_match('/\pL/', $sWord)) unset($aRecheckWords[$i]); } + if (CONST_Debug) { echo 'Recheck words:<\i>'; var_dump($aRecheckWords); } + foreach($aSearchResults as $iResNum => $aResult) { + // Default + $fDiameter = 0.0001; + + if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) + && $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) + { + $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']; + } + elseif (isset($aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) + && $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) + { + $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']; + } + $fRadius = $fDiameter / 2; + if (CONST_Search_AreaPolygons) { // Get the bounding box and outline polygon $sSQL = "select place_id,0 as numfeatures,st_area(geometry) as area,"; $sSQL .= "ST_Y(centroid) as centrelat,ST_X(centroid) as centrelon,"; - $sSQL .= "ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),4)) as minlat,ST_Y(ST_PointN(ST_ExteriorRing(Box2D(geometry)),2)) as maxlat,"; - $sSQL .= "ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),1)) as minlon,ST_X(ST_PointN(ST_ExteriorRing(Box2D(geometry)),3)) as maxlon"; + $sSQL .= "ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,"; + $sSQL .= "ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon"; if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ",ST_AsGeoJSON(geometry) as asgeojson"; if ($this->bIncludePolygonAsKML) $sSQL .= ",ST_AsKML(geometry) as askml"; if ($this->bIncludePolygonAsSVG) $sSQL .= ",ST_AsSVG(geometry) as assvg"; if ($this->bIncludePolygonAsText || $this->bIncludePolygonAsPoints) $sSQL .= ",ST_AsText(geometry) as astext"; - $sSQL .= " from placex where place_id = ".$aResult['place_id'].' and st_geometrytype(Box2D(geometry)) = \'ST_Polygon\''; + $sFrom = " from placex where place_id = ".$aResult['place_id']; + if ($this->fPolygonSimplificationThreshold > 0) + { + $sSQL .= " from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,".$this->fPolygonSimplificationThreshold.") as geometry".$sFrom.") as plx"; + } + else + { + $sSQL .= $sFrom; + } + $aPointPolygon = $this->oDB->getRow($sSQL); if (PEAR::IsError($aPointPolygon)) { @@ -1621,7 +1676,7 @@ if ($this->bIncludePolygonAsPoints) { - // Translate geometary string to point array + // Translate geometry string to point array if (preg_match('#POLYGON\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); @@ -1632,18 +1687,13 @@ } elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) { - $fRadius = 0.01; - $iSteps = ($fRadius * 40000)^2; + $iSteps = max(8, min(100, ($fRadius * 40000)^2)); $fStepSize = (2*pi())/$iSteps; $aPolyPoints = array(); for($f = 0; $f < 2*pi(); $f += $fStepSize) { $aPolyPoints[] = array('',$aMatch[1]+($fRadius*sin($f)),$aMatch[2]+($fRadius*cos($f))); } - $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius; - $aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius; - $aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius; - $aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius; } } @@ -1656,7 +1706,18 @@ $aResult['aPolyPoints'][] = array($aPoint[1], $aPoint[2]); } } - $aResult['aBoundingBox'] = array($aPointPolygon['minlat'],$aPointPolygon['maxlat'],$aPointPolygon['minlon'],$aPointPolygon['maxlon']); + + if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) + { + $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius; + $aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius; + } + if (abs($aPointPolygon['minlon'] - $aPointPolygon['maxlon']) < 0.0000001) + { + $aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius; + $aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius; + } + $aResult['aBoundingBox'] = array((string)$aPointPolygon['minlat'],(string)$aPointPolygon['maxlat'],(string)$aPointPolygon['minlon'],(string)$aPointPolygon['maxlon']); } } @@ -1669,28 +1730,8 @@ if (!isset($aResult['aBoundingBox'])) { - // Default - $fDiameter = 0.0001; - - if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) - && $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defdiameter']) - { - $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['defzoom']; - } - elseif (isset($aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) - && $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']) - { - $fDiameter = $aClassType[$aResult['class'].':'.$aResult['type']]['defdiameter']; - } - $fRadius = $fDiameter / 2; - $iSteps = max(8,min(100,$fRadius * 3.14 * 100000)); $fStepSize = (2*pi())/$iSteps; - $aPolyPoints = array(); - for($f = 0; $f < 2*pi(); $f += $fStepSize) - { - $aPolyPoints[] = array('',$aResult['lon']+($fRadius*sin($f)),$aResult['lat']+($fRadius*cos($f))); - } $aPointPolygon['minlat'] = $aResult['lat'] - $fRadius; $aPointPolygon['maxlat'] = $aResult['lat'] + $fRadius; $aPointPolygon['minlon'] = $aResult['lon'] - $fRadius; @@ -1699,6 +1740,11 @@ // Output data suitable for display (points and a bounding box) if ($this->bIncludePolygonAsPoints) { + $aPolyPoints = array(); + for($f = 0; $f < 2*pi(); $f += $fStepSize) + { + $aPolyPoints[] = array('',$aResult['lon']+($fRadius*sin($f)),$aResult['lat']+($fRadius*cos($f))); + } $aResult['aPolyPoints'] = array(); foreach($aPolyPoints as $aPoint) { @@ -1735,6 +1781,30 @@ } } + if ($this->bIncludeExtraTags) + { + if ($aResult['extra']) + { + $aResult['sExtraTags'] = json_decode($aResult['extra']); + } + else + { + $aResult['sExtraTags'] = (object) array(); + } + } + + if ($this->bIncludeNameDetails) + { + if ($aResult['names']) + { + $aResult['sNameDetails'] = json_decode($aResult['names']); + } + else + { + $aResult['sNameDetails'] = (object) array(); + } + } + // Adjust importance for the number of exact string matches in the result $aResult['importance'] = max(0.001,$aResult['importance']); $iCountWords = 0; @@ -1744,7 +1814,7 @@ if (stripos($sAddress, $sWord)!==false) { $iCountWords++; - if (preg_match("/(^|,)\s*$sWord\s*(,|$)/", $sAddress)) $iCountWords += 0.1; + if (preg_match("/(^|,)\s*".preg_quote($sWord, '/')."\s*(,|$)/", $sAddress)) $iCountWords += 0.1; } } @@ -1763,12 +1833,13 @@ if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance']) && $aClassType[$aResult['class'].':'.$aResult['type']]['importance']) { - $aResult['foundorder'] = $aResult['foundorder'] + 0.000001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; + $aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance']; } else { - $aResult['foundorder'] = $aResult['foundorder'] + 0.001; + $aResult['foundorder'] += 0.01; } + if (CONST_Debug) { var_dump($aResult); } $aSearchResults[$iResNum] = $aResult; } uasort($aSearchResults, 'byImportance');