X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/032f24bfefcd6c8b5a2211a361430f5523150ab1..5b4bbab9be984706313d9273d029df42f5762b54:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 7ea39e49..0546983f 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -2,6 +2,7 @@ namespace Nominatim; +require_once(CONST_BasePath.'/lib/NearPoint.php'); require_once(CONST_BasePath.'/lib/PlaceLookup.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); @@ -32,7 +33,6 @@ class Geocode protected $bFallback = false; protected $aCountryCodes = false; - protected $aNearPoint = false; protected $bBoundedSearch = false; protected $aViewBox = false; @@ -67,19 +67,45 @@ class Geocode $this->aLangPrefOrder = $aLangPref; } - public function getIncludeAddressDetails() + public function getMoreUrlParams() { - return $this->bIncludeAddressDetails; - } + if ($this->aStructuredQuery) { + $aParams = $this->aStructuredQuery; + } else { + $aParams = array('q' => $this->sQuery); + } - public function getIncludeExtraTags() - { - return $this->bIncludeExtraTags; - } + if ($this->aExcludePlaceIDs) { + $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs); + } - public function getIncludeNameDetails() - { - return $this->bIncludeNameDetails; + if ($this->bIncludeAddressDetails) $aParams['addressdetails'] = '1'; + if ($this->bIncludeExtraTags) $aParams['extratags'] = '1'; + if ($this->bIncludeNameDetails) $aParams['namedetails'] = '1'; + + if ($this->bIncludePolygonAsPoints) $aParams['polygon'] = '1'; + if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1'; + if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1'; + if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1'; + if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1'; + + if ($this->fPolygonSimplificationThreshold > 0.0) { + $aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold; + } + + if ($this->bBoundedSearch) $aParams['bounded'] = '1'; + if (!$this->bDeDupe) $aParams['dedupe'] = '0'; + + if ($this->aCountryCodes) { + $aParams['countrycodes'] = implode(',', $this->aCountryCodes); + } + + if ($this->aViewBox) { + $aParams['viewbox'] = $this->aViewBox[0].','.$this->aViewBox[3] + .','.$this->aViewBox[2].','.$this->aViewBox[1]; + } + + return $aParams; } public function setIncludePolygonAsPoints($b = true) @@ -121,17 +147,6 @@ class Geocode $this->iLimit = $iLimit + min($iLimit, 10); } - public function getExcludedPlaceIDs() - { - return $this->aExcludePlaceIDs; - } - - public function getViewBoxString() - { - if (!$this->aViewBox) return null; - return $this->aViewBox[0].','.$this->aViewBox[3].','.$this->aViewBox[2].','.$this->aViewBox[1]; - } - public function setFeatureType($sFeatureType) { switch ($sFeatureType) { @@ -215,11 +230,6 @@ class Geocode ); } - public function setNearPoint($aNearPoint, $fRadiusDeg = 0.1) - { - $this->aNearPoint = array((float)$aNearPoint[0], (float)$aNearPoint[1], (float)$fRadiusDeg); - } - public function setQuery($sQueryString) { $this->sQuery = $sQueryString; @@ -340,7 +350,7 @@ class Geocode return true; } - public function setStructuredQuery($sAmentiy = false, $sStreet = false, $sCity = false, $sCounty = false, $sState = false, $sCountry = false, $sPostalCode = false) + public function setStructuredQuery($sAmenity = false, $sStreet = false, $sCity = false, $sCounty = false, $sState = false, $sCountry = false, $sPostalCode = false) { $this->sQuery = false; @@ -352,7 +362,7 @@ class Geocode $this->aStructuredQuery = array(); $this->sAllowedTypesSQLList = ''; - $this->loadStructuredAddressElement($sAmentiy, 'amenity', 26, 30, false); + $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); $this->loadStructuredAddressElement($sCity, 'city', 14, 24, false); $this->loadStructuredAddressElement($sCounty, 'county', 9, 13, false); @@ -413,7 +423,7 @@ class Geocode $sSQL .= " rank_address,"; $sSQL .= " min(place_id) AS place_id, "; $sSQL .= " min(parent_place_id) AS parent_place_id, "; - $sSQL .= " calculated_country_code AS country_code, "; + $sSQL .= " country_code, "; $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; $sSQL .= " get_name_by_language(name, $sLanguagePrefArraySQL) AS placename,"; $sSQL .= " get_name_by_language(name, ARRAY['ref']) AS ref,"; @@ -456,7 +466,7 @@ class Geocode $sSQL .= " admin_level, "; $sSQL .= " rank_search, "; $sSQL .= " rank_address, "; - $sSQL .= " calculated_country_code, "; + $sSQL .= " country_code, "; $sSQL .= " importance, "; if (!$this->bDeDupe) $sSQL .= "place_id,"; $sSQL .= " langaddress, "; @@ -466,6 +476,35 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQL."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@ -541,7 +580,7 @@ class Geocode $sSQL .= " 30 AS rank_address, "; $sSQL .= " min(place_id) as place_id, "; $sSQL .= " min(parent_place_id) AS parent_place_id, "; - $sSQL .= " calculated_country_code AS country_code, "; + $sSQL .= " country_code, "; $sSQL .= " get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) AS langaddress, "; $sSQL .= " null AS placename, "; $sSQL .= " null AS ref, "; @@ -566,7 +605,7 @@ class Geocode $sSQL .= " SELECT "; $sSQL .= " osm_id, "; $sSQL .= " place_id, "; - $sSQL .= " calculated_country_code, "; + $sSQL .= " country_code, "; $sSQL .= " CASE "; // interpolate the housenumbers here $sSQL .= " WHEN startnumber != endnumber "; $sSQL .= " THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) "; @@ -585,7 +624,7 @@ class Geocode $sSQL .= " osm_id, "; $sSQL .= " place_id, "; $sSQL .= " housenumber_for_place, "; - $sSQL .= " calculated_country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique + $sSQL .= " country_code "; //is this group by really needed?, place_id + housenumber (in combination) are unique if (!$this->bDeDupe) $sSQL .= ", place_id "; if (CONST_Use_Aux_Location_data) { @@ -643,7 +682,7 @@ class Geocode return $aSearchResults; } - public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases) + public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery) { /* Calculate all searches using aValidTokens i.e. @@ -656,12 +695,12 @@ class Geocode Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -692,36 +731,33 @@ class Geocode if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['fLat'] === '') { - $aSearch['fLat'] = $aSearchTerm['lat']; - $aSearch['fLon'] = $aSearchTerm['lon']; - $aSearch['fRadius'] = $aSearchTerm['radius']; + if ($aSearch['oNear'] === false) { + $aSearch['oNear'] = new NearPoint( + $aSearchTerm['lat'], + $aSearchTerm['lon'], + $aSearchTerm['radius'] + ); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif ($sPhraseType == 'postalcode') { + } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($aSearchTerm['operator'] == '' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']) == 0)) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = substr($aSearchTerm['word_token'], 1); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($sPhraseType == 'postalcode' || sizeof($aSearch['aName'])) { + $aSearch['sPostcode'] = substr($aSearchTerm['word_token'], 1); + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { if ($aSearch['sHouseNumber'] === '') { @@ -740,13 +776,19 @@ class Geocode */ } } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) { - if ($aSearch['sClass'] === '') { - $aSearch['sOperator'] = $aSearchTerm['operator']; + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($aSearch['sClass'] === '' + && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { $aSearch['sClass'] = $aSearchTerm['class']; $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; + if ($aSearchTerm['operator'] == '') { + $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near'; + $aSearch['iSearchRank'] += 2; + } else { + $aSearch['sOperator'] = 'near'; // near = in for the moment + } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } @@ -899,7 +941,14 @@ class Geocode public function lookup() { - if (!$this->sQuery && !$this->aStructuredQuery) return false; + if (!$this->sQuery && !$this->aStructuredQuery) return array(); + + $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + if ($oNormalizer !== null) { + $sNormQuery = $oNormalizer->transliterate($this->sQuery); + } else { + $sNormQuery = null; + } $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; @@ -908,6 +957,9 @@ class Geocode } $sQuery = $this->sQuery; + if (!preg_match('//u', $sQuery)) { + userError("Query string is not UTF-8 encoded."); + } // Conflicts between US state abreviations and various words for 'the' in different languages if (isset($this->aLangPrefOrder['name:en'])) { @@ -933,8 +985,9 @@ class Geocode } // Do we have anything that looks like a lat/lon pair? - if ($aLooksLike = looksLikeLatLonPair($sQuery)) { - $this->setNearPoint(array($aLooksLike['lat'], $aLooksLike['lon'])); + $oNearPoint = false; + if ($aLooksLike = NearPoint::extractFromQuery($sQuery)) { + $oNearPoint = $aLooksLike['pt']; $sQuery = $aLooksLike['query']; } @@ -956,21 +1009,11 @@ class Geocode 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', - 'fLat' => '', - 'fLon' => '', - 'fRadius' => '' + 'sPostcode' => '', + 'oNear' => $oNearPoint ) ); - // Do we have a radius search? - $sNearPointSQL = false; - if ($this->aNearPoint) { - $sNearPointSQL = "ST_SetSRID(ST_Point(".(float)$this->aNearPoint[1].",".(float)$this->aNearPoint[0]."),4326)"; - $aSearches[0]['fLat'] = (float)$this->aNearPoint[0]; - $aSearches[0]['fLon'] = (float)$this->aNearPoint[1]; - $aSearches[0]['fRadius'] = (float)$this->aNearPoint[2]; - } - // Any 'special' terms in the search? $bSpecialTerms = false; preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); @@ -1091,21 +1134,9 @@ class Geocode } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { @@ -1134,7 +1165,7 @@ class Geocode // array with: placeid => -1 | tiger-housenumber $aResultPlaceIDs = array(); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@ -1146,7 +1177,7 @@ class Geocode $aFinalPhrase = end($aPhrases); $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@ -1229,13 +1260,16 @@ class Geocode if (CONST_Debug) echo "