X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a0de20e9bcfec679eb929f2bedfe2ac28db1590b..819b858ba70fa01484ec8547f3f34b654aa4181c:/lib/Geocode.php diff --git a/lib/Geocode.php b/lib/Geocode.php index 5a8ff199..ac92257f 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -25,7 +25,7 @@ class Geocode protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -63,7 +63,7 @@ class Geocode private function normTerm($sTerm) { if ($this->oNormalizer === null) { - return null; + return $sTerm; } return $this->oNormalizer->transliterate($sTerm); @@ -313,12 +313,7 @@ class Geocode if (count($aViewbox) != 4) { userError("Bad parmater 'viewbox'. Expected 4 coordinates."); } - $this->setViewBox(array( - $aViewbox[0], - $aViewbox[3], - $aViewbox[2], - $aViewbox[1] - )); + $this->setViewBox($aViewbox); } else { $aRoute = $oParams->getStringList('route'); $fRouteWidth = $oParams->getFloat('routewidth'); @@ -737,6 +732,8 @@ class Geocode // If the token is valid if (isset($aValidTokens[' '.$sToken])) { + // TODO variable should go into aCurrentSearch + $bHavePostcode = false; foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank']++; @@ -749,18 +746,19 @@ class Geocode } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { + } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && - isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + if (!$bHavePostcode && $aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && + isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { // If we have structured search or this is the first term, // make the postcode the primary search element. if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word']; + $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; + $bHavePostcode = true; } // If we have a structured search or this is not the first term, @@ -790,7 +788,7 @@ class Geocode // require a normalized exact match of the term // if we have the normalizer version of the query // available - if ($aSearch['sClass'] === '' + if ($aSearch['sOperator'] === '' && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { $aSearch['sClass'] = $aSearchTerm['class']; $aSearch['sType'] = $aSearchTerm['type']; @@ -915,6 +913,19 @@ class Geocode //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); } + + // Revisit searches, giving penalty to unlikely combinations + $aGroupedSearches = array(); + foreach ($aSearches as $aSearch) { + if (!$aSearch['aName']) { + if ($aSearch['sHouseNumber']) { + continue; + } + } + $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + } + ksort($aGroupedSearches); + return $aGroupedSearches; } @@ -1021,15 +1032,24 @@ class Geocode // Any 'special' terms in the search? $bSpecialTerms = false; - preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - $aSpecialTerms = array(); + preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); foreach ($aSpecialTermsRaw as $aSpecialTerm) { $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - $aSpecialTerms[strtolower($aSpecialTerm[1])] = $aSpecialTerm[2]; + if (!$bSpecialTerms) { + $aNewSearches = array(); + foreach ($aSearches as $aSearch) { + $aNewSearch = $aSearch; + $aNewSearch['sClass'] = $aSpecialTerm[1]; + $aNewSearch['sType'] = $aSpecialTerm[2]; + $aNewSearches[] = $aNewSearch; + } + + $aSearches = $aNewSearches; + $bSpecialTerms = true; + } } preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - $aSpecialTerms = array(); if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); unset($this->aStructuredQuery['amenity']); @@ -1037,32 +1057,28 @@ class Geocode foreach ($aSpecialTermsRaw as $aSpecialTerm) { $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string")); + if ($bSpecialTerms) { + continue; + } + + $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string")); $sSQL = 'SELECT * '; $sSQL .= 'FROM ( '; $sSQL .= ' SELECT word_id, word_token, word, class, type, country_code, operator'; $sSQL .= ' FROM word '; $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')'; $sSQL .= ') AS x '; - $sSQL .= ' WHERE (class is not null AND class not in (\'place\')) '; - $sSQL .= ' OR country_code is not null'; + $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))'; if (CONST_Debug) var_Dump($sSQL); $aSearchWords = chksql($this->oDB->getAll($sSQL)); $aNewSearches = array(); foreach ($aSearches as $aSearch) { foreach ($aSearchWords as $aSearchTerm) { $aNewSearch = $aSearch; - if ($aSearchTerm['country_code']) { - $aNewSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; - } - if ($aSearchTerm['class']) { - $aNewSearch['sClass'] = $aSearchTerm['class']; - $aNewSearch['sType'] = $aSearchTerm['type']; - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; - } + $aNewSearch['sClass'] = $aSearchTerm['class']; + $aNewSearch['sType'] = $aSearchTerm['type']; + $aNewSearches[] = $aNewSearch; + $bSpecialTerms = true; } } $aSearches = $aNewSearches; @@ -1111,14 +1127,10 @@ class Geocode if (CONST_Debug) var_Dump($sSQL); $aValidTokens = array(); - if (sizeof($aTokens)) { - $aDatabaseWords = chksql( - $this->oDB->getAll($sSQL), - "Could not get word tokens." - ); - } else { - $aDatabaseWords = array(); - } + $aDatabaseWords = chksql( + $this->oDB->getAll($sSQL), + "Could not get word tokens." + ); $aPossibleMainWordIDs = array(); $aWordFrequencyScores = array(); foreach ($aDatabaseWords as $aToken) { @@ -1269,8 +1281,8 @@ class Geocode } // No location term? - if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) { - if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { + if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) { + if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; @@ -1290,39 +1302,32 @@ class Geocode if (chksql($this->oDB->getOne($sSQL))) { $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)"; - $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; + if ($aSearch['oNear']) { + $sSQL .= " WHERE ".$aSearch['oNear']->withinSQL('ct.centroid'); + } else { + $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; + } if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; + if ($this->sViewboxCentreSQL) { + $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; + } elseif ($aSearch['oNear']) { + $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('ct.centroid').' ASC'; + } $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - - // If excluded place IDs are given, it is fair to assume that - // there have been results in the small box, so no further - // expansion in that case. - // Also don't expand if bounded results were requested. - if (!sizeof($aPlaceIDs) && !sizeof($this->aExcludePlaceIDs) && !$this->bBoundedSearch) { - $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; - if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " WHERE ST_Contains($this->sViewboxLargeSQL, ct.centroid)"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } - } else { + } else if ($aSearch['oNear']) { $sSQL = "SELECT place_id "; $sSQL .= "FROM placex "; $sSQL .= "WHERE class='".$aSearch['sClass']."' "; $sSQL .= " AND type='".$aSearch['sType']."'"; - $sSQL .= " AND ST_Contains($this->sViewboxSmallSQL, geometry) "; + $sSQL .= " AND ".$aSearch['oNear']->withinSQL('geometry'); $sSQL .= " AND linked_place_id is null"; if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, centroid) ASC"; + $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('centroid')." ASC"; $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); @@ -1385,19 +1390,20 @@ class Geocode // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { + /*if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } + }*/ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@ -1570,7 +1576,8 @@ class Geocode } // Fallback to the road (if no housenumber was found) - if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])) { + if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber']) + && ($aSearch['aAddress'] || $aSearch['sCountryCode'])) { $aPlaceIDs = $aRoadPlaceIDs; //set to -1, if no housenumbers were found $searchedHousenumber = -1; @@ -1646,7 +1653,7 @@ class Geocode } elseif ($sPlaceIDs) { $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; } elseif ($sPlaceGeom) { - $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; + $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; } $sSQL = "select distinct i.place_id".($sOrderBySQL?', i.order_term':'')." from ("; @@ -1682,7 +1689,7 @@ class Geocode $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; } - $sSQL = "SELECT distinct l.place_id".($sOrderBysSQL?','.$sOrderBysSQL:''); + $sSQL = "SELECT distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:''); $sSQL .= " FROM placex as l, placex as f "; $sSQL .= " WHERE f.place_id in ($sPlaceIDs) "; $sSQL .= " AND ST_DWithin(l.geometry, f.centroid, $fRange) "; @@ -1692,7 +1699,7 @@ class Geocode $sSQL .= " AND l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } if ($sCountryCodesSQL) $sSQL .= " AND l.country_code in ($sCountryCodesSQL)"; - if ($sOrderBy) $sSQL .= "ORDER BY ".$OrderBysSQL." ASC"; + if ($sOrderBySQL) $sSQL .= "ORDER BY ".$sOrderBySQL." ASC"; if ($this->iOffset) $sSQL .= " OFFSET $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1735,9 +1742,11 @@ class Geocode // Need to verify passes rank limits before dropping out of the loop (yuk!) // reduces the number of place ids, like a filter // rank_address is 30 for interpolated housenumbers + $sWherePlaceId = 'WHERE place_id in ('; + $sWherePlaceId .= join(',', array_keys($aResultPlaceIDs)).') '; + $sSQL = "SELECT place_id "; - $sSQL .= "FROM placex "; - $sSQL .= "WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") "; + $sSQL .= "FROM placex ".$sWherePlaceId; $sSQL .= " AND ("; $sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) { @@ -1746,20 +1755,23 @@ class Geocode if ($this->aAddressRankList) { $sSQL .= " OR placex.rank_address in (".join(',', $this->aAddressRankList).")"; } - if (CONST_Use_US_Tiger_Data) { - $sSQL .= " ) "; + $sSQL .= " ) UNION "; + $sSQL .= " SELECT place_id FROM location_postcode lp ".$sWherePlaceId; + $sSQL .= " AND (lp.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; + if ($this->aAddressRankList) { + $sSQL .= " OR lp.rank_address in (".join(',', $this->aAddressRankList).")"; + } + $sSQL .= ") "; + if (CONST_Use_US_Tiger_Data && $this->iMaxAddressRank == 30) { + $sSQL .= "UNION "; + $sSQL .= " SELECT place_id "; + $sSQL .= " FROM location_property_tiger ".$sWherePlaceId; + } + if ($this->iMaxAddressRank == 30) { $sSQL .= "UNION "; $sSQL .= " SELECT place_id "; - $sSQL .= " FROM location_property_tiger "; - $sSQL .= " WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") "; - $sSQL .= " AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',', $this->aAddressRankList).")"; + $sSQL .= " FROM location_property_osmline ".$sWherePlaceId; } - $sSQL .= ") UNION "; - $sSQL .= " SELECT place_id "; - $sSQL .= " FROM location_property_osmline "; - $sSQL .= " WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")"; - $sSQL .= " AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); $tempIDs = array();