X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/91129b37f605a06f85bc8aa07eede84fb16ad644..92061778f82345dce7138d79519d2ada5733dccd:/lib/Geocode.php?ds=sidebyside diff --git a/lib/Geocode.php b/lib/Geocode.php index ecd5be1c..15915f94 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -51,10 +51,22 @@ class Geocode protected $sQuery = false; protected $aStructuredQuery = false; + protected $oNormalizer = null; + public function __construct(&$oDB) { $this->oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + private function normTerm($sTerm) + { + if ($this->oNormalizer === null) { + return $sTerm; + } + + return $this->oNormalizer->transliterate($sTerm); } public function setReverseInPlan($bReverse) @@ -292,7 +304,7 @@ class Geocode $aViewbox = $oParams->getStringList('viewboxlbrt'); if ($aViewbox) { if (count($aViewbox) != 4) { - userError("Bad parmater 'viewbox'. Expected 4 coordinates."); + userError("Bad parmater 'viewboxlbrt'. Expected 4 coordinates."); } $this->setViewbox($aViewbox); } else { @@ -301,12 +313,7 @@ class Geocode if (count($aViewbox) != 4) { userError("Bad parmater 'viewbox'. Expected 4 coordinates."); } - $this->setViewBox(array( - $aViewbox[0], - $aViewbox[3], - $aViewbox[2], - $aViewbox[1] - )); + $this->setViewBox($aViewbox); } else { $aRoute = $oParams->getStringList('route'); $fRouteWidth = $oParams->getFloat('routewidth'); @@ -360,7 +367,7 @@ class Geocode $this->aAddressRankList = array(); $this->aStructuredQuery = array(); - $this->sAllowedTypesSQLList = ''; + $this->sAllowedTypesSQLList = False; $this->loadStructuredAddressElement($sAmenity, 'amenity', 26, 30, false); $this->loadStructuredAddressElement($sStreet, 'street', 26, 30, false); @@ -373,7 +380,7 @@ class Geocode if (sizeof($this->aStructuredQuery) > 0) { $this->sQuery = join(', ', $this->aStructuredQuery); if ($this->iMaxAddressRank < 30) { - $sAllowedTypesSQLList = '(\'place\',\'boundary\')'; + $this->sAllowedTypesSQLList = '(\'place\',\'boundary\')'; } } } @@ -410,8 +417,15 @@ class Geocode $sPlaceIDs = join(',', array_keys($aPlaceIDs)); $sImportanceSQL = ''; - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom = ''; + if ($this->sViewboxSmallSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; + } + if ($this->sViewboxLargeSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; + } $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@ -476,6 +490,35 @@ class Geocode if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = lp.parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode lp"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@ -653,7 +696,7 @@ class Geocode return $aSearchResults; } - public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases) + public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery) { /* Calculate all searches using aValidTokens i.e. @@ -666,12 +709,14 @@ class Geocode Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + $iGlobalRank = 0; + + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@ -689,6 +734,8 @@ class Geocode // If the token is valid if (isset($aValidTokens[' '.$sToken])) { + // TODO variable should go into aCurrentSearch + $bHavePostcode = false; foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank']++; @@ -700,49 +747,42 @@ class Geocode $aSearch['iSearchRank'] += 5; } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; + // If it is at the beginning, we can be almost sure that this is the wrong order + // Increase score for all searches. + if ($iToken == 0 && $iPhrase == 0) { + $iGlobalRank++; + } } - } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['oNear'] === false) { - $aSearch['oNear'] = new NearPoint( - $aSearchTerm['lat'], - $aSearchTerm['lon'], - $aSearchTerm['radius'] - ); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } elseif ($sPhraseType == 'postalcode') { + } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + if ($aSearch['sPostcode'] === '' && + isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']); if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; + $bHavePostcode = true; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { + $aSearch['sPostcode'] = $aSearchTerm['word']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '') { + if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { $aSearch['sHouseNumber'] = $sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++; - // also housenumbers should appear in the first or second phrase - if ($iPhrase > 1) $aSearch['iSearchRank'] += 1; + // also must not appear in the middle of the address + if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; /* // Fall back to not searching for this item (better than nothing) @@ -752,13 +792,19 @@ class Geocode */ } } elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) { - if ($aSearch['sClass'] === '') { - $aSearch['sOperator'] = $aSearchTerm['operator']; + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($aSearch['sOperator'] === '' + && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) { $aSearch['sClass'] = $aSearchTerm['class']; $aSearch['sType'] = $aSearchTerm['type']; - if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; - else $aSearch['sOperator'] = 'near'; // near = in for the moment - if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1; + if ($aSearchTerm['operator'] == '') { + $aSearch['sOperator'] = sizeof($aSearch['aName']) ? 'name' : 'near'; + $aSearch['iSearchRank'] += 2; + } else { + $aSearch['sOperator'] = 'near'; // near = in for the moment + } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } @@ -813,7 +859,8 @@ class Geocode } } - if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) { + if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch']) + && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank'] += 1; if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; @@ -874,6 +921,20 @@ class Geocode //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); } + + // Revisit searches, giving penalty to unlikely combinations + $aGroupedSearches = array(); + foreach ($aSearches as $aSearch) { + if (!$aSearch['aName']) { + if ($aSearch['sHouseNumber']) { + continue; + } + } + $aSearch['iSearchRank'] += $iGlobalRank; + $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch; + } + ksort($aGroupedSearches); + return $aGroupedSearches; } @@ -913,6 +974,7 @@ class Geocode { if (!$this->sQuery && !$this->aStructuredQuery) return array(); + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@ -972,21 +1034,31 @@ class Geocode 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', + 'sPostcode' => '', 'oNear' => $oNearPoint ) ); // Any 'special' terms in the search? $bSpecialTerms = false; - preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - $aSpecialTerms = array(); + preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); foreach ($aSpecialTermsRaw as $aSpecialTerm) { $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - $aSpecialTerms[strtolower($aSpecialTerm[1])] = $aSpecialTerm[2]; + if (!$bSpecialTerms) { + $aNewSearches = array(); + foreach ($aSearches as $aSearch) { + $aNewSearch = $aSearch; + $aNewSearch['sClass'] = $aSpecialTerm[1]; + $aNewSearch['sType'] = $aSpecialTerm[2]; + $aNewSearches[] = $aNewSearch; + } + + $aSearches = $aNewSearches; + $bSpecialTerms = true; + } } preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER); - $aSpecialTerms = array(); if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) { $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']); unset($this->aStructuredQuery['amenity']); @@ -994,32 +1066,28 @@ class Geocode foreach ($aSpecialTermsRaw as $aSpecialTerm) { $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery); - $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string")); + if ($bSpecialTerms) { + continue; + } + + $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".pg_escape_string($aSpecialTerm[1])."') AS string")); $sSQL = 'SELECT * '; $sSQL .= 'FROM ( '; $sSQL .= ' SELECT word_id, word_token, word, class, type, country_code, operator'; $sSQL .= ' FROM word '; $sSQL .= ' WHERE word_token in (\' '.$sToken.'\')'; $sSQL .= ') AS x '; - $sSQL .= ' WHERE (class is not null AND class not in (\'place\')) '; - $sSQL .= ' OR country_code is not null'; + $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))'; if (CONST_Debug) var_Dump($sSQL); $aSearchWords = chksql($this->oDB->getAll($sSQL)); $aNewSearches = array(); foreach ($aSearches as $aSearch) { foreach ($aSearchWords as $aSearchTerm) { $aNewSearch = $aSearch; - if ($aSearchTerm['country_code']) { - $aNewSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']); - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; - } - if ($aSearchTerm['class']) { - $aNewSearch['sClass'] = $aSearchTerm['class']; - $aNewSearch['sType'] = $aSearchTerm['type']; - $aNewSearches[] = $aNewSearch; - $bSpecialTerms = true; - } + $aNewSearch['sClass'] = $aSearchTerm['class']; + $aNewSearch['sType'] = $aSearchTerm['type']; + $aNewSearches[] = $aNewSearch; + $bSpecialTerms = true; } } $aSearches = $aNewSearches; @@ -1068,14 +1136,10 @@ class Geocode if (CONST_Debug) var_Dump($sSQL); $aValidTokens = array(); - if (sizeof($aTokens)) { - $aDatabaseWords = chksql( - $this->oDB->getAll($sSQL), - "Could not get word tokens." - ); - } else { - $aDatabaseWords = array(); - } + $aDatabaseWords = chksql( + $this->oDB->getAll($sSQL), + "Could not get word tokens." + ); $aPossibleMainWordIDs = array(); $aWordFrequencyScores = array(); foreach ($aDatabaseWords as $aToken) { @@ -1096,21 +1160,9 @@ class Geocode } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { @@ -1139,7 +1191,7 @@ class Geocode // array with: placeid => -1 | tiger-housenumber $aResultPlaceIDs = array(); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@ -1151,7 +1203,7 @@ class Geocode $aFinalPhrase = end($aPhrases); $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@ -1177,7 +1229,6 @@ class Geocode ksort($aGroupedSearches); } - if (CONST_Debug) var_Dump($aGroupedSearches); if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { @@ -1234,13 +1285,16 @@ class Geocode if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); + if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + continue; + } + // No location term? - if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) { - if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { + if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) { + if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -1257,39 +1311,32 @@ class Geocode if (chksql($this->oDB->getOne($sSQL))) { $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; if ($sCountryCodesSQL) $sSQL .= " JOIN placex USING (place_id)"; - $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; + if ($aSearch['oNear']) { + $sSQL .= " WHERE ".$aSearch['oNear']->withinSQL('ct.centroid'); + } else { + $sSQL .= " WHERE st_contains($this->sViewboxSmallSQL, ct.centroid)"; + } if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if (sizeof($this->aExcludePlaceIDs)) { $sSQL .= " AND place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; + if ($this->sViewboxCentreSQL) { + $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; + } elseif ($aSearch['oNear']) { + $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('ct.centroid').' ASC'; + } $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - - // If excluded place IDs are given, it is fair to assume that - // there have been results in the small box, so no further - // expansion in that case. - // Also don't expand if bounded results were requested. - if (!sizeof($aPlaceIDs) && !sizeof($this->aExcludePlaceIDs) && !$this->bBoundedSearch) { - $sSQL = "SELECT place_id FROM place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct"; - if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)"; - $sSQL .= " WHERE ST_Contains($this->sViewboxLargeSQL, ct.centroid)"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, ct.centroid) ASC"; - $sSQL .= " LIMIT $this->iLimit"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); - } - } else { + } else if ($aSearch['oNear']) { $sSQL = "SELECT place_id "; $sSQL .= "FROM placex "; $sSQL .= "WHERE class='".$aSearch['sClass']."' "; $sSQL .= " AND type='".$aSearch['sType']."'"; - $sSQL .= " AND ST_Contains($this->sViewboxSmallSQL, geometry) "; + $sSQL .= " AND ".$aSearch['oNear']->withinSQL('geometry'); $sSQL .= " AND linked_place_id is null"; if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; - if ($this->sViewboxCentreSQL) $sSQL .= " ORDER BY ST_Distance($this->sViewboxCentreSQL, centroid) ASC"; + $sSQL .= " ORDER BY ".$aSearch['oNear']->distanceSQL('centroid')." ASC"; $sSQL .= " LIMIT $this->iLimit"; if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); @@ -1299,6 +1346,24 @@ class Geocode // If a coordinate is given, the search must either // be for a name or a special search. Ignore everythin else. $aPlaceIDs = array(); + } elseif ($aSearch['sOperator'] == 'postcode') { + $sSQL = "SELECT p.place_id FROM location_postcode p "; + if (sizeof($aSearch['aAddress'])) { + $sSQL .= ", search_name s "; + $sSQL .= "WHERE s.place_id = p.parent_place_id "; + $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND "; + } else { + $sSQL .= " WHERE "; + } + $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; + if ($aSearch['sCountryCode']) { + $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; + } elseif ($sCountryCodesSQL) { + $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; + } + $sSQL .= " LIMIT $this->iLimit"; + if (CONST_Debug) var_dump($sSQL); + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); } else { $aPlaceIDs = array(); @@ -1334,26 +1399,25 @@ class Geocode // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - /*if (sizeof($aSearch['aAddressNonSearch'])) { + if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - }*/ + } } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; if ($aSearch['sHouseNumber']) { $aTerms[] = "address_rank between 16 and 27"; - } else { + } elseif (!$aSearch['sClass'] || $aSearch['sOperator'] == 'name') { if ($this->iMinAddressRank > 0) { $aTerms[] = "address_rank >= ".$this->iMinAddressRank; } @@ -1365,6 +1429,12 @@ class Geocode $aTerms[] = $aSearch['oNear']->withinSQL('centroid'); $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); + } elseif ($aSearch['sPostcode']) { + if (!sizeof($aSearch['aAddress'])) { + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + } else { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; + } } if (sizeof($this->aExcludePlaceIDs)) { $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")"; @@ -1514,7 +1584,8 @@ class Geocode } // Fallback to the road (if no housenumber was found) - if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])) { + if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber']) + && ($aSearch['aAddress'] || $aSearch['sCountryCode'])) { $aPlaceIDs = $aRoadPlaceIDs; //set to -1, if no housenumbers were found $searchedHousenumber = -1; @@ -1543,8 +1614,9 @@ class Geocode } if (!$aSearch['sOperator'] || $aSearch['sOperator'] == 'near') { // & in + $sClassTable = 'place_classtype_'.$aSearch['sClass'].'_'.$aSearch['sType']; $sSQL = "SELECT count(*) FROM pg_tables "; - $sSQL .= "WHERE tablename = 'place_classtype_".$aSearch['sClass']."_".$aSearch['sType']."'"; + $sSQL .= "WHERE tablename = '$sClassTable'"; $bCacheTable = chksql($this->oDB->getOne($sSQL)); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; @@ -1589,10 +1661,11 @@ class Geocode } elseif ($sPlaceIDs) { $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; } elseif ($sPlaceGeom) { - $sOrderBysSQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; + $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; } - $sSQL = "select distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:'')." from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." as l"; + $sSQL = "select distinct i.place_id".($sOrderBySQL?', i.order_term':'')." from ("; + $sSQL .= "select l.place_id".($sOrderBySQL?','.$sOrderBySQL.' as order_term':'')." from ".$sClassTable." as l"; if ($sCountryCodesSQL) $sSQL .= " join placex as lp using (place_id)"; if ($sPlaceIDs) { $sSQL .= ",placex as f where "; @@ -1606,7 +1679,8 @@ class Geocode $sSQL .= " and l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } if ($sCountryCodesSQL) $sSQL .= " and lp.country_code in ($sCountryCodesSQL)"; - if ($sOrderBySQL) $sSQL .= "order by ".$sOrderBySQL." asc"; + $sSQL .= 'limit 300) i '; + if ($sOrderBySQL) $sSQL .= "order by order_term asc"; if ($this->iOffset) $sSQL .= " offset $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1623,7 +1697,7 @@ class Geocode $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; } - $sSQL = "SELECT distinct l.place_id".($sOrderBysSQL?','.$sOrderBysSQL:''); + $sSQL = "SELECT distinct l.place_id".($sOrderBySQL?','.$sOrderBySQL:''); $sSQL .= " FROM placex as l, placex as f "; $sSQL .= " WHERE f.place_id in ($sPlaceIDs) "; $sSQL .= " AND ST_DWithin(l.geometry, f.centroid, $fRange) "; @@ -1633,7 +1707,7 @@ class Geocode $sSQL .= " AND l.place_id not in (".join(',', $this->aExcludePlaceIDs).")"; } if ($sCountryCodesSQL) $sSQL .= " AND l.country_code in ($sCountryCodesSQL)"; - if ($sOrderBy) $sSQL .= "ORDER BY ".$OrderBysSQL." ASC"; + if ($sOrderBySQL) $sSQL .= "ORDER BY ".$sOrderBySQL." ASC"; if ($this->iOffset) $sSQL .= " OFFSET $this->iOffset"; $sSQL .= " limit $this->iLimit"; if (CONST_Debug) var_dump($sSQL); @@ -1650,6 +1724,21 @@ class Geocode var_Dump($aPlaceIDs); } + if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; + $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aPlaceIDs = $aFilteredPlaceIDs; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_Dump($aPlaceIDs); + } + } + } + foreach ($aPlaceIDs as $iPlaceID) { // array for placeID => -1 | Tiger housenumber $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; @@ -1661,9 +1750,11 @@ class Geocode // Need to verify passes rank limits before dropping out of the loop (yuk!) // reduces the number of place ids, like a filter // rank_address is 30 for interpolated housenumbers + $sWherePlaceId = 'WHERE place_id in ('; + $sWherePlaceId .= join(',', array_keys($aResultPlaceIDs)).') '; + $sSQL = "SELECT place_id "; - $sSQL .= "FROM placex "; - $sSQL .= "WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") "; + $sSQL .= "FROM placex ".$sWherePlaceId; $sSQL .= " AND ("; $sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) { @@ -1672,20 +1763,23 @@ class Geocode if ($this->aAddressRankList) { $sSQL .= " OR placex.rank_address in (".join(',', $this->aAddressRankList).")"; } - if (CONST_Use_US_Tiger_Data) { - $sSQL .= " ) "; + $sSQL .= " ) UNION "; + $sSQL .= " SELECT place_id FROM location_postcode lp ".$sWherePlaceId; + $sSQL .= " AND (lp.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; + if ($this->aAddressRankList) { + $sSQL .= " OR lp.rank_address in (".join(',', $this->aAddressRankList).")"; + } + $sSQL .= ") "; + if (CONST_Use_US_Tiger_Data && $this->iMaxAddressRank == 30) { + $sSQL .= "UNION "; + $sSQL .= " SELECT place_id "; + $sSQL .= " FROM location_property_tiger ".$sWherePlaceId; + } + if ($this->iMaxAddressRank == 30) { $sSQL .= "UNION "; $sSQL .= " SELECT place_id "; - $sSQL .= " FROM location_property_tiger "; - $sSQL .= " WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") "; - $sSQL .= " AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',', $this->aAddressRankList).")"; + $sSQL .= " FROM location_property_osmline ".$sWherePlaceId; } - $sSQL .= ") UNION "; - $sSQL .= " SELECT place_id "; - $sSQL .= " FROM location_property_osmline "; - $sSQL .= " WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")"; - $sSQL .= " AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); $tempIDs = array();