From: Sarah Hoffmann Date: Sat, 16 Sep 2017 14:14:01 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~373 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/115c909fdf65966774940ef957cf0bd63ba844f2?hp=-c Merge remote-tracking branch 'upstream/master' --- 115c909fdf65966774940ef957cf0bd63ba844f2 diff --combined lib/Geocode.php index b0f0c764,b8933704..eb6152aa --- a/lib/Geocode.php +++ b/lib/Geocode.php @@@ -25,7 -25,7 +25,7 @@@ class Geocod protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@@ -51,10 -51,22 +51,22 @@@ protected $sQuery = false; protected $aStructuredQuery = false; + protected $oNormalizer = null; + public function __construct(&$oDB) { $this->oDB =& $oDB; + $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); + } + + private function normTerm($sTerm) + { + if ($this->oNormalizer === null) { + return null; + } + + return $this->oNormalizer->transliterate($sTerm); } public function setReverseInPlan($bReverse) @@@ -410,8 -422,15 +422,15 @@@ $sPlaceIDs = join(',', array_keys($aPlaceIDs)); $sImportanceSQL = ''; - if ($this->sViewboxSmallSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; - if ($this->sViewboxLargeSQL) $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom = ''; + if ($this->sViewboxSmallSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxSmallSQL, geometry) THEN 1 ELSE 0.75 END * "; + } + if ($this->sViewboxLargeSQL) { + $sImportanceSQL .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sImportanceSQLGeom .= " CASE WHEN ST_Contains($this->sViewboxLargeSQL, geometry) THEN 1 ELSE 0.75 END * "; + } $sSQL = "SELECT "; $sSQL .= " osm_type,"; @@@ -476,6 -495,35 +495,35 @@@ if ($this->bIncludeNameDetails) $sSQL .= "name, "; $sSQL .= " extratags->'place' "; + // postcode table + $sSQL .= "UNION "; + $sSQL .= "SELECT"; + $sSQL .= " 'P' as osm_type,"; + $sSQL .= " (SELECT osm_id from placex p WHERE p.place_id = lp.parent_place_id) as osm_id,"; + $sSQL .= " 'place' as class, 'postcode' as type,"; + $sSQL .= " null as admin_level, rank_search, rank_address,"; + $sSQL .= " place_id, parent_place_id, country_code,"; + $sSQL .= " get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) AS langaddress,"; + $sSQL .= " postcode as placename,"; + $sSQL .= " postcode as ref,"; + if ($this->bIncludeExtraTags) $sSQL .= "null AS extra,"; + if ($this->bIncludeNameDetails) $sSQL .= "null AS names,"; + $sSQL .= " ST_x(st_centroid(geometry)) AS lon, ST_y(st_centroid(geometry)) AS lat,"; + $sSQL .= $sImportanceSQLGeom."(0.75-(rank_search::float/40)) AS importance, "; + $sSQL .= " ("; + $sSQL .= " SELECT max(p.importance*(p.rank_address+2))"; + $sSQL .= " FROM "; + $sSQL .= " place_addressline s, "; + $sSQL .= " placex p"; + $sSQL .= " WHERE s.place_id = lp.parent_place_id"; + $sSQL .= " AND p.place_id = s.address_place_id "; + $sSQL .= " AND s.isaddress"; + $sSQL .= " AND p.importance is not null"; + $sSQL .= " ) AS addressimportance, "; + $sSQL .= " null AS extra_place "; + $sSQL .= "FROM location_postcode lp"; + $sSQL .= " WHERE place_id in ($sPlaceIDs) "; + if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { // only Tiger housenumbers and interpolation lines need to be interpolated, because they are saved as lines // with start- and endnumber, the common osm housenumbers are usually saved as points @@@ -666,12 -714,12 +714,12 @@@ Score how good the search is so they can be ordered */ - foreach ($aPhrases as $iPhrase => $sPhrase) { + foreach ($aPhrases as $iPhrase => $aPhrase) { $aNewPhraseSearches = array(); if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase]; else $sPhraseType = ''; - foreach ($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset) { + foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) { // Too many permutations - too expensive if ($iWordSet > 120) break; @@@ -701,42 -749,29 +749,29 @@@ } if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - } elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) { - if ($aSearch['oNear'] === false) { - $aSearch['oNear'] = new NearPoint( - $aSearchTerm['lat'], - $aSearchTerm['lon'], - $aSearchTerm['radius'] - ); - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; - } - } elseif ($sPhraseType == 'postalcode') { + } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) { // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both - if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - // If we already have a name try putting the postcode first - if (sizeof($aSearch['aName'])) { + if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' && + isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) { $aNewSearch = $aSearch; + $aNewSearch['sOperator'] = 'postcode'; $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']); - $aNewSearch['aName'] = array(); - $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch; } - if (sizeof($aSearch['aName'])) { - if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strpos($sToken, ' ') !== false)) { - $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - } else { - $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - $aSearch['iSearchRank'] += 1000; // skip; - } - } else { - $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - //$aSearch['iNamePhrase'] = $iPhrase; + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($aSearch['sOperator'] !== 'postcode' && ($sPhraseType == 'postalcode' || sizeof($aSearch['aName']))) { + $aSearch['sPostcode'] = $aSearchTerm['word']; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') { - if ($aSearch['sHouseNumber'] === '') { + if ($aSearch['sHouseNumber'] === '' && $aSearch['sOperator'] !== 'postcode') { $aSearch['sHouseNumber'] = $sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty @@@ -919,13 -954,7 +954,7 @@@ { if (!$this->sQuery && !$this->aStructuredQuery) return array(); - $oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules); - if ($oNormalizer !== null) { - $sNormQuery = $oNormalizer->transliterate($this->sQuery); - } else { - $sNormQuery = null; - } - + $sNormQuery = $this->normTerm($this->sQuery); $sLanguagePrefArraySQL = "ARRAY[".join(',', array_map("getDBQuoted", $this->aLangPrefOrder))."]"; $sCountryCodesSQL = false; if ($this->aCountryCodes) { @@@ -985,6 -1014,7 +1014,7 @@@ 'sClass' => '', 'sType' => '', 'sHouseNumber' => '', + 'sPostcode' => '', 'oNear' => $oNearPoint ) ); @@@ -1109,21 -1139,9 +1139,9 @@@ } if (CONST_Debug) var_Dump($aPhrases, $aValidTokens); - // Try and calculate GB postcodes we might be missing + // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code foreach ($aTokens as $sToken) { - // Source of gb postcodes is now definitive - always use - if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { - if (substr($aData[1], -2, 1) != ' ') { - $aData[0] = substr($aData[0], 0, strlen($aData[1])-1).' '.substr($aData[0], strlen($aData[1])-1); - $aData[1] = substr($aData[1], 0, -1).' '.substr($aData[1], -1, 1); - } - $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $this->oDB); - if ($aGBPostcodeLocation) { - $aValidTokens[$sToken] = $aGBPostcodeLocation; - } - } elseif (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - // US ZIP+4 codes - if there is no token, - // merge in the 5-digit ZIP code + if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { if (isset($aValidTokens[$aData[1]])) { foreach ($aValidTokens[$aData[1]] as $aToken) { if (!$aToken['class']) { @@@ -1190,7 -1208,6 +1208,6 @@@ ksort($aGroupedSearches); } - if (CONST_Debug) var_Dump($aGroupedSearches); if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) { $aCopyGroupedSearches = $aGroupedSearches; foreach ($aCopyGroupedSearches as $iGroup => $aSearches) { @@@ -1247,13 -1264,16 +1264,16 @@@ if (CONST_Debug) echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens); + if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) { + continue; + } + // No location term? if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress']) && !$aSearch['oNear']) { if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber']) { // Just looking for a country by code - look it up if (4 >= $this->iMinAddressRank && 4 <= $this->iMaxAddressRank) { $sSQL = "SELECT place_id FROM placex WHERE country_code='".$aSearch['sCountryCode']."' AND rank_search = 4"; - if ($sCountryCodesSQL) $sSQL .= " AND country_code in ($sCountryCodesSQL)"; if ($bBoundingBoxSearch) $sSQL .= " AND _st_intersects($this->sViewboxSmallSQL, geometry)"; $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@@ -1312,6 -1332,24 +1332,24 @@@ // If a coordinate is given, the search must either // be for a name or a special search. Ignore everythin else. $aPlaceIDs = array(); + } elseif ($aSearch['sOperator'] == 'postcode') { + $sSQL = "SELECT p.place_id FROM location_postcode p "; + if (sizeof($aSearch['aAddress'])) { + $sSQL .= ", search_name s "; + $sSQL .= "WHERE s.place_id = p.parent_place_id "; + $sSQL .= "AND array_cat(s.nameaddress_vector, s.name_vector) @> ARRAY[".join($aSearch['aAddress'], ",")."] AND "; + } else { + $sSQL .= " WHERE "; + } + $sSQL .= "p.postcode = '".pg_escape_string(reset($aSearch['aName']))."'"; + if ($aSearch['sCountryCode']) { + $sSQL .= " AND p.country_code = '".$aSearch['sCountryCode']."'"; + } elseif ($sCountryCodesSQL) { + $sSQL .= " AND p.country_code in ($sCountryCodesSQL)"; + } + $sSQL .= " LIMIT $this->iLimit"; + if (CONST_Debug) var_dump($sSQL); + $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); } else { $aPlaceIDs = array(); @@@ -1347,20 -1385,19 +1385,20 @@@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address if (CONST_Search_NameOnlySearchFrequencyThreshold && sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold ) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) { + /*if (sizeof($aSearch['aAddressNonSearch'])) { $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]"; - } + }*/ } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@@ -1378,6 -1415,12 +1416,12 @@@ $aTerms[] = $aSearch['oNear']->withinSQL('centroid'); $aOrder[] = $aSearch['oNear']->distanceSQL('centroid'); + } elseif ($aSearch['sPostcode']) { + if (!sizeof($aSearch['aAddress'])) { + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + } else { + $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$aSearch['sPostcode']."')"; + } } if (sizeof($this->aExcludePlaceIDs)) { $aTerms[] = "place_id not in (".join(',', $this->aExcludePlaceIDs).")"; @@@ -1666,6 -1709,21 +1710,21 @@@ var_Dump($aPlaceIDs); } + if (sizeof($aPlaceIDs) && $aSearch['sPostcode']) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; + $sSQL .= " AND postcode = '".pg_escape_string($aSearch['sPostcode'])."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aPlaceIDs = $aFilteredPlaceIDs; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_Dump($aPlaceIDs); + } + } + } + foreach ($aPlaceIDs as $iPlaceID) { // array for placeID => -1 | Tiger housenumber $aResultPlaceIDs[$iPlaceID] = $searchedHousenumber; diff --combined lib/lib.php index c6eadc6c,8f353a4d..28b44c50 --- a/lib/lib.php +++ b/lib/lib.php @@@ -116,25 -116,6 +116,6 @@@ function getTokensFromSets($aSets } - function gbPostcodeCalculate($sPostcode, $sPostcodeSector, $sPostcodeEnd, &$oDB) - { - // Try an exact match on the gb_postcode table - $sSQL = 'select \'AA\', ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where postcode = \''.$sPostcode.'\''; - $aNearPostcodes = chksql($oDB->getAll($sSQL)); - - if (sizeof($aNearPostcodes)) { - $aPostcodes = array(); - foreach ($aNearPostcodes as $aPostcode) { - $aPostcodes[] = array('lat' => $aPostcode['lat'], 'lon' => $aPostcode['lon'], 'radius' => 0.005); - } - - return $aPostcodes; - } - - return false; - } - - function getClassTypes() { return array( @@@ -516,7 -497,7 +497,7 @@@ function _debugDumpGroupedSearches($aDa echo ""; echo ""; echo ""; - echo ""; + echo ""; echo ""; foreach ($aData as $iRank => $aRankedSet) { foreach ($aRankedSet as $aRow) { @@@ -561,6 -542,7 +542,7 @@@ echo ""; echo ""; + echo ""; echo ""; echo ""; @@@ -631,10 -613,10 +613,10 @@@ function geometryText2Points($geometry_ // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); // - } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { +/* } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); - // + */ } elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#', $geometry_as_text, $aMatch)) { // $aPolyPoints = createPointsAroundCenter($aMatch[1], $aMatch[2], $fRadius); diff --combined sql/functions.sql index e00c97a2,a9bca542..820bdbf8 --- a/sql/functions.sql +++ b/sql/functions.sql @@@ -83,6 -83,26 +83,26 @@@ END $$ LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER + AS $$ + DECLARE + lookup_token TEXT; + lookup_word TEXT; + return_word_id INTEGER; + BEGIN + lookup_word := upper(trim(postcode)); + lookup_token := ' ' || make_standard_name(lookup_word); + SELECT min(word_id) FROM word WHERE word_token = lookup_token and class='place' and type='postcode' into return_word_id; + IF return_word_id IS NULL THEN + return_word_id := nextval('seq_word'); + INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word, 'place', 'postcode', null, 0); + END IF; + RETURN return_word_id; + END; + $$ + LANGUAGE plpgsql; + CREATE OR REPLACE FUNCTION getorcreate_country(lookup_word TEXT, lookup_country_code varchar(2)) RETURNS INTEGER AS $$ @@@ -236,6 -256,99 +256,99 @@@ END $$ LANGUAGE plpgsql IMMUTABLE; + + CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT, + OUT rank_search SMALLINT, OUT rank_address SMALLINT) + AS $$ + DECLARE + part TEXT; + BEGIN + rank_search := 30; + rank_address := 30; + postcode := upper(postcode); + + IF country_code = 'gb' THEN + IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN + rank_search := 25; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN + rank_search := 23; + rank_address := 5; + ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN + rank_search := 21; + rank_address := 5; + END IF; + + ELSEIF country_code = 'sg' THEN + IF postcode ~ '^([0-9]{6})$' THEN + rank_search := 25; + rank_address := 11; + END IF; + + ELSEIF country_code = 'de' THEN + IF postcode ~ '^([0-9]{5})$' THEN + rank_search := 21; + rank_address := 11; + END IF; + + ELSE + -- Guess at the postcode format and coverage (!) + IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local + rank_search := 21; + rank_address := 11; + ELSE + -- Does it look splitable into and area and local code? + part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); + + IF part IS NOT NULL THEN + rank_search := 25; + rank_address := 11; + ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN + rank_search := 21; + rank_address := 11; + END IF; + END IF; + END IF; + + END; + $$ + LANGUAGE plpgsql IMMUTABLE; + + -- Find the nearest artificial postcode for the given geometry. + -- TODO For areas there should not be more than two inside the geometry. + CREATE OR REPLACE FUNCTION get_nearest_postcode(country VARCHAR(2), geom GEOMETRY) RETURNS TEXT + AS $$ + DECLARE + outcode TEXT; + cnt INTEGER; + BEGIN + -- If the geometry is an area then only one postcode must be within + -- that area, otherwise consider the area as not having a postcode. + IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN + SELECT min(postcode), count(*) FROM + (SELECT postcode FROM location_postcode + WHERE ST_Contains(geom, location_postcode.geometry) LIMIT 2) sub + INTO outcode, cnt; + + IF cnt = 1 THEN + RETURN outcode; + ELSE + RETURN null; + END IF; + END IF; + + SELECT postcode FROM location_postcode + WHERE ST_DWithin(geom, location_postcode.geometry, 0.05) + AND location_postcode.country_code = country + ORDER BY ST_Distance(geom, location_postcode.geometry) LIMIT 1 + INTO outcode; + + RETURN outcode; + END; + $$ + LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION create_country(src HSTORE, lookup_country_code varchar(2)) RETURNS VOID AS $$ DECLARE @@@ -515,36 -628,38 +628,38 @@@ CREATE OR REPLACE FUNCTION add_location keywords INTEGER[], rank_search INTEGER, rank_address INTEGER, + in_postcode TEXT, geometry GEOMETRY ) RETURNS BOOLEAN AS $$ DECLARE locationid INTEGER; - isarea BOOLEAN; centroid GEOMETRY; diameter FLOAT; x BOOLEAN; splitGeom RECORD; secgeo GEOMETRY; + postcode TEXT; BEGIN IF rank_search > 25 THEN RAISE EXCEPTION 'Adding location with rank > 25 (% rank %)', place_id, rank_search; END IF; - -- RAISE WARNING 'Adding location with rank > 25 (% rank %)', place_id, rank_search; - x := deleteLocationArea(partition, place_id, rank_search); - isarea := false; - IF (ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(geometry)) THEN + -- add postcode only if it contains a single entry, i.e. ignore postcode lists + postcode := NULL; + IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN + postcode := upper(trim (in_postcode)); + END IF; - isArea := true; + IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon') THEN centroid := ST_Centroid(geometry); FOR secgeo IN select split_geometry(geometry) AS geom LOOP - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, centroid, secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, false, postcode, centroid, secgeo); END LOOP; ELSE @@@ -569,7 -684,7 +684,7 @@@ -- RAISE WARNING 'adding % diameter %', place_id, diameter; secgeo := ST_Buffer(geometry, diameter); - x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, ST_Centroid(geometry), secgeo); + x := insertLocationAreaLarge(partition, place_id, country_code, keywords, rank_search, rank_address, true, postcode, ST_Centroid(geometry), secgeo); END IF; @@@ -722,54 -837,10 +837,10 @@@ BEGI RETURN NULL; END IF; - NEW.postcode := NEW.address->'postcode'; - NEW.name := hstore('ref', NEW.postcode); - - IF NEW.country_code = 'gb' THEN - - IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN - NEW.rank_search := 25; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN - NEW.rank_search := 23; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN - NEW.rank_search := 21; - NEW.rank_address := 5; - END IF; - - ELSEIF NEW.country_code = 'sg' THEN + NEW.name := hstore('ref', NEW.address->'postcode'); - IF NEW.postcode ~ '^([0-9]{6})$' THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - END IF; - - ELSEIF NEW.country_code = 'de' THEN - - IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - - ELSE - -- Guess at the postcode format and coverage (!) - IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.rank_search := 21; - NEW.rank_address := 11; - ELSE - -- Does it look splitable into and area and local code? - postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); - - IF postcode IS NOT NULL THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - END IF; - END IF; + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode') + INTO NEW.rank_search, NEW.rank_address; ELSEIF NEW.class = 'place' THEN IF NEW.type in ('continent') THEN @@@ -988,8 -1059,8 +1059,8 @@@ DECLAR linegeo GEOMETRY; splitline GEOMETRY; sectiongeo GEOMETRY; + interpol_postcode TEXT; postcode TEXT; - seg_postcode TEXT; BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN @@@ -1008,9 -1079,11 +1079,11 @@@ NEW.address->'place', NEW.partition, place_centroid, NEW.linegeo); - - IF NEW.address is not NULL and NEW.address ? 'postcode' THEN - NEW.postcode = NEW.address->'postcode'; + IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + interpol_postcode := NEW.address->'postcode'; + housenum := getorcreate_postcode_id(NEW.address->'postcode'); + ELSE + interpol_postcode := NULL; END IF; -- if the line was newly inserted, split the line as necessary @@@ -1023,7 -1096,6 +1096,6 @@@ linegeo := NEW.linegeo; startnumber := NULL; - postcode := NEW.postcode; FOR nodeidpos in 1..array_upper(waynodes, 1) LOOP @@@ -1056,15 -1128,24 +1128,24 @@@ sectiongeo := ST_Reverse(sectiongeo); END IF; - seg_postcode := coalesce(postcode, - prevnode.address->'postcode', - nextnode.address->'postcode'); + -- determine postcode + postcode := coalesce(interpol_postcode, + prevnode.address->'postcode', + nextnode.address->'postcode', + postcode); + + IF postcode is NULL THEN + SELECT placex.postcode FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + END IF; + IF postcode is NULL THEN + postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); + END IF; IF NEW.startnumber IS NULL THEN NEW.startnumber := startnumber; NEW.endnumber := endnumber; NEW.linegeo := sectiongeo; - NEW.postcode := seg_postcode; + NEW.postcode := upper(trim(postcode)); ELSE insert into location_property_osmline (linegeo, partition, osm_id, parent_place_id, @@@ -1073,7 -1154,7 +1154,7 @@@ geometry_sector, indexed_status) values (sectiongeo, NEW.partition, NEW.osm_id, NEW.parent_place_id, startnumber, endnumber, NEW.interpolationtype, - NEW.address, seg_postcode, + NEW.address, postcode, NEW.country_code, NEW.geometry_sector, 0); END IF; END IF; @@@ -1097,7 -1178,42 +1178,42 @@@ END $$ LANGUAGE plpgsql; + -- Trigger for updates of location_postcode + -- + -- Computes the parent object the postcode most likely refers to. + -- This will be the place that determines the address displayed when + -- searching for this postcode. + CREATE OR REPLACE FUNCTION postcode_update() RETURNS + TRIGGER + AS $$ + DECLARE + partition SMALLINT; + location RECORD; + BEGIN + IF NEW.indexed_status != 0 OR OLD.indexed_status = 0 THEN + RETURN NEW; + END IF; + + NEW.indexed_date = now(); + partition := get_partition(NEW.country_code); + + SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode) + INTO NEW.rank_search, NEW.rank_address; + + NEW.parent_place_id = 0; + FOR location IN + SELECT place_id + FROM getNearFeatures(partition, NEW.geometry, NEW.rank_search, '{}'::int[]) + WHERE NOT isguess ORDER BY rank_address DESC LIMIT 1 + LOOP + NEW.parent_place_id = location.place_id; + END LOOP; + + RETURN NEW; + END; + $$ + LANGUAGE plpgsql; CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER @@@ -1161,11 -1277,6 +1277,6 @@@ BEGI --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; - IF NEW.class = 'place' AND NEW.type = 'postcodearea' THEN - -- Silently do nothing - RETURN NEW; - END IF; - NEW.indexed_date = now(); result := deleteSearchName(NEW.partition, NEW.place_id); @@@ -1202,13 -1313,16 +1313,16 @@@ addr_street = NEW.address->'street'; addr_place = NEW.address->'place'; - NEW.postcode = NEW.address->'postcode'; + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + i := getorcreate_postcode_id(NEW.address->'postcode'); + END IF; END IF; -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate place_centroid := ST_PointOnSurface(NEW.geometry); NEW.centroid := null; + NEW.postcode := null; --DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(place_centroid); -- recalculate country and partition @@@ -1445,25 -1559,16 +1559,16 @@@ NEW.country_code := location.country_code; --DEBUG: RAISE WARNING 'Got parent details from search name'; - -- Merge the postcode into the parent's address if necessary - IF NEW.postcode IS NOT NULL THEN - --DEBUG: RAISE WARNING 'Merging postcode into parent'; - isin_tokens := '{}'::int[]; - address_street_word_id := getorcreate_word_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - address_street_word_id := getorcreate_name_id(make_standard_name(NEW.postcode)); - IF address_street_word_id is not null - and not ARRAY[address_street_word_id] <@ location.nameaddress_vector THEN - isin_tokens := isin_tokens || address_street_word_id; - END IF; - IF isin_tokens != '{}'::int[] THEN - UPDATE search_name - SET nameaddress_vector = search_name.nameaddress_vector || isin_tokens - WHERE place_id = NEW.parent_place_id; - END IF; + -- determine postcode + IF NEW.rank_search > 4 THEN + IF NEW.address is not null AND NEW.address ? 'postcode' THEN + NEW.postcode = upper(trim(NEW.address->'postcode')); + ELSE + SELECT postcode FROM placex WHERE place_id = NEW.parent_place_id INTO NEW.postcode; + END IF; + IF NEW.postcode is null THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid); + END IF; END IF; -- If there is no name it isn't searchable, don't bother to create a search record @@@ -1481,7 -1586,7 +1586,7 @@@ -- Just be happy with inheriting from parent road only IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'Place added to location table'; END IF; @@@ -1668,6 -1773,7 +1773,7 @@@ isin := avals(NEW.address); IF array_upper(isin, 1) IS NOT NULL THEN FOR i IN 1..array_upper(isin, 1) LOOP + -- TODO further split terms with comma and semicolon address_street_word_id := get_name_id(make_standard_name(isin[i])); IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); @@@ -1682,26 -1788,6 +1788,6 @@@ END LOOP; END IF; END IF; - --DEBUG: RAISE WARNING '"address:* tokens collected'; - IF NEW.postcode IS NOT NULL THEN - isin := regexp_split_to_array(NEW.postcode, E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - isin_tokens := isin_tokens || address_street_word_id; - END IF; - - -- merge into address vector - address_street_word_id := get_word_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); - END IF; - END LOOP; - END IF; - END IF; - --DEBUG: RAISE WARNING 'postcode tokens collected'; -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too @@@ -1773,6 -1859,11 +1859,11 @@@ VALUES (NEW.place_id, location.place_id, true, location_isaddress, location.distance, location.rank_address); IF location_isaddress THEN + -- add postcode if we have one + -- (If multiple postcodes are available, we end up with the highest ranking one.) + IF location.postcode is not null THEN + NEW.postcode = location.postcode; + END IF; address_havelevel[location.rank_address] := true; IF NOT location.isguess THEN @@@ -1807,6 -1898,11 +1898,11 @@@ nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) VALUES (NEW.place_id, location.place_id, false, NOT address_havelevel[location.rank_address], location.distance, location.rank_address); + IF NEW.postcode is null AND location.postcode is not null + AND NOT address_havelevel[location.rank_address] THEN + NEW.postcode := location.postcode; + END IF; + address_havelevel[location.rank_address] := true; IF location.rank_address > parent_place_id_rank THEN @@@ -1849,11 -1945,20 +1945,20 @@@ END IF; --DEBUG: RAISE WARNING 'search terms for long ways added'; + IF NEW.address is not null AND NEW.address ? 'postcode' + AND NEW.address->'postcode' not similar to '%(,|;)%' THEN + NEW.postcode := upper(trim(NEW.address->'postcode')); + END IF; + + IF NEW.postcode is null AND NEW.rank_search > 8 THEN + NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); + END IF; + -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); --DEBUG: RAISE WARNING 'added to location (full)'; END IF; @@@ -2250,46 -2355,6 +2355,6 @@@ END $$ LANGUAGE plpgsql IMMUTABLE; - - CREATE OR REPLACE FUNCTION get_address_postcode(for_place_id BIGINT) RETURNS TEXT - AS $$ - DECLARE - result TEXT[]; - search TEXT[]; - for_postcode TEXT; - found INTEGER; - location RECORD; - BEGIN - - found := 1000; - search := ARRAY['ref']; - result := '{}'; - - select postcode from placex where place_id = for_place_id limit 1 into for_postcode; - - FOR location IN - select rank_address,name,distance,length(name::text) as namelength - from place_addressline join placex on (address_place_id = placex.place_id) - where place_addressline.place_id = for_place_id and rank_address in (5,11) - order by rank_address desc,rank_search desc,fromarea desc,distance asc,namelength desc - LOOP - IF array_upper(search, 1) IS NOT NULL AND array_upper(location.name, 1) IS NOT NULL THEN - FOR j IN 1..array_upper(search, 1) LOOP - FOR k IN 1..array_upper(location.name, 1) LOOP - IF (found > location.rank_address AND location.name[k].key = search[j] AND location.name[k].value != '') AND NOT result @> ARRAY[trim(location.name[k].value)] AND (for_postcode IS NULL OR location.name[k].value ilike for_postcode||'%') THEN - result[(100 - location.rank_address)] := trim(location.name[k].value); - found := location.rank_address; - END IF; - END LOOP; - END LOOP; - END IF; - END LOOP; - - RETURN array_to_string(result,', '); - END; - $$ - LANGUAGE plpgsql; - --housenumber only needed for tiger data CREATE OR REPLACE FUNCTION get_address_by_language(for_place_id BIGINT, housenumber INTEGER, languagepref TEXT[]) RETURNS TEXT AS $$ @@@ -2373,11 -2438,19 +2438,19 @@@ BEGI -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux - WHERE place_id = in_place_id + WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; -- %NOAUXDATA% END IF; + -- postcode table + IF for_place_id IS NULL THEN + select parent_place_id, country_code, rank_address, postcode, 'place', 'postcode' + FROM location_postcode + WHERE place_id = in_place_id + INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode, searchclass, searchtype; + END IF; + IF for_place_id IS NULL THEN select parent_place_id, country_code, housenumber, rank_search, postcode, name, class, type from placex WHERE place_id = in_place_id and rank_search > 27 @@@ -2396,9 -2469,8 +2469,8 @@@ found := 1000; hadcountry := false; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, - class, type, admin_level, true as fromarea, true as isaddress, + select placex.place_id, osm_type, osm_id, name, + class, type, admin_level, true as isaddress, CASE WHEN rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, 0 as distance, country_code, postcode from placex @@@ -2408,13 -2480,9 +2480,9 @@@ IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; - END IF; - IF searchpostcode IS NULL and location.postcode IS NOT NULL THEN - searchpostcode := location.postcode; - END IF; - IF location.rank_address = 4 AND location.isaddress THEN + ELSEIF location.rank_address = 4 THEN hadcountry := true; END IF; IF location.rank_address < 4 AND NOT hadcountry THEN @@@ -2425,18 -2493,17 +2493,17 @@@ END IF; END IF; countrylocation := ROW(location.place_id, location.osm_type, location.osm_id, location.name, location.class, - location.type, location.admin_level, location.fromarea, location.isaddress, location.rank_address, + location.type, location.admin_level, true, location.isaddress, location.rank_address, location.distance)::addressline; RETURN NEXT countrylocation; found := location.rank_address; END LOOP; FOR location IN - select placex.place_id, osm_type, osm_id, - CASE WHEN class = 'place' and type = 'postcode' THEN hstore('name', postcode) ELSE name END as name, + select placex.place_id, osm_type, osm_id, name, CASE WHEN extratags ? 'place' THEN 'place' ELSE class END as class, CASE WHEN extratags ? 'place' THEN extratags->'place' ELSE type END as type, - admin_level, fromarea, isaddress, + admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress, CASE WHEN address_place_id = for_place_id AND rank_address = 0 THEN 100 WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, distance,country_code,postcode from place_addressline join placex on (address_place_id = placex.place_id) @@@ -2450,12 -2517,9 +2517,9 @@@ IF searchcountrycode IS NULL AND location.country_code IS NOT NULL THEN searchcountrycode := location.country_code; END IF; - IF searchpostcode IS NOT NULL and location.type = 'postcode' THEN + IF location.type in ('postcode', 'postal_code') THEN location.isaddress := FALSE; END IF; - IF searchpostcode IS NULL and location.isaddress and location.type != 'postcode' and location.postcode IS NOT NULL and location.postcode not similar to '%(,|;)%' THEN - searchpostcode := location.postcode; - END IF; IF location.rank_address = 4 AND location.isaddress THEN hadcountry := true; END IF; @@@ -2489,7 -2553,6 +2553,6 @@@ IF searchhousename IS NOT NULL THEN location := ROW(in_place_id, null, null, searchhousename, searchclass, searchtype, null, true, true, 29, 0)::addressline; - -- location := ROW(in_place_id, null, null, searchhousename, 'place', 'house_name', null, true, true, 29, 0)::addressline; RETURN NEXT location; END IF; @@@ -2635,9 -2698,7 +2698,7 @@@ BEGI IF out_postcode IS NULL THEN SELECT postcode from placex where place_id = out_parent_place_id INTO out_postcode; END IF; - IF out_postcode IS NULL THEN - out_postcode := getNearestPostcode(out_partition, place_centroid); - END IF; + -- XXX look into postcode table newpoints := 0; insert into location_property_aux (place_id, partition, parent_place_id, housenumber, postcode, centroid)
rankName TokensName NotAddress TokensAddress Notcountryoperatorclasstypehouse#operatorclasstypepostcodehouse#LatLonRadius
".$aRow['sClass']."".$aRow['sType']."".$aRow['sPostcode']."".$aRow['sHouseNumber']."".$aRow['fLat']."