From: Sarah Hoffmann Date: Fri, 3 Jun 2022 15:12:01 +0000 (+0200) Subject: icu: switch postcodes to using the pre-formatted one X-Git-Tag: v4.1.0~22^2~17 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/b7704833e4b011541928372a46ea692c3a496b5c icu: switch postcodes to using the pre-formatted one --- diff --git a/lib-sql/functions/interpolation.sql b/lib-sql/functions/interpolation.sql index c8cfbcc6..3a994711 100644 --- a/lib-sql/functions/interpolation.sql +++ b/lib-sql/functions/interpolation.sql @@ -156,7 +156,6 @@ DECLARE linegeo GEOMETRY; splitline GEOMETRY; sectiongeo GEOMETRY; - interpol_postcode TEXT; postcode TEXT; stepmod SMALLINT; BEGIN @@ -174,8 +173,6 @@ BEGIN ST_PointOnSurface(NEW.linegeo), NEW.linegeo); - interpol_postcode := token_normalized_postcode(NEW.address->'postcode'); - NEW.token_info := token_strip_info(NEW.token_info); IF NEW.address ? '_inherited' THEN NEW.address := hstore('interpolation', NEW.address->'interpolation'); @@ -207,6 +204,11 @@ BEGIN FOR nextnode IN SELECT DISTINCT ON (nodeidpos) osm_id, address, geometry, + -- Take the postcode from the node only if it has a housenumber itself. + -- Note that there is a corner-case where the node has a wrongly + -- formatted postcode and therefore 'postcode' contains a derived + -- variant. + CASE WHEN address ? 'postcode' THEN placex.postcode ELSE NULL::text END as postcode, substring(address->'housenumber','[0-9]+')::integer as hnr FROM placex, generate_series(1, array_upper(waynodes, 1)) nodeidpos WHERE osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT @@ -260,13 +262,10 @@ BEGIN endnumber := newend; -- determine postcode - postcode := coalesce(interpol_postcode, - token_normalized_postcode(prevnode.address->'postcode'), - token_normalized_postcode(nextnode.address->'postcode'), - postcode); - IF postcode is NULL THEN - SELECT token_normalized_postcode(placex.postcode) - FROM placex WHERE place_id = NEW.parent_place_id INTO postcode; + postcode := coalesce(prevnode.postcode, nextnode.postcode, postcode); + IF postcode is NULL and NEW.parent_place_id > 0 THEN + SELECT placex.postcode FROM placex + WHERE place_id = NEW.parent_place_id INTO postcode; END IF; IF postcode is NULL THEN postcode := get_nearest_postcode(NEW.country_code, nextnode.geometry); diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 6143a1ed..1f7e6dc6 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -992,7 +992,7 @@ BEGIN {% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %} -- determine postcode - NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'), + NEW.postcode := coalesce(token_get_postcode(NEW.token_info), location.postcode, get_nearest_postcode(NEW.country_code, NEW.centroid)); @@ -1150,8 +1150,7 @@ BEGIN {% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %} - NEW.postcode := coalesce(token_normalized_postcode(NEW.address->'postcode'), - NEW.postcode); + NEW.postcode := coalesce(token_get_postcode(NEW.token_info), NEW.postcode); -- if we have a name add this to the name search table IF NEW.name IS NOT NULL THEN diff --git a/lib-sql/tokenizer/icu_tokenizer.sql b/lib-sql/tokenizer/icu_tokenizer.sql index f323334b..f86a0a37 100644 --- a/lib-sql/tokenizer/icu_tokenizer.sql +++ b/lib-sql/tokenizer/icu_tokenizer.sql @@ -104,6 +104,13 @@ AS $$ $$ LANGUAGE SQL IMMUTABLE STRICT; +CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB) + RETURNS TEXT +AS $$ + SELECT info->>'postcode'; +$$ LANGUAGE SQL IMMUTABLE STRICT; + + -- Return token info that should be saved permanently in the database. CREATE OR REPLACE FUNCTION token_strip_info(info JSONB) RETURNS JSONB diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index e9812ba0..61c47c11 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -675,6 +675,9 @@ class _TokenInfo: if self.address_tokens: out['addr'] = self.address_tokens + if self.postcode: + out['postcode'] = self.postcode + return out