From 5d1fa597ead7fbba41e137037ef145d0d9b154d6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 16 Jan 2020 22:08:37 +0100 Subject: [PATCH] clean up get_word_id function Replaced by addr_ids_from_name() which also normalises the string. --- sql/functions/normalization.sql | 31 ++++++-------------------- sql/functions/placex_triggers.sql | 37 ++++++++++++++++--------------- 2 files changed, 26 insertions(+), 42 deletions(-) diff --git a/sql/functions/normalization.sql b/sql/functions/normalization.sql index cf8e63bc..66d0214a 100644 --- a/sql/functions/normalization.sql +++ b/sql/functions/normalization.sql @@ -201,33 +201,16 @@ END; $$ LANGUAGE plpgsql; - -CREATE OR REPLACE FUNCTION get_word_id(lookup_word TEXT) - RETURNS INTEGER - AS $$ -DECLARE - lookup_token TEXT; - return_word_id INTEGER; -BEGIN - lookup_token := trim(lookup_word); - SELECT min(word_id) FROM word - WHERE word_token = lookup_token and class is null and type is null - INTO return_word_id; - RETURN return_word_id; -END; -$$ -LANGUAGE plpgsql STABLE; - - -CREATE OR REPLACE FUNCTION get_name_id(lookup_word TEXT) - RETURNS INTEGER +-- Normalize a string and lookup its word ids (partial words). +CREATE OR REPLACE FUNCTION addr_ids_from_name(lookup_word TEXT) + RETURNS INTEGER[] AS $$ DECLARE lookup_token TEXT; - return_word_id INTEGER; + return_word_id INTEGER[]; BEGIN - lookup_token := ' '||trim(lookup_word); - SELECT min(word_id) FROM word + lookup_token := make_standard_name(lookup_word); + SELECT array_agg(word_id) FROM word WHERE word_token = lookup_token and class is null and type is null INTO return_word_id; RETURN return_word_id; @@ -236,7 +219,7 @@ $$ LANGUAGE plpgsql STABLE; --- Normalize a string and look up its name ids. +-- Normalize a string and look up its name ids (full words). CREATE OR REPLACE FUNCTION word_ids_from_name(lookup_word TEXT) RETURNS INTEGER[] AS $$ diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index d6076356..cedbb608 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -413,7 +413,6 @@ CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER AS $$ DECLARE - near_centroid GEOMETRY; search_maxdistance FLOAT[]; @@ -421,17 +420,13 @@ DECLARE address_havelevel BOOLEAN[]; i INTEGER; - iMax FLOAT; location RECORD; - way RECORD; - relation RECORD; relation_members TEXT[]; addr_item RECORD; search_diameter FLOAT; search_prevdiameter FLOAT; search_maxrank INTEGER; address_maxrank INTEGER; - address_street_word_id INTEGER; address_street_word_ids INTEGER[]; parent_place_id_rank BIGINT; @@ -732,15 +727,20 @@ BEGIN IF NEW.address IS NOT NULL THEN FOR addr_item IN SELECT * FROM each(NEW.address) LOOP - IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', 'district', 'region', 'county', 'municipality', 'hamlet', 'village', 'subdistrict', 'town', 'neighbourhood', 'quarter', 'parish') THEN - address_street_word_id := get_name_id(make_standard_name(addr_item.value)); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - isin_tokens := isin_tokens || address_street_word_id; + IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', + 'district', 'region', 'county', 'municipality', + 'hamlet', 'village', 'subdistrict', 'town', + 'neighbourhood', 'quarter', 'parish') + THEN + address_street_word_ids := word_ids_from_name(addr_item.value); + IF address_street_word_ids is not null THEN + isin_tokens := array_merge(isin_tokens, address_street_word_ids); END IF; IF NOT %REVERSE-ONLY% THEN - address_street_word_id := get_word_id(make_standard_name(addr_item.value)); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); + address_street_word_ids := addr_ids_from_name(addr_item.value); + IF address_street_word_ids is not null THEN + nameaddress_vector := array_merge(nameaddress_vector, + address_street_word_ids); END IF; END IF; END IF; @@ -749,16 +749,17 @@ BEGIN isin := regexp_split_to_array(addr_item.value, E'[;,]'); IF array_upper(isin, 1) IS NOT NULL THEN FOR i IN 1..array_upper(isin, 1) LOOP - address_street_word_id := get_name_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL AND NOT(ARRAY[address_street_word_id] <@ isin_tokens) THEN - isin_tokens := isin_tokens || address_street_word_id; + address_street_word_ids := word_ids_from_name(isin[i]); + IF address_street_word_ids is not null THEN + isin_tokens := array_merge(isin_tokens, address_street_word_ids); END IF; -- merge word into address vector IF NOT %REVERSE-ONLY% THEN - address_street_word_id := get_word_id(make_standard_name(isin[i])); - IF address_street_word_id IS NOT NULL THEN - nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); + address_street_word_ids := addr_ids_from_name(isin[i]); + IF address_street_word_ids is not null THEN + nameaddress_vector := array_merge(nameaddress_vector, + address_street_word_ids); END IF; END IF; END LOOP; -- 2.39.5