X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/efafa5271957fb54b356ec1c90e8613f14de40d4..b5a6d7a4a66e132f294cd8caa97c80d64cd2e039:/lib-sql/tokenizer/icu_tokenizer.sql diff --git a/lib-sql/tokenizer/icu_tokenizer.sql b/lib-sql/tokenizer/icu_tokenizer.sql index 03408b4a..04fcedcb 100644 --- a/lib-sql/tokenizer/icu_tokenizer.sql +++ b/lib-sql/tokenizer/icu_tokenizer.sql @@ -41,10 +41,17 @@ AS $$ $$ LANGUAGE SQL IMMUTABLE STRICT; +CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB) + RETURNS BOOLEAN +AS $$ + SELECT info->>'street' is not null or info->>'place' is null; +$$ LANGUAGE SQL IMMUTABLE; + + CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN AS $$ - SELECT info->>'street' is not null; + SELECT info->>'street' is not null and info->>'street' != '{}'; $$ LANGUAGE SQL IMMUTABLE; @@ -97,10 +104,10 @@ AS $$ $$ LANGUAGE SQL IMMUTABLE STRICT; -CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT) +CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB) RETURNS TEXT AS $$ - SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END; + SELECT info->>'postcode'; $$ LANGUAGE SQL IMMUTABLE STRICT; @@ -200,3 +207,49 @@ BEGIN END; $$ LANGUAGE plpgsql; + + +CREATE OR REPLACE FUNCTION create_analyzed_hnr_id(norm_term TEXT, lookup_terms TEXT[]) + RETURNS INTEGER + AS $$ +DECLARE + return_id INTEGER; +BEGIN + SELECT min(word_id) INTO return_id + FROM word WHERE word = norm_term and type = 'H'; + + IF return_id IS NULL THEN + return_id := nextval('seq_word'); + INSERT INTO word (word_id, word_token, type, word, info) + SELECT return_id, lookup_term, 'H', norm_term, + json_build_object('lookup', lookup_terms[1]) + FROM unnest(lookup_terms) as lookup_term; + END IF; + + RETURN return_id; +END; +$$ +LANGUAGE plpgsql; + +CREATE OR REPLACE FUNCTION create_postcode_word(postcode TEXT, lookup_terms TEXT[]) + RETURNS BOOLEAN + AS $$ +DECLARE + existing INTEGER; +BEGIN + SELECT count(*) INTO existing + FROM word WHERE word = postcode and type = 'P'; + + IF existing > 0 THEN + RETURN TRUE; + END IF; + + -- postcodes don't need word ids + INSERT INTO word (word_token, type, word) + SELECT lookup_term, 'P', postcode FROM unnest(lookup_terms) as lookup_term; + + RETURN FALSE; +END; +$$ +LANGUAGE plpgsql; +