From: Sarah Hoffmann
Date: Mon, 19 Apr 2021 14:54:22 +0000 (+0200)
Subject: simplify token precomputation
X-Git-Tag: v4.0.0~107^2
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/b88b952f56d6ac1eec46202ba05a04fcd12575d9

simplify token precomputation

Rename function to reflect that it is only used for
precomputation. The token IDs are not really needed,
so don't bother to compute the array of tokens.
---

diff --git a/data/words.sql b/data/words.sql
index ac250739..5613d927 100644
--- a/data/words.sql
+++ b/data/words.sql
@@ -29787,7 +29787,7 @@ st 5557484


 -- prefill word table
-select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
+select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
 select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;

 -- copy the word frequencies
diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql
index f283f916..c7bd2fc5 100644
--- a/lib-sql/functions/normalization.sql
+++ b/lib-sql/functions/normalization.sql
@@ -377,40 +377,26 @@ $$
 LANGUAGE plpgsql;


-CREATE OR REPLACE FUNCTION make_keywords(src TEXT)
-  RETURNS INTEGER[]
+CREATE OR REPLACE FUNCTION precompute_words(src TEXT)
+  RETURNS INTEGER
   AS $$
 DECLARE
-  result INTEGER[];
   s TEXT;
   w INTEGER;
   words TEXT[];
   i INTEGER;
   j INTEGER;
 BEGIN
-  result := '{}'::INTEGER[];
-
   s := make_standard_name(src);
   w := getorcreate_name_id(s, src);

-  IF NOT (ARRAY[w] <@ result) THEN
-    result := result || w;
-  END IF;
-
   w := getorcreate_word_id(s);

-  IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-    result := result || w;
-  END IF;
-
   words := string_to_array(s, ' ');
   IF array_upper(words, 1) IS NOT NULL THEN
     FOR j IN 1..array_upper(words, 1) LOOP
       IF (words[j] != '') THEN
-        w = getorcreate_word_id(words[j]);
-        IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-          result := result || w;
-        END IF;
+        w := getorcreate_word_id(words[j]);
       END IF;
     END LOOP;
   END IF;
@@ -421,9 +407,6 @@ BEGIN
       s := make_standard_name(words[j]);
       IF s != '' THEN
         w := getorcreate_word_id(s);
-        IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN
-          result := result || w;
-        END IF;
       END IF;
     END LOOP;
   END IF;
@@ -433,13 +416,10 @@ BEGIN
     s := make_standard_name(s);
     IF s != '' THEN
       w := getorcreate_name_id(s, src);
-      IF NOT (ARRAY[w] <@ result) THEN
-        result := result || w;
-      END IF;
     END IF;
   END IF;

-  RETURN result;
+  RETURN 1;
 END;
 $$
 LANGUAGE plpgsql;
diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py
index e2852acb..1311ef5d 100644
--- a/test/python/test_tools_database_import.py
+++ b/test/python/test_tools_database_import.py
@@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory):
 @pytest.mark.parametrize("threads", (1, 5))
 def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table,
                    temp_db_cursor, threads):
-    for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'):
+    for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
         temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT)
                                   RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
                                """.format(func))