From: Sarah Hoffmann Date: Mon, 19 Apr 2021 14:54:22 +0000 (+0200) Subject: simplify token precomputation X-Git-Tag: v4.0.0~107^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/b88b952f56d6ac1eec46202ba05a04fcd12575d9?ds=sidebyside;hp=--cc simplify token precomputation Rename function to reflect that it is only used for precomputation. The token IDs are not really needed, so don't bother to compute the array of tokens. --- b88b952f56d6ac1eec46202ba05a04fcd12575d9 diff --git a/data/words.sql b/data/words.sql index ac250739..5613d927 100644 --- a/data/words.sql +++ b/data/words.sql @@ -29787,7 +29787,7 @@ st 5557484 -- prefill word table -select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null; +select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null; select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w; -- copy the word frequencies diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql index f283f916..c7bd2fc5 100644 --- a/lib-sql/functions/normalization.sql +++ b/lib-sql/functions/normalization.sql @@ -377,40 +377,26 @@ $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION make_keywords(src TEXT) - RETURNS INTEGER[] +CREATE OR REPLACE FUNCTION precompute_words(src TEXT) + RETURNS INTEGER AS $$ DECLARE - result INTEGER[]; s TEXT; w INTEGER; words TEXT[]; i INTEGER; j INTEGER; BEGIN - result := '{}'::INTEGER[]; - s := make_standard_name(src); w := getorcreate_name_id(s, src); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; - words := string_to_array(s, ' '); IF array_upper(words, 1) IS NOT NULL THEN FOR j IN 1..array_upper(words, 1) LOOP IF (words[j] != '') THEN - w = getorcreate_word_id(words[j]); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; + w := getorcreate_word_id(words[j]); END IF; END LOOP; END IF; @@ -421,9 +407,6 @@ BEGIN s := make_standard_name(words[j]); IF s != '' THEN w := getorcreate_word_id(s); - IF w IS NOT NULL AND NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; END IF; END LOOP; END IF; @@ -433,13 +416,10 @@ BEGIN s := make_standard_name(s); IF s != '' THEN w := getorcreate_name_id(s, src); - IF NOT (ARRAY[w] <@ result) THEN - result := result || w; - END IF; END IF; END IF; - RETURN result; + RETURN 1; END; $$ LANGUAGE plpgsql; diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py index e2852acb..1311ef5d 100644 --- a/test/python/test_tools_database_import.py +++ b/test/python/test_tools_database_import.py @@ -187,7 +187,7 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory): @pytest.mark.parametrize("threads", (1, 5)) def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table, temp_db_cursor, threads): - for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'): + for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'): temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT) RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL """.format(func))