git.openstreetmap.org Git - nominatim.git/commitdiff
do not save word counts of 1
author: Sarah Hoffmann <lonvia@denofr.de>
Thu, 20 Mar 2025 19:01:46 +0000 (20:01 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Mon, 31 Mar 2025 12:52:50 +0000 (14:52 +0200)
A count of 1 is the default value, which will be assumed when the count is
missing.

src/nominatim_db/tokenizer/icu_tokenizer.py

index 3da1171f5aed0fee93c12610058e8b1a8edfa143..2b17d6117f059475ed27f685ce7ce75a1c16d3fa 100644 (file)
@@ -121,10 +121,10 @@ class ICUTokenizer(AbstractTokenizer):
                            SELECT unnest(nameaddress_vector) as id, count(*)
                                  FROM search_name GROUP BY id)
                   SELECT coalesce(a.id, w.id) as id,
-                         (CASE WHEN w.count is null THEN '{}'::JSONB
+                         (CASE WHEN w.count is null or w.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('count', w.count) END
                           ||
-                          CASE WHEN a.count is null THEN '{}'::JSONB
+                          CASE WHEN a.count is null or a.count <= 1 THEN '{}'::JSONB
                               ELSE jsonb_build_object('addr_count', a.count) END) as info
                   FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
                   """)
@@ -134,9 +134,10 @@ class ICUTokenizer(AbstractTokenizer):
                 drop_tables(conn, 'tmp_word')
                 cur.execute("""CREATE TABLE tmp_word AS
                                 SELECT word_id, word_token, type, word,
-                                       (CASE WHEN wf.info is null THEN word.info
-                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
-                                        END) as info
+                                       coalesce(word.info, '{}'::jsonb)
+                                       - 'count' - 'addr_count' ||
+                                       coalesce(wf.info, '{}'::jsonb)
+                                       as info
                                 FROM word LEFT JOIN word_frequencies wf
                                      ON word.word_id = wf.id
                             """)