From a3e4e8e5cdffb0056bccb79e11690ed01c9aa5ea Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann <lonvia@denofr.de>
Date: Tue, 22 Feb 2022 20:15:04 +0100
Subject: [PATCH] delete unused country name tokens

---
 nominatim/tokenizer/icu_tokenizer.py | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index f5addd3e..98a1daed 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -415,18 +415,24 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
             cur.execute("""SELECT word_token FROM word
                             WHERE type = 'C' and word = %s""",
                         (country_code, ))
-            word_tokens.difference_update((t[0] for t in cur))
+            existing_tokens = {t[0] for t in cur}
 
             # Only add those names that are not yet in the list.
-            if word_tokens:
+            new_tokens = word_tokens - existing_tokens
+            if new_tokens:
                 cur.execute("""INSERT INTO word (word_token, type, word)
                                (SELECT token, 'C', %s
                                 FROM unnest(%s) as token)
-                            """, (country_code, list(word_tokens)))
-
-            # No names are deleted at the moment.
-            # If deletion is made possible, then the static names from the
-            # initial 'country_name' table should be kept.
+                            """, (country_code, list(new_tokens)))
+
+            # Delete tokens stored for this country that are not in the current token set.
+            gone_tokens = existing_tokens - word_tokens
+            if gone_tokens:
+                cur.execute("""DELETE FROM word
+                               USING unnest(%s) as token
+                               WHERE type = 'C' and word = %s
+                                     and word_token = token""",
+                            (list(gone_tokens), country_code))
 
 
     def process_place(self, place):
-- 
2.39.5