git.openstreetmap.org Git - nominatim.git/commitdiff
delete unused country name tokens
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 22 Feb 2022 19:15:04 +0000 (20:15 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 23 Feb 2022 08:23:06 +0000 (09:23 +0100)
nominatim/tokenizer/icu_tokenizer.py

index f5addd3e4ff84b0bd6d8f0a4da35a995eb609b18..98a1daedc37ea142bde24264ebc46fb22f8b2219 100644 (file)
@@ -415,18 +415,24 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
             cur.execute("""SELECT word_token FROM word
                             WHERE type = 'C' and word = %s""",
                         (country_code, ))
-            word_tokens.difference_update((t[0] for t in cur))
+            existing_tokens = {t[0] for t in cur}
 
             # Only add those names that are not yet in the list.
-            if word_tokens:
+            new_tokens = word_tokens - existing_tokens
+            if new_tokens:
                 cur.execute("""INSERT INTO word (word_token, type, word)
                                (SELECT token, 'C', %s
                                 FROM unnest(%s) as token)
-                            """, (country_code, list(word_tokens)))
-
-            # No names are deleted at the moment.
-            # If deletion is made possible, then the static names from the
-            # initial 'country_name' table should be kept.
+                            """, (country_code, list(new_tokens)))
+
+            # Delete names that no longer exist.
+            gone_tokens = existing_tokens - word_tokens
+            if gone_tokens:
+                cur.execute("""DELETE FROM word
+                               USING unnest(%s) as token
+                               WHERE type = 'C' and word = %s
+                                     and word_token = token""",
+                            (list(gone_tokens), country_code))
 
 
     def process_place(self, place):