git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/legacy_tokenizer.py
Merge pull request #2384 from lonvia/actions-add-icu-tokenizer
[nominatim.git] / nominatim / tokenizer / legacy_tokenizer.py
index 3808c68e069f3f00b7f76bb84847d7b43ccb4ba0..6040f88f154bad6eb25e81c9498017d914db3e88 100644 (file)
@@ -271,8 +271,7 @@ class LegacyNameAnalyzer:
             self.conn = None
 
 
-    @staticmethod
-    def get_word_token_info(conn, words):
+    def get_word_token_info(self, words):
         """ Return token information for the given list of words.
             If a word starts with # it is assumed to be a full name
             otherwise is a partial name.
@@ -283,7 +282,7 @@ class LegacyNameAnalyzer:
             The function is used for testing and debugging only
             and not necessarily efficient.
         """
-        with conn.cursor() as cur:
+        with self.conn.cursor() as cur:
             cur.execute("""SELECT t.term, word_token, word_id
                            FROM word, (SELECT unnest(%s::TEXT[]) as term) t
                            WHERE word_token = (CASE
@@ -352,7 +351,7 @@ class LegacyNameAnalyzer:
 
 
 
-    def update_special_phrases(self, phrases):
+    def update_special_phrases(self, phrases, should_replace):
         """ Replace the search index for special phrases with the new phrases.
         """
         norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
@@ -375,13 +374,13 @@ class LegacyNameAnalyzer:
                     cur,
                     """ INSERT INTO word (word_id, word_token, word, class, type,
                                           search_name_count, operator)
-                        (SELECT nextval('seq_word'), make_standard_name(name), name,
+                        (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
                                 class, type, 0,
                                 CASE WHEN op in ('in', 'near') THEN op ELSE null END
                            FROM (VALUES %s) as v(name, class, type, op))""",
                     to_add)
 
-            if to_delete:
+            if to_delete and should_replace:
                 psycopg2.extras.execute_values(
                     cur,
                     """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
@@ -400,11 +399,11 @@ class LegacyNameAnalyzer:
             cur.execute(
                 """INSERT INTO word (word_id, word_token, country_code)
                    (SELECT nextval('seq_word'), lookup_token, %s
-                      FROM (SELECT ' ' || make_standard_name(n) as lookup_token
+                      FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
                             FROM unnest(%s)n) y
                       WHERE NOT EXISTS(SELECT * FROM word
                                        WHERE word_token = lookup_token and country_code = %s))
-                """, (country_code, names, country_code))
+                """, (country_code, list(names.values()), country_code))
 
 
     def process_place(self, place):
@@ -422,7 +421,7 @@ class LegacyNameAnalyzer:
 
             country_feature = place.get('country_feature')
             if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
-                self.add_country_names(country_feature.lower(), list(names.values()))
+                self.add_country_names(country_feature.lower(), names)
 
         address = place.get('address')
 
@@ -513,10 +512,9 @@ class _TokenInfo:
         """
         def _get_place(name):
             with conn.cursor() as cur:
-                cur.execute("""SELECT (addr_ids_from_name(%s)
-                                       || getorcreate_name_id(make_standard_name(%s), ''))::text,
+                cur.execute("""SELECT make_keywords(hstore('name' , %s))::text,
                                       word_ids_from_name(%s)::text""",
-                            (name, name, name))
+                            (name, name))
                 return cur.fetchone()
 
         self.data['place_search'], self.data['place_match'] = \