X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/83775289523eda29fe8d82ff2e92c6faa5c76898..1618aba5f282a27fc45af28c4eeebb6dcd28c332:/nominatim/tokenizer/legacy_icu_tokenizer.py diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index 59ad09aa..32dd6535 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -371,22 +371,28 @@ class LegacyICUNameAnalyzer: """ word_tokens = set() for name in self._compute_full_names(names): - if name: - word_tokens.add(' ' + self.name_processor.get_search_normalized(name)) + norm_name = self.name_processor.get_search_normalized(name) + if norm_name: + word_tokens.add(norm_name) with self.conn.cursor() as cur: # Get existing names - cur.execute("SELECT word_token FROM word WHERE country_code = %s", + cur.execute("""SELECT word_token FROM word + WHERE type = 'C' and info->>'cc'= %s""", (country_code, )) word_tokens.difference_update((t[0] for t in cur)) + # Only add those names that are not yet in the list. if word_tokens: - cur.execute("""INSERT INTO word (word_id, word_token, country_code, - search_name_count) - (SELECT nextval('seq_word'), token, %s, 0 + cur.execute("""INSERT INTO word (word_token, type, info) + (SELECT token, 'C', json_build_object('cc', %s) FROM unnest(%s) as token) """, (country_code, list(word_tokens))) + # No names are deleted at the moment. + # If deletion is made possible, then the static names from the + # initial 'country_name' table should be kept. + def process_place(self, place): """ Determine tokenizer information about the given place.