+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
""" Recompute the frequency of full words.
"""
with connect(self.dsn) as conn:
- with conn.cursor() as cur:
- cur.drop_table("word_frequencies")
- LOG.info("Computing word frequencies")
- cur.execute("""CREATE TEMP TABLE word_frequencies AS
- SELECT unnest(name_vector) as id, count(*)
- FROM search_name GROUP BY id""")
- cur.execute("CREATE INDEX ON word_frequencies(id)")
- LOG.info("Update word table with recomputed frequencies")
- cur.execute("""UPDATE word SET search_name_count = count
- FROM word_frequencies
- WHERE word_token like ' %' and word_id = id""")
- cur.drop_table("word_frequencies")
+ if conn.table_exists('search_name'):
+ with conn.cursor() as cur:
+ cur.drop_table("word_frequencies")
+ LOG.info("Computing word frequencies")
+ cur.execute("""CREATE TEMP TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*)
+ FROM search_name GROUP BY id""")
+ cur.execute("CREATE INDEX ON word_frequencies(id)")
+ LOG.info("Update word table with recomputed frequencies")
+ cur.execute("""UPDATE word SET search_name_count = count
+ FROM word_frequencies
+ WHERE word_token like ' %' and word_id = id""")
+ cur.drop_table("word_frequencies")
conn.commit()
+
+ def update_word_tokens(self):
+ """ No house-keeping implemented for the legacy tokenizer.
+
+ The call only emits an informational log message stating that
+ no tokenizer clean-up is available.
+ """
+ LOG.info("No tokenizer clean-up available.")
+
+
def name_analyzer(self):
""" Create a new analyzer for tokenizing names and queries
using this tokenizer. Analyzers are context managers and should
with conn.cursor() as cur:
return cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))
- self.data['street'] = self.cache.streets.get(street, _get_street)
+ tokens = self.cache.streets.get(street, _get_street)
+ if tokens:
+ self.data['street'] = tokens
def add_place(self, conn, place):
tokens = {}
for key, value in terms:
- tokens[key] = self.cache.address_terms.get(value, _get_address_term)
+ items = self.cache.address_terms.get(value, _get_address_term)
+ if items[0] or items[1]:
+ tokens[key] = items
- self.data['addr'] = tokens
+ if tokens:
+ self.data['addr'] = tokens
class _LRU: