X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/c86cfefc4813d275073fb5f2b196ddfcc7f26aef..e8e2502e2f9d2275b8d567341400672adea9fea3:/nominatim/tokenizer/icu_tokenizer.py diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 12d1eccd..686fbd79 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -93,6 +93,25 @@ class LegacyICUTokenizer(AbstractTokenizer): return None + def update_statistics(self): + """ Recompute frequencies for all name words. + """ + with connect(self.dsn) as conn: + with conn.cursor() as cur: + cur.drop_table("word_frequencies") + LOG.info("Computing word frequencies") + cur.execute("""CREATE TEMP TABLE word_frequencies AS + SELECT unnest(name_vector) as id, count(*) + FROM search_name GROUP BY id""") + cur.execute("CREATE INDEX ON word_frequencies(id)") + LOG.info("Update word table with recomputed frequencies") + cur.execute("""UPDATE word + SET info = info || jsonb_build_object('count', count) + FROM word_frequencies WHERE word_id = id""") + cur.drop_table("word_frequencies") + conn.commit() + + def name_analyzer(self): """ Create a new analyzer for tokenizing names and queries using this tokinzer. Analyzers are context managers and should