From 53dbe58ada3fb34534fa2a1d079c2cbbbe09496c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 26 Oct 2021 11:42:42 +0200 Subject: [PATCH] do not count words when in reverse-only mode --- nominatim/tokenizer/icu_tokenizer.py | 25 +++++++++++++------------ nominatim/tokenizer/legacy_tokenizer.py | 25 +++++++++++++------------ 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index e7ee57ad..3331a321 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -99,18 +99,19 @@ class LegacyICUTokenizer(AbstractTokenizer): """ Recompute frequencies for all name words. """ with connect(self.dsn) as conn: - with conn.cursor() as cur: - cur.drop_table("word_frequencies") - LOG.info("Computing word frequencies") - cur.execute("""CREATE TEMP TABLE word_frequencies AS - SELECT unnest(name_vector) as id, count(*) - FROM search_name GROUP BY id""") - cur.execute("CREATE INDEX ON word_frequencies(id)") - LOG.info("Update word table with recomputed frequencies") - cur.execute("""UPDATE word - SET info = info || jsonb_build_object('count', count) - FROM word_frequencies WHERE word_id = id""") - cur.drop_table("word_frequencies") + if conn.table_exists('search_name'): + with conn.cursor() as cur: + cur.drop_table("word_frequencies") + LOG.info("Computing word frequencies") + cur.execute("""CREATE TEMP TABLE word_frequencies AS + SELECT unnest(name_vector) as id, count(*) + FROM search_name GROUP BY id""") + cur.execute("CREATE INDEX ON word_frequencies(id)") + LOG.info("Update word table with recomputed frequencies") + cur.execute("""UPDATE word + SET info = info || jsonb_build_object('count', count) + FROM word_frequencies WHERE word_id = id""") + cur.drop_table("word_frequencies") conn.commit() diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index d901a68d..0edcdcca 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -190,18 +190,19 @@ class LegacyTokenizer(AbstractTokenizer): """ Recompute the frequency of full words. """ with connect(self.dsn) as conn: - with conn.cursor() as cur: - cur.drop_table("word_frequencies") - LOG.info("Computing word frequencies") - cur.execute("""CREATE TEMP TABLE word_frequencies AS - SELECT unnest(name_vector) as id, count(*) - FROM search_name GROUP BY id""") - cur.execute("CREATE INDEX ON word_frequencies(id)") - LOG.info("Update word table with recomputed frequencies") - cur.execute("""UPDATE word SET search_name_count = count - FROM word_frequencies - WHERE word_token like ' %' and word_id = id""") - cur.drop_table("word_frequencies") + if conn.table_exists('search_name'): + with conn.cursor() as cur: + cur.drop_table("word_frequencies") + LOG.info("Computing word frequencies") + cur.execute("""CREATE TEMP TABLE word_frequencies AS + SELECT unnest(name_vector) as id, count(*) + FROM search_name GROUP BY id""") + cur.execute("CREATE INDEX ON word_frequencies(id)") + LOG.info("Update word table with recomputed frequencies") + cur.execute("""UPDATE word SET search_name_count = count + FROM word_frequencies + WHERE word_token like ' %' and word_id = id""") + cur.drop_table("word_frequencies") conn.commit() def name_analyzer(self): -- 2.39.5