From: Sarah Hoffmann
Date: Tue, 26 Oct 2021 09:42:42 +0000 (+0200)
Subject: do not count words when in reverse-only mode
X-Git-Tag: v4.0.0~6^2~1
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/53dbe58ada3fb34534fa2a1d079c2cbbbe09496c?ds=sidebyside

do not count words when in reverse-only mode
---

diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py
index e7ee57ad..3331a321 100644
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -99,18 +99,19 @@ class LegacyICUTokenizer(AbstractTokenizer):
         """ Recompute frequencies for all name words.
         """
         with connect(self.dsn) as conn:
-            with conn.cursor() as cur:
-                cur.drop_table("word_frequencies")
-                LOG.info("Computing word frequencies")
-                cur.execute("""CREATE TEMP TABLE word_frequencies AS
-                                 SELECT unnest(name_vector) as id, count(*)
-                                 FROM search_name GROUP BY id""")
-                cur.execute("CREATE INDEX ON word_frequencies(id)")
-                LOG.info("Update word table with recomputed frequencies")
-                cur.execute("""UPDATE word
-                                SET info = info || jsonb_build_object('count', count)
-                                FROM word_frequencies WHERE word_id = id""")
-                cur.drop_table("word_frequencies")
+            if conn.table_exists('search_name'):
+                with conn.cursor() as cur:
+                    cur.drop_table("word_frequencies")
+                    LOG.info("Computing word frequencies")
+                    cur.execute("""CREATE TEMP TABLE word_frequencies AS
+                                     SELECT unnest(name_vector) as id, count(*)
+                                     FROM search_name GROUP BY id""")
+                    cur.execute("CREATE INDEX ON word_frequencies(id)")
+                    LOG.info("Update word table with recomputed frequencies")
+                    cur.execute("""UPDATE word
+                                    SET info = info || jsonb_build_object('count', count)
+                                    FROM word_frequencies WHERE word_id = id""")
+                    cur.drop_table("word_frequencies")
             conn.commit()
 
 
diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
index d901a68d..0edcdcca 100644
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -190,18 +190,19 @@ class LegacyTokenizer(AbstractTokenizer):
         """ Recompute the frequency of full words.
         """
         with connect(self.dsn) as conn:
-            with conn.cursor() as cur:
-                cur.drop_table("word_frequencies")
-                LOG.info("Computing word frequencies")
-                cur.execute("""CREATE TEMP TABLE word_frequencies AS
-                                 SELECT unnest(name_vector) as id, count(*)
-                                 FROM search_name GROUP BY id""")
-                cur.execute("CREATE INDEX ON word_frequencies(id)")
-                LOG.info("Update word table with recomputed frequencies")
-                cur.execute("""UPDATE word SET search_name_count = count
-                               FROM word_frequencies
-                               WHERE word_token like ' %' and word_id = id""")
-                cur.drop_table("word_frequencies")
+            if conn.table_exists('search_name'):
+                with conn.cursor() as cur:
+                    cur.drop_table("word_frequencies")
+                    LOG.info("Computing word frequencies")
+                    cur.execute("""CREATE TEMP TABLE word_frequencies AS
+                                     SELECT unnest(name_vector) as id, count(*)
+                                     FROM search_name GROUP BY id""")
+                    cur.execute("CREATE INDEX ON word_frequencies(id)")
+                    LOG.info("Update word table with recomputed frequencies")
+                    cur.execute("""UPDATE word SET search_name_count = count
+                                   FROM word_frequencies
+                                   WHERE word_token like ' %' and word_id = id""")
+                    cur.drop_table("word_frequencies")
             conn.commit()
 
     def name_analyzer(self):