X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/be65c8303f18d0f92bbf5bc9558f8789d33f21d9..ec7184c53315711b02ac66a05cf04a618e1d3ee3:/nominatim/tokenizer/legacy_tokenizer.py

diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py
index dc6972dc..d901a68d 100644
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -113,7 +113,7 @@ class LegacyTokenizer(AbstractTokenizer):
             self._init_db_tables(config)
 
 
-    def init_from_project(self):
+    def init_from_project(self, _):
         """ Initialise the tokenizer from the project directory.
         """
         with connect(self.dsn) as conn:
@@ -142,7 +142,7 @@ class LegacyTokenizer(AbstractTokenizer):
                               modulepath=modulepath)
 
 
-    def check_database(self):
+    def check_database(self, _):
         """ Check that the tokenizer is set up correctly.
         """
         hint = """\
@@ -186,6 +186,24 @@ class LegacyTokenizer(AbstractTokenizer):
             self._save_config(conn, config)
 
 
+    def update_statistics(self):
+        """ Recompute the frequency of full words (stored in word.search_name_count).
+        """
+        with connect(self.dsn) as conn:
+            with conn.cursor() as cur:
+                cur.drop_table("word_frequencies")  # start clean; presumably tolerates a missing table - TODO confirm
+                LOG.info("Computing word frequencies")
+                cur.execute("""CREATE TEMP TABLE word_frequencies AS
+                                 SELECT unnest(name_vector) as id, count(*)
+                                 FROM search_name GROUP BY id""")  # occurrences per id
+                cur.execute("CREATE INDEX ON word_frequencies(id)")  # id is the join column of the UPDATE below
+                LOG.info("Update word table with recomputed frequencies")
+                cur.execute("""UPDATE word SET search_name_count = count
+                               FROM word_frequencies
+                               WHERE word_token like ' %' and word_id = id""")
+                cur.drop_table("word_frequencies")  # temp table is no longer needed
+            conn.commit()  # commit the recomputed counts
+
     def name_analyzer(self):
         """ Create a new analyzer for tokenizing names and queries
             using this tokinzer. Analyzers are context managers and should