git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/clicmd/refresh.py
icu: move token deduplication into TokenInfo
[nominatim.git] / nominatim / clicmd / refresh.py
index 9dca4e42e073db9317ba5d02578a93a9340261b9..b8a88b6d615b5b5c04445f393a71a81c1b6cc112 100644 (file)
@@ -1,10 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Implementation of 'refresh' subcommand.
 """
 import logging
 from pathlib import Path
 
 """
 Implementation of 'refresh' subcommand.
 """
 import logging
 from pathlib import Path
 
-from ..db.connection import connect
+from nominatim.db.connection import connect
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -17,14 +23,24 @@ class UpdateRefresh:
     """\
     Recompute auxiliary data used by the indexing process.
 
     """\
     Recompute auxiliary data used by the indexing process.
 
-    These functions must not be run in parallel with other update commands.
+    This sub-command updates various static data and functions in the database.
+    It usually needs to be run after changing various aspects of the
+    configuration. The configuration documentation will mention the exact
+    command to use in such a case.
+
+    Warning: the 'update' command must not be run in parallel with other update
+             commands like 'replication' or 'add-data'.
     """
     """
+    def __init__(self):
+        self.tokenizer = None
 
     @staticmethod
     def add_args(parser):
         group = parser.add_argument_group('Data arguments')
         group.add_argument('--postcodes', action='store_true',
                            help='Update postcode centroid table')
 
     @staticmethod
     def add_args(parser):
         group = parser.add_argument_group('Data arguments')
         group.add_argument('--postcodes', action='store_true',
                            help='Update postcode centroid table')
+        group.add_argument('--word-tokens', action='store_true',
+                           help='Clean up search terms')
         group.add_argument('--word-counts', action='store_true',
                            help='Compute frequency of full-word search terms')
         group.add_argument('--address-levels', action='store_true',
         group.add_argument('--word-counts', action='store_true',
                            help='Compute frequency of full-word search terms')
         group.add_argument('--address-levels', action='store_true',
@@ -32,7 +48,7 @@ class UpdateRefresh:
         group.add_argument('--functions', action='store_true',
                            help='Update the PL/pgSQL functions in the database')
         group.add_argument('--wiki-data', action='store_true',
         group.add_argument('--functions', action='store_true',
                            help='Update the PL/pgSQL functions in the database')
         group.add_argument('--wiki-data', action='store_true',
-                           help='Update Wikipedia/data importance numbers.')
+                           help='Update Wikipedia/data importance numbers')
         group.add_argument('--importance', action='store_true',
                            help='Recompute place importances (expensive!)')
         group.add_argument('--website', action='store_true',
         group.add_argument('--importance', action='store_true',
                            help='Recompute place importances (expensive!)')
         group.add_argument('--website', action='store_true',
@@ -43,29 +59,45 @@ class UpdateRefresh:
         group.add_argument('--enable-debug-statements', action='store_true',
                            help='Enable debug warning statements in functions')
 
         group.add_argument('--enable-debug-statements', action='store_true',
                            help='Enable debug warning statements in functions')
 
-    @staticmethod
-    def run(args):
-        from ..tools import refresh
+
+    def run(self, args):
+        from ..tools import refresh, postcodes
+        from ..indexer.indexer import Indexer
+
 
         if args.postcodes:
 
         if args.postcodes:
-            LOG.warning("Update postcodes centroid")
-            refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir)
+            if postcodes.can_compute(args.config.get_libpq_dsn()):
+                LOG.warning("Update postcodes centroid")
+                tokenizer = self._get_tokenizer(args.config)
+                postcodes.update_postcodes(args.config.get_libpq_dsn(),
+                                           args.project_dir, tokenizer)
+                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
+                                  args.threads or 1)
+                indexer.index_postcodes()
+            else:
+                LOG.error("The place table doesn't exist. "
+                          "Postcode updates on a frozen database is not possible.")
+
+        if args.word_tokens:
+            LOG.warning('Updating word tokens')
+            tokenizer = self._get_tokenizer(args.config)
+            tokenizer.update_word_tokens()
 
         if args.word_counts:
 
         if args.word_counts:
-            LOG.warning('Recompute frequency of full-word search terms')
-            refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir)
+            LOG.warning('Recompute word statistics')
+            self._get_tokenizer(args.config).update_statistics()
 
         if args.address_levels:
 
         if args.address_levels:
-            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
-            LOG.warning('Updating address levels from %s', cfg)
+            LOG.warning('Updating address levels')
             with connect(args.config.get_libpq_dsn()) as conn:
             with connect(args.config.get_libpq_dsn()) as conn:
-                refresh.load_address_levels_from_file(conn, cfg)
+                refresh.load_address_levels_from_config(conn, args.config)
 
         if args.functions:
             LOG.warning('Create functions')
             with connect(args.config.get_libpq_dsn()) as conn:
 
         if args.functions:
             LOG.warning('Create functions')
             with connect(args.config.get_libpq_dsn()) as conn:
-                refresh.create_functions(conn, args.config, args.sqllib_dir,
+                refresh.create_functions(conn, args.config,
                                          args.diffs, args.enable_debug_statements)
                                          args.diffs, args.enable_debug_statements)
+                self._get_tokenizer(args.config).update_sql_functions(args.config)
 
         if args.wiki_data:
             data_path = Path(args.config.WIKIPEDIA_DATA_PATH
 
         if args.wiki_data:
             data_path = Path(args.config.WIKIPEDIA_DATA_PATH
@@ -85,6 +117,16 @@ class UpdateRefresh:
         if args.website:
             webdir = args.project_dir / 'website'
             LOG.warning('Setting up website directory at %s', webdir)
         if args.website:
             webdir = args.project_dir / 'website'
             LOG.warning('Setting up website directory at %s', webdir)
-            refresh.setup_website(webdir, args.phplib_dir, args.config)
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.setup_website(webdir, args.config, conn)
 
         return 0
 
         return 0
+
+
+    def _get_tokenizer(self, config):
+        if self.tokenizer is None:
+            from ..tokenizer import factory as tokenizer_factory
+
+            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+
+        return self.tokenizer