]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/clicmd/refresh.py
reinit the tokenizer directory on website refresh
[nominatim.git] / nominatim / clicmd / refresh.py
index baff9b6acc0c38bb94cbe6c3f36dfdbba542316e..3c245cd46e84b385a39288f537bb50813f8f284f 100644 (file)
@@ -1,11 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Implementation of 'refresh' subcommand.
 """
 import logging
 from pathlib import Path
 
 """
 Implementation of 'refresh' subcommand.
 """
 import logging
 from pathlib import Path
 
-from ..db.connection import connect
-from ..tools.exec_utils import run_legacy_script
+from nominatim.db.connection import connect
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -18,14 +23,24 @@ class UpdateRefresh:
     """\
     Recompute auxiliary data used by the indexing process.
 
     """\
     Recompute auxiliary data used by the indexing process.
 
-    These functions must not be run in parallel with other update commands.
+    This sub-commands updates various static data and functions in the database.
+    It usually needs to be run after changing various aspects of the
+    configuration. The configuration documentation will mention the exact
+    command to use in such case.
+
+    Warning: the 'update' command must not be run in parallel with other update
+             commands like 'replication' or 'add-data'.
     """
     """
+    def __init__(self):
+        self.tokenizer = None
 
     @staticmethod
     def add_args(parser):
         group = parser.add_argument_group('Data arguments')
         group.add_argument('--postcodes', action='store_true',
                            help='Update postcode centroid table')
 
     @staticmethod
     def add_args(parser):
         group = parser.add_argument_group('Data arguments')
         group.add_argument('--postcodes', action='store_true',
                            help='Update postcode centroid table')
+        group.add_argument('--word-tokens', action='store_true',
+                           help='Clean up search terms')
         group.add_argument('--word-counts', action='store_true',
                            help='Compute frequency of full-word search terms')
         group.add_argument('--address-levels', action='store_true',
         group.add_argument('--word-counts', action='store_true',
                            help='Compute frequency of full-word search terms')
         group.add_argument('--address-levels', action='store_true',
@@ -33,7 +48,7 @@ class UpdateRefresh:
         group.add_argument('--functions', action='store_true',
                            help='Update the PL/pgSQL functions in the database')
         group.add_argument('--wiki-data', action='store_true',
         group.add_argument('--functions', action='store_true',
                            help='Update the PL/pgSQL functions in the database')
         group.add_argument('--wiki-data', action='store_true',
-                           help='Update Wikipedia/data importance numbers.')
+                           help='Update Wikipedia/data importance numbers')
         group.add_argument('--importance', action='store_true',
                            help='Recompute place importances (expensive!)')
         group.add_argument('--website', action='store_true',
         group.add_argument('--importance', action='store_true',
                            help='Recompute place importances (expensive!)')
         group.add_argument('--website', action='store_true',
@@ -44,45 +59,78 @@ class UpdateRefresh:
         group.add_argument('--enable-debug-statements', action='store_true',
                            help='Enable debug warning statements in functions')
 
         group.add_argument('--enable-debug-statements', action='store_true',
                            help='Enable debug warning statements in functions')
 
-    @staticmethod
-    def run(args):
-        from ..tools import refresh
+
+    def run(self, args):
+        from ..tools import refresh, postcodes
+        from ..indexer.indexer import Indexer
+
 
         if args.postcodes:
 
         if args.postcodes:
-            LOG.warning("Update postcodes centroid")
-            conn = connect(args.config.get_libpq_dsn())
-            refresh.update_postcodes(conn, args.sqllib_dir)
-            conn.close()
+            if postcodes.can_compute(args.config.get_libpq_dsn()):
+                LOG.warning("Update postcodes centroid")
+                tokenizer = self._get_tokenizer(args.config)
+                postcodes.update_postcodes(args.config.get_libpq_dsn(),
+                                           args.project_dir, tokenizer)
+                indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
+                                  args.threads or 1)
+                indexer.index_postcodes()
+            else:
+                LOG.error("The place table doesn't exist. "
+                          "Postcode updates on a frozen database is not possible.")
+
+        if args.word_tokens:
+            LOG.warning('Updating word tokens')
+            tokenizer = self._get_tokenizer(args.config)
+            tokenizer.update_word_tokens()
 
         if args.word_counts:
 
         if args.word_counts:
-            LOG.warning('Recompute frequency of full-word search terms')
-            conn = connect(args.config.get_libpq_dsn())
-            refresh.recompute_word_counts(conn, args.data_dir)
-            conn.close()
+            LOG.warning('Recompute word statistics')
+            self._get_tokenizer(args.config).update_statistics()
 
         if args.address_levels:
 
         if args.address_levels:
-            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
-            LOG.warning('Updating address levels from %s', cfg)
-            conn = connect(args.config.get_libpq_dsn())
-            refresh.load_address_levels_from_file(conn, cfg)
-            conn.close()
+            LOG.warning('Updating address levels')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.load_address_levels_from_config(conn, args.config)
 
         if args.functions:
             LOG.warning('Create functions')
 
         if args.functions:
             LOG.warning('Create functions')
-            conn = connect(args.config.get_libpq_dsn())
-            refresh.create_functions(conn, args.config, args.sqllib_dir,
-                                     args.diffs, args.enable_debug_statements)
-            conn.close()
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.create_functions(conn, args.config,
+                                         args.diffs, args.enable_debug_statements)
+                self._get_tokenizer(args.config).update_sql_functions(args.config)
 
         if args.wiki_data:
 
         if args.wiki_data:
-            run_legacy_script('setup.php', '--import-wikipedia-articles',
-                              nominatim_env=args, throw_on_fail=True)
+            data_path = Path(args.config.WIKIPEDIA_DATA_PATH
+                             or args.project_dir)
+            LOG.warning('Import wikipdia article importance from %s', data_path)
+            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+                                                 data_path) > 0:
+                LOG.fatal('FATAL: Wikipedia importance dump file not found')
+                return 1
+
         # Attention: importance MUST come after wiki data import.
         if args.importance:
         # Attention: importance MUST come after wiki data import.
         if args.importance:
-            run_legacy_script('update.php', '--recompute-importance',
-                              nominatim_env=args, throw_on_fail=True)
+            LOG.warning('Update importance values for database')
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.recompute_importance(conn)
+
         if args.website:
         if args.website:
-            run_legacy_script('setup.php', '--setup-website',
-                              nominatim_env=args, throw_on_fail=True)
+            webdir = args.project_dir / 'website'
+            LOG.warning('Setting up website directory at %s', webdir)
+            # This is a little bit hacky: call the tokenizer setup, so that
+            # the tokenizer directory gets repopulated as well, in case it
+            # wasn't there yet.
+            self._get_tokenizer(args.config)
+            with connect(args.config.get_libpq_dsn()) as conn:
+                refresh.setup_website(webdir, args.config, conn)
 
         return 0
 
         return 0
+
+
+    def _get_tokenizer(self, config):
+        if self.tokenizer is None:
+            from ..tokenizer import factory as tokenizer_factory
+
+            self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+
+        return self.tokenizer