1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Implementation of 'refresh' subcommand.
11 from pathlib import Path
13 from nominatim.db.connection import connect
15 # Do not repeat documentation of subcommand classes.
16 # pylint: disable=C0111
17 # Using non-top-level imports to avoid potentially unused imports.
18 # pylint: disable=E0012,C0415
20 LOG = logging.getLogger()
24 Recompute auxiliary data used by the indexing process.
26 This sub-command updates various static data and functions in the database.
27 It usually needs to be run after changing various aspects of the
28 configuration. The configuration documentation will mention the exact
29 command to use in such a case.
31 Warning: the 'refresh' command must not be run in parallel with other update
32 commands like 'replication' or 'add-data'.
39 group = parser.add_argument_group('Data arguments')
40 group.add_argument('--postcodes', action='store_true',
41 help='Update postcode centroid table')
42 group.add_argument('--word-tokens', action='store_true',
43 help='Clean up search terms')
44 group.add_argument('--word-counts', action='store_true',
45 help='Compute frequency of full-word search terms')
46 group.add_argument('--address-levels', action='store_true',
47 help='Reimport address level configuration')
48 group.add_argument('--functions', action='store_true',
49 help='Update the PL/pgSQL functions in the database')
50 group.add_argument('--wiki-data', action='store_true',
51 help='Update Wikipedia/data importance numbers')
52 group.add_argument('--importance', action='store_true',
53 help='Recompute place importances (expensive!)')
54 group.add_argument('--website', action='store_true',
55 help='Refresh the directory that serves the scripts for the web API')
56 group = parser.add_argument_group('Arguments for function refresh')
57 group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
58 help='Do not enable code for propagating updates')
59 group.add_argument('--enable-debug-statements', action='store_true',
60 help='Enable debug warning statements in functions')
64 from ..tools import refresh, postcodes
65 from ..indexer.indexer import Indexer
69 if postcodes.can_compute(args.config.get_libpq_dsn()):
70 LOG.warning("Update postcodes centroid")
71 tokenizer = self._get_tokenizer(args.config)
72 postcodes.update_postcodes(args.config.get_libpq_dsn(),
73 args.project_dir, tokenizer)
74 indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
76 indexer.index_postcodes()
78 LOG.error("The place table doesn't exist. "
79 "Postcode updates on a frozen database is not possible.")
82 LOG.warning('Updating word tokens')
83 tokenizer = self._get_tokenizer(args.config)
84 tokenizer.update_word_tokens()
87 LOG.warning('Recompute word statistics')
88 self._get_tokenizer(args.config).update_statistics()
90 if args.address_levels:
91 LOG.warning('Updating address levels')
92 with connect(args.config.get_libpq_dsn()) as conn:
93 refresh.load_address_levels_from_config(conn, args.config)
96 LOG.warning('Create functions')
97 with connect(args.config.get_libpq_dsn()) as conn:
98 refresh.create_functions(conn, args.config,
99 args.diffs, args.enable_debug_statements)
100 self._get_tokenizer(args.config).update_sql_functions(args.config)
103 data_path = Path(args.config.WIKIPEDIA_DATA_PATH
105 LOG.warning('Import wikipdia article importance from %s', data_path)
106 if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
108 LOG.fatal('FATAL: Wikipedia importance dump file not found')
111 # Attention: importance MUST come after wiki data import.
113 LOG.warning('Update importance values for database')
114 with connect(args.config.get_libpq_dsn()) as conn:
115 refresh.recompute_importance(conn)
118 webdir = args.project_dir / 'website'
119 LOG.warning('Setting up website directory at %s', webdir)
120 # This is a little bit hacky: call the tokenizer setup, so that
121 # the tokenizer directory gets repopulated as well, in case it
123 self._get_tokenizer(args.config)
124 with connect(args.config.get_libpq_dsn()) as conn:
125 refresh.setup_website(webdir, args.config, conn)
def _get_tokenizer(self, config):
    """ Return the tokenizer held in `self.tokenizer`.

        When no tokenizer has been stored yet, one is obtained from the
        tokenizer factory via `get_tokenizer_for_db(config)` and kept on
        the instance, so later calls return the same object.
    """
    if self.tokenizer is not None:
        # A tokenizer was already stored; reuse it.
        return self.tokenizer

    # Imported locally, like the other non-top-level imports in this file.
    from ..tokenizer import factory as tokenizer_factory

    self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    return self.tokenizer