1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Implementation of 'refresh' subcommand.
"""
import logging
from argparse import ArgumentTypeError
from pathlib import Path

from nominatim.db.connection import connect
16 # Do not repeat documentation of subcommand classes.
17 # pylint: disable=C0111
18 # Using non-top-level imports to avoid eventually unused imports.
19 # pylint: disable=E0012,C0415
21 LOG = logging.getLogger()
23 def _parse_osm_object(obj):
24 """ Parse the given argument into a tuple of OSM type and ID.
25 Raises an ArgumentError if the format is not recognized.
27 if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdigit():
28 raise ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")
30 return (obj[0].upper(), int(obj[1:]))
35 Recompute auxiliary data used by the indexing process.
This sub-command updates various static data and functions in the database.
38 It usually needs to be run after changing various aspects of the
39 configuration. The configuration documentation will mention the exact
40 command to use in such case.
42 Warning: the 'update' command must not be run in parallel with other update
43 commands like 'replication' or 'add-data'.
# NOTE(review): the enclosing ``def add_args(parser)`` header is missing
# from the visible source; the statements below register this subcommand's
# command-line options on the given argparse parser.

# Flags selecting which data to refresh. Each one is an independent
# store_true switch; several may be combined in one invocation.
group = parser.add_argument_group('Data arguments')
group.add_argument('--postcodes', action='store_true',
                   help='Update postcode centroid table')
group.add_argument('--word-tokens', action='store_true',
                   help='Clean up search terms')
group.add_argument('--word-counts', action='store_true',
                   help='Compute frequency of full-word search terms')
group.add_argument('--address-levels', action='store_true',
                   help='Reimport address level configuration')
group.add_argument('--functions', action='store_true',
                   help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true',
                   help='Update Wikipedia/data importance numbers')
group.add_argument('--importance', action='store_true',
                   help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
                   help='Refresh the directory that serves the scripts for the web API')
# Repeatable options taking an OSM object reference; values are parsed
# into (type, id) tuples by _parse_osm_object and collected into lists.
group.add_argument('--data-object', action='append',
                   type=_parse_osm_object, metavar='OBJECT',
                   help='Mark the given OSM object as requiring an update'
                        ' (format: [NWR]<id>)')
group.add_argument('--data-area', action='append',
                   type=_parse_osm_object, metavar='OBJECT',
                   help='Mark the area around the given OSM object as requiring an update'
                        ' (format: [NWR]<id>)')

# Options modifying how the PL/pgSQL functions are regenerated
# (consumed as args.diffs and args.enable_debug_statements).
group = parser.add_argument_group('Arguments for function refresh')
group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
                   help='Do not enable code for propagating updates')
group.add_argument('--enable-debug-statements', action='store_true',
                   help='Enable debug warning statements in functions')
def run(self, args): #pylint: disable=too-many-branches
    """ Execute the refresh actions selected by the command-line flags.

        NOTE(review): this listing appears truncated — several of the
        ``if args.<flag>:`` guards that normally precede each action,
        some call continuation lines and the final return are missing
        from the visible text. The comments below mark the gaps; confirm
        against the full file before relying on them.
    """
    # Non-top-level imports to keep startup of unrelated subcommands cheap.
    from ..tools import refresh, postcodes
    from ..indexer.indexer import Indexer

    # Postcode refresh: recompute the centroid table, then index the new
    # rows. Presumably guarded by ``if args.postcodes:`` — the guard line
    # is not visible here; TODO confirm.
    if postcodes.can_compute(args.config.get_libpq_dsn()):
        LOG.warning("Update postcodes centroid")
        tokenizer = self._get_tokenizer(args.config)
        postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                   args.project_dir, tokenizer)
        # NOTE(review): the continuation line of this call (remaining
        # Indexer constructor arguments) is missing from the source.
        indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
        indexer.index_postcodes()
    # Error path for databases without a place table (frozen databases);
    # the introducing ``else:`` line is not visible here.
    LOG.error("The place table doesn't exist. "
              "Postcode updates on a frozen database is not possible.")

    # Clean up stale search terms via the tokenizer
    # (guard flag line not visible; presumably args.word_tokens).
    LOG.warning('Updating word tokens')
    tokenizer = self._get_tokenizer(args.config)
    tokenizer.update_word_tokens()

    # Recompute full-word frequency statistics
    # (guard flag line not visible; presumably args.word_counts).
    LOG.warning('Recompute word statistics')
    self._get_tokenizer(args.config).update_statistics()

    # Reload the address-rank configuration into the database.
    if args.address_levels:
        LOG.warning('Updating address levels')
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.load_address_levels_from_config(conn, args.config)

    # Regenerate PL/pgSQL functions, honouring the diff-update and
    # debug-statement switches; then refresh the tokenizer's SQL functions.
    # (guard flag line not visible; presumably args.functions).
    LOG.warning('Create functions')
    with connect(args.config.get_libpq_dsn()) as conn:
        refresh.create_functions(conn, args.config,
                                 args.diffs, args.enable_debug_statements)
    self._get_tokenizer(args.config).update_sql_functions(args.config)

    # Import Wikipedia importance data from the configured dump location.
    # NOTE(review): continuation lines of both the Path(...) expression
    # and the import_wikipedia_articles(...) call are missing here.
    data_path = Path(args.config.WIKIPEDIA_DATA_PATH
    LOG.warning('Import wikipdia article importance from %s', data_path)
    if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
        LOG.fatal('FATAL: Wikipedia importance dump file not found')

    # Attention: importance MUST come after wiki data import.
    LOG.warning('Update importance values for database')
    with connect(args.config.get_libpq_dsn()) as conn:
        refresh.recompute_importance(conn)

    # Rebuild the website script directory for the web API.
    webdir = args.project_dir / 'website'
    LOG.warning('Setting up website directory at %s', webdir)
    # This is a little bit hacky: call the tokenizer setup, so that
    # the tokenizer directory gets repopulated as well, in case it
    # needs regenerating (comment truncated in source — confirm intent).
    self._get_tokenizer(args.config)
    with connect(args.config.get_libpq_dsn()) as conn:
        refresh.setup_website(webdir, args.config, conn)

    # Mark individual OSM objects (non-recursive) or whole areas around
    # them (recursive) as needing reindexing.
    if args.data_object or args.data_area:
        with connect(args.config.get_libpq_dsn()) as conn:
            for obj in args.data_object or []:
                refresh.invalidate_osm_object(*obj, conn, recursive=False)
            for obj in args.data_area or []:
                refresh.invalidate_osm_object(*obj, conn, recursive=True)
158 def _get_tokenizer(self, config):
159 if self.tokenizer is None:
160 from ..tokenizer import factory as tokenizer_factory
162 self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
164 return self.tokenizer