1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Implementation of 'refresh' subcommand.
10 from typing import Tuple, Optional
13 from pathlib import Path
15 from nominatim.config import Configuration
16 from nominatim.db.connection import connect
17 from nominatim.tokenizer.base import AbstractTokenizer
18 from nominatim.clicmd.args import NominatimArgs
20 # Do not repeat documentation of subcommand classes.
21 # pylint: disable=C0111
22 # Using non-top-level imports to avoid eventually unused imports.
23 # pylint: disable=E0012,C0415
# Module-wide logger. getLogger() is called without a name, so this is
# the root logger of the logging hierarchy.
LOG = logging.getLogger()
def _parse_osm_object(obj: str) -> Tuple[str, int]:
    """ Parse the given argument into a tuple of OSM type and ID.

        The first character selects the OSM type (N, W or R, in either
        case) and is returned in upper case. The remainder must consist
        of decimal digits only and is returned as an integer.

        Raises an argparse.ArgumentTypeError if the format is not
        recognized.
    """
    # isdecimal() instead of isdigit(): isdigit() also accepts characters
    # like superscript digits which int() cannot convert. Those would
    # leave this function as a ValueError instead of the parse error below.
    if len(obj) < 2 or obj[0].lower() not in 'nrw' or not obj[1:].isdecimal():
        raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")

    return (obj[0].upper(), int(obj[1:]))
39 Recompute auxiliary data used by the indexing process.
This sub-command updates various static data and functions in the database.
42 It usually needs to be run after changing various aspects of the
43 configuration. The configuration documentation will mention the exact
44 command to use in such case.
46 Warning: the 'update' command must not be run in parallel with other update
47 commands like 'replication' or 'add-data'.
def __init__(self) -> None:
    """ Set up the command without a tokenizer.

        The tokenizer attribute starts out as None and is only filled
        once a tokenizer is actually requested.
    """
    # Cache slot for the tokenizer instance, empty until first use.
    self.tokenizer: Optional[AbstractTokenizer] = None
def add_args(self, parser: argparse.ArgumentParser) -> None:
    """ Register the command-line options of the subcommand on `parser`.

        Two argument groups are created: 'Data arguments' with one switch
        per refresh task plus the two object invalidation options, and
        'Arguments for function refresh' with the options that are passed
        on when the database functions are recreated.
    """
    data_group = parser.add_argument_group('Data arguments')

    # Plain on/off switches, listed in the order they are registered.
    task_switches = (
        ('--postcodes', 'Update postcode centroid table'),
        ('--word-tokens', 'Clean up search terms'),
        ('--word-counts', 'Compute frequency of full-word search terms'),
        ('--address-levels', 'Reimport address level configuration'),
        ('--functions', 'Update the PL/pgSQL functions in the database'),
        ('--wiki-data', 'Update Wikipedia/data importance numbers'),
        ('--osm-views', 'Update OSM views/data importance numbers'),
        ('--importance', 'Recompute place importances (expensive!)'),
        ('--website',
         'Refresh the directory that serves the scripts for the web API'),
    )
    for flag, description in task_switches:
        data_group.add_argument(flag, action='store_true', help=description)

    # Repeatable options; every value is converted by _parse_osm_object.
    invalidation_options = (
        ('--data-object',
         'Mark the given OSM object as requiring an update'
         ' (format: [NWR]<id>)'),
        ('--data-area',
         'Mark the area around the given OSM object as requiring an update'
         ' (format: [NWR]<id>)'),
    )
    for flag, description in invalidation_options:
        data_group.add_argument(flag, action='append',
                                type=_parse_osm_object, metavar='OBJECT',
                                help=description)

    function_group = parser.add_argument_group('Arguments for function refresh')
    # Stored inverted: 'diffs' is false only when the switch is given.
    function_group.add_argument('--no-diff-updates', dest='diffs',
                                action='store_false',
                                help='Do not enable code for propagating updates')
    function_group.add_argument('--enable-debug-statements', action='store_true',
                                help='Enable debug warning statements in functions')
def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
    """ Run every refresh task that was selected on the command line.

        The tasks are executed in the fixed order below, independent of
        the order of the switches on the command line. Returns 0 when all
        selected tasks were run and 1 as soon as the wiki-data or the
        OSM-views import reports a failure.
    """
    from ..tools import refresh, postcodes
    from ..indexer.indexer import Indexer

    if args.postcodes:
        if postcodes.can_compute(args.config.get_libpq_dsn()):
            LOG.warning("Update postcodes centroid")
            tokenizer = self._get_tokenizer(args.config)
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
            # NOTE(review): third argument assumed to be the number of
            # indexing threads with a fallback of 1 - confirm.
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or 1)
            indexer.index_postcodes()
        else:
            LOG.error("The place table doesn't exist. "
                      "Postcode updates on a frozen database is not possible.")

    if args.word_tokens:
        LOG.warning('Updating word tokens')
        tokenizer = self._get_tokenizer(args.config)
        tokenizer.update_word_tokens()

    if args.word_counts:
        LOG.warning('Recompute word statistics')
        self._get_tokenizer(args.config).update_statistics()

    if args.address_levels:
        LOG.warning('Updating address levels')
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.load_address_levels_from_config(conn, args.config)

    if args.functions:
        LOG.warning('Create functions')
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.create_functions(conn, args.config,
                                     args.diffs, args.enable_debug_statements)
            self._get_tokenizer(args.config).update_sql_functions(args.config)

    if args.wiki_data:
        # NOTE(review): fallback to the project directory and the
        # "> 0 means failure" convention are assumed - confirm against
        # refresh.import_wikipedia_articles().
        data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                         or args.project_dir)
        LOG.warning('Import wikipedia article importance from %s', data_path)
        if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                             data_path) > 0:
            LOG.critical('FATAL: Wikipedia importance dump file not found')
            return 1

    if args.osm_views:
        data_path = Path(args.project_dir)
        LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
        num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
        # NOTE(review): status codes 1 (file missing) and 2 (PostGIS too
        # old) are assumed - confirm against
        # refresh.import_osm_views_geotiff().
        if num == 1:
            LOG.critical('FATAL: OSM views GeoTIFF file not found')
            return 1
        if num == 2:
            LOG.critical('FATAL: PostGIS version number is less than 3')
            return 1

    # Attention: importance MUST come after wiki data import.
    if args.importance:
        LOG.warning('Update importance values for database')
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.recompute_importance(conn)

    if args.website:
        webdir = args.project_dir / 'website'
        LOG.warning('Setting up website directory at %s', webdir)
        # This is a little bit hacky: call the tokenizer setup, so that
        # the tokenizer directory gets repopulated as well, in case it
        # wasn't there yet.
        self._get_tokenizer(args.config)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

    if args.data_object or args.data_area:
        with connect(args.config.get_libpq_dsn()) as conn:
            # Both options use action='append' and are None when unused.
            for obj in args.data_object or []:
                refresh.invalidate_osm_object(*obj, conn, recursive=False)
            for obj in args.data_area or []:
                refresh.invalidate_osm_object(*obj, conn, recursive=True)
            # NOTE(review): explicit commit assumed to be required for
            # the invalidations to persist - confirm.
            conn.commit()

    return 0
def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
    """ Return the tokenizer for the database, creating it on first use.

        The instance is stored in self.tokenizer, so later calls return
        the same object without going through the factory again.
    """
    if self.tokenizer is not None:
        return self.tokenizer

    # Imported here so that the factory is only loaded when needed.
    from ..tokenizer import factory as tokenizer_factory

    self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    return self.tokenizer