1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Implementation of 'refresh' subcommand.
10 from typing import Tuple, Optional
13 from pathlib import Path
16 from ..config import Configuration
17 from ..db.connection import connect, table_exists
18 from ..tokenizer.base import AbstractTokenizer
19 from .args import NominatimArgs
# Logger shared by all functions in this module; getLogger() without a name
# returns the root logger.
LOG = logging.getLogger()
def _parse_osm_object(obj: str) -> Tuple[str, int]:
    """ Parse the given argument into a tuple of OSM type and ID.

        The argument must consist of one of the letters N, W or R (upper
        or lower case) directly followed by the ID written in ASCII digits.
        The type letter in the result is always upper-case.

        Raises an argparse.ArgumentTypeError if the format is not recognized.
    """
    osm_type, osm_id = obj[:1], obj[1:]

    # str.isdigit() on its own also accepts characters such as superscript
    # digits, which int() cannot convert. Restricting the ID to ASCII makes
    # sure malformed input always ends in the ArgumentTypeError below.
    if osm_type.lower() not in ('n', 'r', 'w') \
       or not (osm_id.isascii() and osm_id.isdigit()):
        raise argparse.ArgumentTypeError("Cannot parse OSM ID. Expect format: [N|W|R]<id>.")

    return (osm_type.upper(), int(osm_id))
37 Recompute auxiliary data used by the indexing process.
    This sub-command updates various static data and functions in the database.
    It usually needs to be run after changing various aspects of the
    configuration. The configuration documentation will mention the exact
    command to use in such a case.
44 Warning: the 'update' command must not be run in parallel with other update
45 commands like 'replication' or 'add-data'.
47 def __init__(self) -> None:
48 self.tokenizer: Optional[AbstractTokenizer] = None
50 def add_args(self, parser: argparse.ArgumentParser) -> None:
51 group = parser.add_argument_group('Data arguments')
52 group.add_argument('--postcodes', action='store_true',
53 help='Update postcode centroid table')
54 group.add_argument('--word-tokens', action='store_true',
55 help='Clean up search terms')
56 group.add_argument('--word-counts', action='store_true',
57 help='Compute frequency of full-word search terms')
58 group.add_argument('--address-levels', action='store_true',
59 help='Reimport address level configuration')
60 group.add_argument('--functions', action='store_true',
61 help='Update the PL/pgSQL functions in the database')
62 group.add_argument('--wiki-data', action='store_true',
63 help='Update Wikipedia/data importance numbers')
64 group.add_argument('--secondary-importance', action='store_true',
65 help='Update secondary importance raster data')
66 group.add_argument('--importance', action='store_true',
67 help='Recompute place importances (expensive!)')
68 group.add_argument('--website', action='store_true',
69 help='DEPRECATED. This function has no function anymore'
70 ' and will be removed in a future version.')
71 group.add_argument('--data-object', action='append',
72 type=_parse_osm_object, metavar='OBJECT',
73 help='Mark the given OSM object as requiring an update'
74 ' (format: [NWR]<id>)')
75 group.add_argument('--data-area', action='append',
76 type=_parse_osm_object, metavar='OBJECT',
77 help='Mark the area around the given OSM object as requiring an update'
78 ' (format: [NWR]<id>)')
80 group = parser.add_argument_group('Arguments for function refresh')
81 group.add_argument('--no-diff-updates', action='store_false', dest='diffs',
82 help='Do not enable code for propagating updates')
83 group.add_argument('--enable-debug-statements', action='store_true',
84 help='Enable debug warning statements in functions')
86 def run(self, args: NominatimArgs) -> int:
87 from ..tools import refresh, postcodes
88 from ..indexer.indexer import Indexer
90 need_function_refresh = args.functions
93 if postcodes.can_compute(args.config.get_libpq_dsn()):
94 LOG.warning("Update postcodes centroid")
95 tokenizer = self._get_tokenizer(args.config)
96 postcodes.update_postcodes(args.config.get_libpq_dsn(),
97 args.project_dir, tokenizer)
98 indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
100 asyncio.run(indexer.index_postcodes())
102 LOG.error("The place table doesn't exist. "
103 "Postcode updates on a frozen database is not possible.")
106 LOG.warning('Updating word tokens')
107 tokenizer = self._get_tokenizer(args.config)
108 tokenizer.update_word_tokens()
111 LOG.warning('Recompute word statistics')
112 self._get_tokenizer(args.config).update_statistics(args.config,
113 threads=args.threads or 1)
115 if args.address_levels:
116 LOG.warning('Updating address levels')
117 with connect(args.config.get_libpq_dsn()) as conn:
118 refresh.load_address_levels_from_config(conn, args.config)
120 # Attention: must come BEFORE functions
121 if args.secondary_importance:
122 with connect(args.config.get_libpq_dsn()) as conn:
123 # If the table did not exist before, then the importance code
124 # needs to be enabled.
125 if not table_exists(conn, 'secondary_importance'):
126 args.functions = True
128 LOG.warning('Import secondary importance raster data from %s', args.project_dir)
129 if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
130 args.project_dir) > 0:
131 LOG.fatal('FATAL: Cannot update secondary importance raster data')
133 need_function_refresh = True
136 data_path = Path(args.config.WIKIPEDIA_DATA_PATH
138 LOG.warning('Import wikipedia article importance from %s', data_path)
139 if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
141 LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
143 need_function_refresh = True
145 if need_function_refresh:
146 LOG.warning('Create functions')
147 with connect(args.config.get_libpq_dsn()) as conn:
148 refresh.create_functions(conn, args.config,
149 args.diffs, args.enable_debug_statements)
150 self._get_tokenizer(args.config).update_sql_functions(args.config)
152 # Attention: importance MUST come after wiki data import and after functions.
154 LOG.warning('Update importance values for database')
155 with connect(args.config.get_libpq_dsn()) as conn:
156 refresh.recompute_importance(conn)
159 LOG.error('WARNING: Website setup is no longer required. '
160 'This function will be removed in future version of Nominatim.')
162 if args.data_object or args.data_area:
163 with connect(args.config.get_libpq_dsn()) as conn:
164 for obj in args.data_object or []:
165 refresh.invalidate_osm_object(*obj, conn, recursive=False)
166 for obj in args.data_area or []:
167 refresh.invalidate_osm_object(*obj, conn, recursive=True)
172 def _get_tokenizer(self, config: Configuration) -> AbstractTokenizer:
173 if self.tokenizer is None:
174 from ..tokenizer import factory as tokenizer_factory
176 self.tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
178 return self.tokenizer