# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'import' subcommand.
"""
from typing import Optional
import argparse
import logging
from pathlib import Path

import psutil

from nominatim_core.errors import UsageError
from nominatim_core.config import Configuration
from nominatim_core.db.connection import connect
from nominatim_core.db import status, properties
from ..tokenizer.base import AbstractTokenizer
from ..version import NOMINATIM_VERSION
from .args import NominatimArgs

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid otherwise unused imports.
# pylint: disable=C0415

LOG = logging.getLogger()


class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.

    This sub-command sets up a new Nominatim database from scratch, starting
    with creating a new database in PostgreSQL. The user running this command
    needs superuser rights on the database.
    """
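
    # Typical invocations (a sketch; assumes the standard `nominatim`
    # command-line entry point, which may be named differently in your
    # installation):
    #
    #   nominatim import --osm-file planet.osm.pbf
    #   nominatim import --continue indexing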

    def add_args(self, parser: argparse.ArgumentParser) -> None:
        group1 = parser.add_argument_group('Required arguments')
        group1.add_argument('--osm-file', metavar='FILE', action='append',
                            help='OSM file to be imported'
                                 ' (repeat for importing multiple files)',
                            default=None)
        group1.add_argument('--continue', dest='continue_at',
                            choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted',
                            default=None)
        group2 = parser.add_argument_group('Optional arguments')
        group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
        group2.add_argument('--reverse-only', action='store_true',
                            help='Do not create tables and indexes for searching')
        group2.add_argument('--no-partitions', action='store_true',
                            help=("Do not partition search indices "
                                  "(speeds up import of single country extracts)"))
        group2.add_argument('--no-updates', action='store_true',
                            help="Do not keep tables that are only needed for "
                                 "updating the database later")
        group2.add_argument('--offline', action='store_true',
                            help="Do not attempt to load any additional data from the internet")
        group3 = parser.add_argument_group('Expert options')
        group3.add_argument('--ignore-errors', action='store_true',
                            help='Continue import even when errors in SQL are present')
        group3.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index (expert only)')
        group3.add_argument('--prepare-database', action='store_true',
                            help='Create the database but do not import any data')


    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
        from ..data import country_info
        from ..tools import database_import, refresh, postcodes, freeze
        from ..indexer.indexer import Indexer
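
        # Use the requested number of threads if given; otherwise fall back
        # to the CPU count reported by psutil, or to a single thread if that
        # cannot be determined.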
        num_threads = args.threads or psutil.cpu_count() or 1

        country_info.setup_country_config(args.config)
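
        # Reject impossible argument combinations before touching the database.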
        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
            raise UsageError("No input files (use --osm-file).")

        if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
            raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")

        if args.continue_at is not None and args.prepare_database:
            raise UsageError(
                "Cannot use --continue and --prepare-database together."
            )
        if args.prepare_database or args.continue_at is None:
            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            if args.prepare_database:
                LOG.warning('Required database setup completed.')
                return 0

        if args.continue_at in (None, 'import-from-file'):
            files = args.get_osm_file_list()
            if not files:
                raise UsageError("No input files (use --osm-file).")

            if args.continue_at in ('import-from-file', None):
                # Check if the correct plugins are installed
                database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
                LOG.warning('Setting up country tables')
                country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                                  args.config.lib_dir.data,
                                                  args.no_partitions)

                LOG.warning('Importing OSM data file')
                database_import.import_osm_data(files,
                                                args.osm2pgsql_options(0, 1),
                                                drop=args.no_updates,
                                                ignore_errors=args.ignore_errors)

                LOG.warning('Importing wikipedia importance data')
                data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
                if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                     data_path) > 0:
                    LOG.error('Wikipedia importance dump file not found. '
                              'Calculating importance values of locations will not '
                              'use Wikipedia importance data.')

                LOG.warning('Importing secondary importance raster data')
                if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                       args.project_dir) != 0:
                    LOG.error('Secondary importance file not imported. '
                              'Falling back to default ranking.')

                self._setup_tables(args.config, args.reverse_only)
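
        # The data tables are truncated before loading, so this stage can be
        # restarted safely with --continue load-data.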
        if args.continue_at in ('import-from-file', 'load-data', None):
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(), num_threads)

        LOG.warning("Setting up tokenizer")
        tokenizer = self._get_tokenizer(args.continue_at, args.config)

        if args.continue_at in ('import-from-file', 'load-data', None):
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)

        if args.continue_at in ('import-from-file', 'load-data', 'indexing', None):
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
            indexer.index_full(analyse=not args.index_noanalyse)

        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates,
                                                  threads=num_threads)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.get_str_list('LANGUAGES'))
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

        LOG.warning('Recompute word counts')
        tokenizer.update_statistics(args.config, threads=num_threads)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        self._finalize_database(args.config.get_libpq_dsn(), args.offline)

        return 0


    def _setup_tables(self, config: Configuration, reverse_only: bool) -> None:
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_config(conn, config)
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)


    def _get_tokenizer(self, continue_at: Optional[str],
                       config: Configuration) -> AbstractTokenizer:
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at in ('import-from-file', 'load-data', None):
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)


    def _finalize_database(self, dsn: str, offline: bool) -> None:
        """ Determine the database date and set the status accordingly.
        """
        with connect(dsn) as conn:
            properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
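
            # The database date is informational (replication updates use it
            # to pick their starting point), so failing to determine it must
            # not abort the import.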
            try:
                dbdate = status.compute_database_date(conn, offline)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)