"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()


class SetupAll:
    """ Create a new Nominatim database from an OSM file.
    """
    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files).')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help=("Do not partition search indices "
                                 "(speeds up import of single country extracts)"))
        group.add_argument('--no-updates', action='store_true',
                           help="Do not keep tables that are only needed for "
                                "updating the database later")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during indexing')
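
    # Illustrative invocations of this subcommand through the `nominatim`
    # CLI wrapper (the exact frontend lives outside this module):
    #
    #   nominatim import --osm-file planet.osm.pbf --osm2pgsql-cache 4000
    #   nominatim import --continue indexing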

    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze, country_info
        from ..indexer.indexer import Indexer

        country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml')
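
        # The steps below run only for a fresh import; with --continue the
        # import resumes at one of the later phases instead.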
        if args.continue_at is None:
            files = args.get_osm_file_list()

            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.data_dir,
                                              args.no_partitions)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            SetupAll._setup_tables(args.config, args.reverse_only)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')
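
        # The remaining phases run both for fresh imports and for imports
        # resumed with --continue; the continue_at checks decide which of
        # them still need to run.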
        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)

        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
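
        # When resuming inside the indexing phase, first recreate the helper
        # index that lets the indexer skip already processed places quickly.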
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)
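
        # No guard here: post-processing always runs, and
        # --continue db-postprocess resumes directly at this point.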
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)
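
        # Finally record the data timestamp and the Nominatim version that
        # created the database.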
        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0


    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh
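
        # The SQL functions are (re)created in several passes, presumably
        # because functions, tables and triggers depend on each other: each
        # pass can take the schema objects created so far into account.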
        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_file(conn, Path(config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)


    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)


    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally created at the end of the import process
            for later updates. When indexing was only partially done, this
            index can greatly improve the speed of going through already
            indexed data.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))


    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)
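
# Minimal sketch of how a subcommand class like this one is driven; the
# real wiring lives in the CLI frontend and may differ in detail:
#
#   import argparse
#
#   parser = argparse.ArgumentParser(prog='nominatim import')
#   SetupAll.add_args(parser)
#   args = parser.parse_args()
#   # the frontend also attaches config, project_dir, threads etc. to args
#   exit_code = SetupAll.run(args)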