"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.

    This sub-command sets up a new Nominatim database from scratch, starting
    with the creation of a new database in PostgreSQL. The user running this
    command needs superuser rights on the database.
    """
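
    # Illustrative invocations (hypothetical file name, flags as defined
    # in add_args() below):
    #   nominatim import --osm-file planet-extract.osm.pbf
    #   nominatim import --continue indexing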

    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help=("Do not partition search indices "
                                 "(speeds up import of single country extracts)"))
        group.add_argument('--no-updates', action='store_true',
                           help="Do not keep tables that are only needed for "
                                "updating the database later")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during index (expert only)')


    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze, country_info
        from ..indexer.indexer import Indexer

        country_info.setup_country_config(args.config)

        if args.continue_at is None:
            files = args.get_osm_file_list()

            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.data_dir,
                                              args.no_partitions)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)
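            # (0, 1) are the fall-back osm2pgsql cache size and thread count,
            # used when --osm2pgsql-cache and --threads are not given.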

            SetupAll._setup_tables(args.config, args.reverse_only)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)
100 LOG.warning("Setting up tokenizer")
101 tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
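
        # Indexing computes the address and search terms for each place;
        # it is typically the longest-running stage of the import.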
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            # When resuming at the indexing stage, recreate the helper index
            # that makes it cheap to find the still unindexed places.
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)

        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

        LOG.warning('Recompute word counts')
        tokenizer.update_statistics()

        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        SetupAll._set_database_date(args.config.get_libpq_dsn())

        return 0


    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh
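
        # The SQL functions are created in several passes: tables and triggers
        # reference them, while the final function versions in turn depend on
        # the complete table layout, so they are recreated after each
        # structural change.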
        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_config(conn, config)
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)


    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)


    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally created at the end of the import process
            for later updates. When indexing was only partially done, this
            index can greatly speed up skipping over the already indexed places.
        """
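        # The partial index (WHERE indexed_status > 0) only covers rows that
        # still await indexing, so it remains small and cheap to build.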
        if conn.index_exists('idx_placex_pendingsector'):
            return

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))


    @staticmethod
    def _set_database_date(dsn):
        """ Determine the database date and set the status accordingly.
        """
        with connect(dsn) as conn:
            try:
                dbdate = status.compute_database_date(conn)
                status.set_status(conn, dbdate)
                LOG.info('Database is at %s.', dbdate)
            except Exception as exc: # pylint: disable=broad-except
                LOG.error('Cannot determine date of database: %s', exc)
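
            # Record the software version the database was created with, so
            # that later runs (e.g. migrations) can detect the schema layout.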
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))