"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil
from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """

    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help=("Do not partition search indices "
                                 "(speeds up import of single country extracts)"))
        group.add_argument('--no-updates', action='store_true',
                           help="Do not keep tables that are only needed for "
                                "updating the database later")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during indexing')

    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze, country_info
        from ..indexer.indexer import Indexer

        country_info.setup_country_config(args.config)
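
        # Stage 1 only runs for a fresh import, i.e. when no --continue
        # option was given: create the database and load the raw OSM data.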
        if args.continue_at is None:
            files = args.get_osm_file_list()

            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.data_dir,
                                              args.no_partitions)
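
            # osm2pgsql loads the OSM file(s) into the intermediate place
            # table; the osm2pgsql settings (e.g. the --osm2pgsql-cache size)
            # are collected from the command line and configuration.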
            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            SetupAll._setup_tables(args.config, args.reverse_only)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')
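
        # Stage 2 runs for fresh imports and for --continue load-data:
        # truncate any partially filled tables and reload the placex data.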
        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)
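
        # A fresh import (re)initialises the tokenizer; when continuing at a
        # later stage, the tokenizer set up by the original run is reloaded.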
        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
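
        # Stage 3: indexing. When an interrupted indexing run is continued,
        # first recreate the helper index that lets the indexer skip over
        # places that are already done.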
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)
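
        # Final stage: create the search indices and, when --no-updates was
        # given, drop the tables that are only needed for applying updates.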
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)
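
        # The website directory receives the scripts for the web frontend,
        # generated from the current configuration.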
        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)
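
        # Finally, store the data timestamp and the version of Nominatim
        # that created the database as database properties.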
        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0

    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh
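
        # Functions are created in several passes: each pass picks up
        # definitions that depend on the tables created since the last one.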
        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_file(conn, Path(config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)

    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)

    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally created at the end of the import process
            for later updates. When indexing was only partially done, this
            index can greatly speed up going through the already indexed data.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return
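
        # The index is partial ('WHERE indexed_status > 0'), so it only
        # covers rows still waiting to be indexed and stays cheap to build.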
        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))

    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)