"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports so that heavy modules are only loaded when needed.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()


class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """
    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help='Do not partition search indices '
                                '(speeds up import of single country extracts)')
        group.add_argument('--no-updates', action='store_true',
                           help='Do not keep tables that are only needed for '
                                'updating the database later')
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during indexing')
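
    # Typical invocations (illustrative only; file names are placeholders):
    #   nominatim import --osm-file planet-extract.osm.pbf
    #   nominatim import --continue indexing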

    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze
        from ..indexer.indexer import Indexer
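
        # Steps up to and including the osm2pgsql import only run on a
        # fresh import, i.e. when no --continue stage was given.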
        if args.continue_at is None:
            files = args.get_osm_file_list()

            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    args.data_dir,
                                                    args.no_partitions,
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)
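
            # With the raw OSM data in place, create the final table
            # layout together with the triggers and SQL functions behind it.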
            SetupAll._setup_tables(args.config, args.reverse_only)
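
            # Importance scores derived from Wikipedia improve the ranking
            # of search results. A missing dump is not fatal; default
            # importances are used instead.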
            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)
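
        # The tokenizer determines how names are normalised and stored.
        # A fresh import (or a restart from 'load-data') must create it;
        # later stages reload the tokenizer already saved in the database.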
        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
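
        # Indexing resolves the address hierarchy for each place. When
        # restarting half-way through, a temporary support index helps to
        # skip quickly over the places that are already done.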
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)
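
        # Build the remaining indices needed for serving search queries.
        # With --no-updates, tables that are only required for keeping the
        # database up to date are dropped again to save space.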
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            database_import.create_country_names(conn, tokenizer,
                                                 args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)
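
        # Finally, write out the frontend scripts and record the state of
        # the freshly imported database.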
        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0

    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_file(conn, Path(config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)

    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (Re)initialise the tokenizer data from the configuration.
            return tokenizer_factory.create_tokenizer(config)

        # Just load the tokenizer that is already set up in the database.
        return tokenizer_factory.get_tokenizer_for_db(config)

    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally only created at the end of the import
            process for later updates. When indexing was only partially
            done, the index can greatly speed up skipping over already
            indexed data.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))

    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)