]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/clicmd/setup.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / clicmd / setup.py
1 """
2 Implementation of the 'import' subcommand.
3 """
4 import logging
5 from pathlib import Path
6
7 import psutil
8
9 from nominatim.db.connection import connect
10 from nominatim.db import status, properties
11 from nominatim.version import NOMINATIM_VERSION
12
13 # Do not repeat documentation of subcommand classes.
14 # pylint: disable=C0111
15 # Using non-top-level imports to avoid eventually unused imports.
16 # pylint: disable=E0012,C0415
17
18 LOG = logging.getLogger()
19
class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.

    This sub-command sets up a new Nominatim database from scratch starting
    with creating a new database in Postgresql. The user running this command
    needs superuser rights on the database.
    """
    # NOTE(review): the docstring above is kept verbatim because the CLI
    # framework presumably reuses it as the subcommand's help text - confirm.

    @staticmethod
    def add_args(parser):
        """ Register the command line arguments of the subcommand on `parser`.

            Exactly one of `--osm-file` (repeatable) and `--continue` must be
            given; all other arguments are optional flags.
        """
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help=("Do not partition search indices "
                                 "(speeds up import of single country extracts)"))
        group.add_argument('--no-updates', action='store_true',
                           help="Do not keep tables that are only needed for "
                                "updating the database later")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during index (expert only)')


    @staticmethod
    def run(args):
        """ Run the import and return 0 on completion.

            The import is a sequence of stages. `args.continue_at` selects
            the stage to resume from:

            * None: run everything, starting with database creation and
              the import of the OSM data.
            * 'load-data': restart with truncating and reloading the data
              tables, then re-create the tokenizer, compute postcodes and
              index.
            * 'indexing': load the existing tokenizer and resume indexing.
            * 'db-postprocess': load the existing tokenizer and only run
              the post-processing.

            Post-processing, website setup and writing of the database
            date and version are executed in every mode.
        """
        from ..tools import database_import, refresh, postcodes, freeze, country_info
        from ..indexer.indexer import Indexer

        country_info.setup_country_config(args.config)

        if args.continue_at is None:
            files = args.get_osm_file_list()

            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.data_dir,
                                              args.no_partitions)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            SetupAll._setup_tables(args.config, args.reverse_only)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            # A positive return value is reported but does not stop the import.
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)

        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)

        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            # Only a resumed indexing run gets the supporting index; on a
            # fresh data load nothing has been indexed yet.
            if args.continue_at == 'indexing':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)

        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)


        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            # NOMINATIM_VERSION is indexed as a 4-part sequence: 'a.b.c-d'.
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0


    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.

            The SQL functions are (re)created three times: before the tables,
            after the tables and address levels are in place and finally
            after the triggers and partition tables have been created.
            `reverse_only` is handed on to the table creation.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_config(conn, config)
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)


    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.

            A new tokenizer is created when the import starts from scratch
            (`continue_at` is None) or from 'load-data'. For all later
            stages the tokenizer already set up for the database is loaded.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)

    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally created at the end of the import process
            for later updates. When indexing was partially done, then this
            index can greatly improve speed going through already indexed data.

            Nothing is done when the index already exists. `tablespace` names
            the tablespace to create the index in; when it is None or empty,
            no TABLESPACE clause is emitted.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return

        # Build the clause explicitly so that an unset tablespace (None)
        # never ends up as the literal text 'None' inside the statement.
        tablespace_clause = 'TABLESPACE ' + tablespace if tablespace else ''

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace_clause))
        conn.commit()


    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.

            Failures are logged and otherwise ignored, so that a missing
            date does not abort the import.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)