nominatim/clicmd/setup.py
"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid potentially unused imports.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()

class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """
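
    # Example invocation (a sketch; the file name and cache size below are
    # illustrative values, not defaults):
    #
    #   nominatim import --osm-file germany-latest.osm.pbf --osm2pgsql-cache 16000
    #
    # Both options are defined in add_args() below; --osm-file may be given
    # repeatedly to import multiple files in one go.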

    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help=("Do not partition search indices "
                                 "(speeds up import of single country extracts)"))
        group.add_argument('--no-updates', action='store_true',
                           help="Do not keep tables that are only needed for "
                                "updating the database later")
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during indexing')

    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze, country_info
        from ..indexer.indexer import Indexer

        country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml')

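        # For a fresh import (no --continue), create the database from
        # scratch: database skeleton, country tables, raw OSM data, the
        # base table layout and the wikipedia importance data.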
        if args.continue_at is None:
            files = args.get_osm_file_list()

            LOG.warning('Creating database')
            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Setting up country tables')
            country_info.setup_country_tables(args.config.get_libpq_dsn(),
                                              args.data_dir,
                                              args.no_partitions)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)

            SetupAll._setup_tables(args.config, args.reverse_only)

            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            # import_wikipedia_articles() returns a non-zero value when the
            # dump file could not be found.
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

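        # The 'load-data' stage moves the data from the osm2pgsql import
        # tables into the final placex table.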
        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)

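        # Create a new tokenizer for fresh imports and for imports continued
        # at 'load-data'; otherwise load the one already set up in the
        # database (see _get_tokenizer() below).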
        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

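        # Postcodes are computed from the imported data (plus, presumably,
        # any external postcode files found in the project directory, which
        # is why project_dir is passed along).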
        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)

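        # Indexing computes search terms and address parts for each place.
        # When continuing a partially finished indexing run, first recreate
        # the support index that speeds up finding unindexed rows.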
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)

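        # Post-processing: create the search indexes and the country name
        # tokens; when --no-updates was given, drop the tables that are only
        # needed for keeping the database up to date.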
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            country_info.create_country_names(conn, tokenizer,
                                              args.config.LANGUAGES)
            conn.commit()
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)

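        # Generate the scripts for the web frontend under the project
        # directory.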
        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            # NOMINATIM_VERSION is a 4-tuple; the last component is the
            # patch level of the database layout.
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0

    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
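            # Function creation runs in multiple passes: tables, address
            # levels and partition tables are created in between, and the
            # function definitions and these objects depend on each other.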
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_file(conn, Path(config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)

    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (re)initialise the tokenizer data
            return tokenizer_factory.create_tokenizer(config)

        # just load the tokenizer
        return tokenizer_factory.get_tokenizer_for_db(config)

    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally only created at the end of the import
            process for use by later updates. When indexing was partially
            done, this index can greatly improve the speed of going through
            the already indexed data.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))
        conn.commit()

    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)