"""
Implementation of the 'import' subcommand.
"""
import logging
from pathlib import Path

import psutil

from nominatim.db.connection import connect
from nominatim.db import status, properties
from nominatim.version import NOMINATIM_VERSION

# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports so that heavy modules are only loaded when needed.
# pylint: disable=E0012,C0415

LOG = logging.getLogger()


class SetupAll:
    """\
    Create a new Nominatim database from an OSM file.
    """
    @staticmethod
    def add_args(parser):
        group_name = parser.add_argument_group('Required arguments')
        group = group_name.add_mutually_exclusive_group(required=True)
        group.add_argument('--osm-file', metavar='FILE', action='append',
                           help='OSM file to be imported'
                                ' (repeat for importing multiple files)')
        group.add_argument('--continue', dest='continue_at',
                           choices=['load-data', 'indexing', 'db-postprocess'],
                           help='Continue an import that was interrupted')
        group = parser.add_argument_group('Optional arguments')
        group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                           help='Size of cache to be used by osm2pgsql (in MB)')
        group.add_argument('--reverse-only', action='store_true',
                           help='Do not create tables and indexes for searching')
        group.add_argument('--no-partitions', action='store_true',
                           help='Do not partition search indices '
                                '(speeds up import of single country extracts)')
        group.add_argument('--no-updates', action='store_true',
                           help='Do not keep tables that are only needed for '
                                'updating the database later')
        group = parser.add_argument_group('Expert options')
        group.add_argument('--ignore-errors', action='store_true',
                           help='Continue import even when errors in SQL are present')
        group.add_argument('--index-noanalyse', action='store_true',
                           help='Do not perform analyse operations during indexing')
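
    # Typical invocations (illustrative only; file names are placeholders):
    #   nominatim import --osm-file planet-extract.osm.pbf
    #   nominatim import --continue indexing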

    @staticmethod
    def run(args):
        from ..tools import database_import, refresh, postcodes, freeze
        from ..indexer.indexer import Indexer
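
        # Steps up to and including the osm2pgsql import only run on a
        # fresh import, i.e. when no --continue stage was given.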
        if args.continue_at is None:
            files = args.get_osm_file_list()

            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
                                                    args.data_dir,
                                                    args.no_partitions,
                                                    rouser=args.config.DATABASE_WEBUSER)

            LOG.warning('Importing OSM data file')
            database_import.import_osm_data(files,
                                            args.osm2pgsql_options(0, 1),
                                            drop=args.no_updates,
                                            ignore_errors=args.ignore_errors)
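
            # With the raw OSM data in place, create the final table
            # layout together with the triggers and SQL functions behind it.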
            SetupAll._setup_tables(args.config, args.reverse_only)
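
            # Importance scores derived from Wikipedia improve the ranking
            # of search results. A missing dump is not fatal; default
            # importances are used instead.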
            LOG.warning('Importing wikipedia importance data')
            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                 data_path) > 0:
                LOG.error('Wikipedia importance dump file not found. '
                          'Will be using default importances.')

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Initialise tables')
            with connect(args.config.get_libpq_dsn()) as conn:
                database_import.truncate_data_tables(conn)

            LOG.warning('Load data into placex table')
            database_import.load_data(args.config.get_libpq_dsn(),
                                      args.threads or psutil.cpu_count() or 1)
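
        # The tokenizer determines how names are normalised and stored.
        # A fresh import (or a restart from 'load-data') must create it;
        # later stages reload the tokenizer already saved in the database.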
        LOG.warning("Setting up tokenizer")
        tokenizer = SetupAll._get_tokenizer(args.continue_at, args.config)

        if args.continue_at is None or args.continue_at == 'load-data':
            LOG.warning('Calculate postcodes')
            postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                       args.project_dir, tokenizer)
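
        # Indexing resolves the address hierarchy for each place. When
        # restarting half-way through, a temporary support index helps to
        # skip quickly over the places that are already done.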
        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
            if args.continue_at is not None and args.continue_at != 'load-data':
                with connect(args.config.get_libpq_dsn()) as conn:
                    SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
            LOG.warning('Indexing places')
            indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
                              args.threads or psutil.cpu_count() or 1)
            indexer.index_full(analyse=not args.index_noanalyse)
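
        # Build the remaining indices needed for serving search queries.
        # With --no-updates, tables that are only required for keeping the
        # database up to date are dropped again to save space.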
        LOG.warning('Post-process tables')
        with connect(args.config.get_libpq_dsn()) as conn:
            database_import.create_search_indices(conn, args.config,
                                                  drop=args.no_updates)
            LOG.warning('Create search index for default country names.')
            database_import.create_country_names(conn, tokenizer,
                                                 args.config.LANGUAGES)
            if args.no_updates:
                freeze.drop_update_tables(conn)
        tokenizer.finalize_import(args.config)
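
        # Finally, write out the frontend scripts and record the state of
        # the freshly imported database.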
        webdir = args.project_dir / 'website'
        LOG.warning('Setup website at %s', webdir)
        with connect(args.config.get_libpq_dsn()) as conn:
            refresh.setup_website(webdir, args.config, conn)

        with connect(args.config.get_libpq_dsn()) as conn:
            SetupAll._set_database_date(conn)
            properties.set_property(conn, 'database_version',
                                    '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))

        return 0

    @staticmethod
    def _setup_tables(config, reverse_only):
        """ Set up the basic database layout: tables, indexes and functions.
        """
        from ..tools import database_import, refresh

        with connect(config.get_libpq_dsn()) as conn:
            LOG.warning('Create functions (1st pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create tables')
            database_import.create_tables(conn, config, reverse_only=reverse_only)
            refresh.load_address_levels_from_file(conn, Path(config.ADDRESS_LEVEL_CONFIG))
            LOG.warning('Create functions (2nd pass)')
            refresh.create_functions(conn, config, False, False)
            LOG.warning('Create table triggers')
            database_import.create_table_triggers(conn, config)
            LOG.warning('Create partition tables')
            database_import.create_partition_tables(conn, config)
            LOG.warning('Create functions (3rd pass)')
            refresh.create_functions(conn, config, False, False)

    @staticmethod
    def _get_tokenizer(continue_at, config):
        """ Set up a new tokenizer or load an already initialised one.
        """
        from ..tokenizer import factory as tokenizer_factory

        if continue_at is None or continue_at == 'load-data':
            # (Re)initialise the tokenizer data from the configuration.
            return tokenizer_factory.create_tokenizer(config)

        # Just load the tokenizer that is already set up in the database.
        return tokenizer_factory.get_tokenizer_for_db(config)

    @staticmethod
    def _create_pending_index(conn, tablespace):
        """ Add a supporting index for finding places still to be indexed.

            This index is normally only created at the end of the import
            process for later updates. When indexing was only partially
            done, the index can greatly speed up skipping over already
            indexed data.
        """
        if conn.index_exists('idx_placex_pendingsector'):
            return

        with conn.cursor() as cur:
            LOG.warning('Creating support index')
            if tablespace:
                tablespace = 'TABLESPACE ' + tablespace
            cur.execute("""CREATE INDEX idx_placex_pendingsector
                           ON placex USING BTREE (rank_address,geometry_sector)
                           {} WHERE indexed_status > 0
                        """.format(tablespace))

    @staticmethod
    def _set_database_date(conn):
        """ Determine the database date and set the status accordingly.
        """
        try:
            dbdate = status.compute_database_date(conn)
            status.set_status(conn, dbdate)
            LOG.info('Database is at %s.', dbdate)
        except Exception as exc: # pylint: disable=broad-except
            LOG.error('Cannot determine date of database: %s', exc)