X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/fbbdd31399da42b94188d9d4aa4f084efd4876a4..6dbec931f58c659075a903db1e8eab15b1c93b03:/nominatim/tools/database_import.py?ds=sidebyside diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 400ce7a5..6195b44a 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -1,3 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Functions for setting up and importing a new Nominatim database. """ @@ -8,9 +14,9 @@ import subprocess from pathlib import Path import psutil +from psycopg2 import sql as pysql from nominatim.db.connection import connect, get_pg_env -from nominatim.db import utils as db_utils from nominatim.db.async_connection import DBConnection from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.tools.exec_utils import run_osm2pgsql @@ -19,24 +25,24 @@ from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERS LOG = logging.getLogger() -def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None): - """ Create a new database for Nominatim and populate it with the - essential extensions and data. +def _require_version(module, actual, expected): + """ Compares the version for the given module and raises an exception + if the actual version is too old. """ - LOG.warning('Creating database') - create_db(dsn, rouser) + if actual < expected: + LOG.fatal('Minimum supported version of %s is %d.%d. ' + 'Found version %d.%d.', + module, expected[0], expected[1], actual[0], actual[1]) + raise UsageError(f'{module} is too old.') - LOG.warning('Setting up database') - with connect(dsn) as conn: - setup_extensions(conn) - LOG.warning('Loading basic data') - import_base_data(dsn, data_dir, no_partitions) +def setup_database_skeleton(dsn, rouser=None): + """ Create a new database for Nominatim and populate it with the + essential extensions. + The function fails when the database already exists or Postgresql or + PostGIS versions are too old. -def create_db(dsn, rouser=None): - """ Create a new database for the given DSN. Fails when the database - already exists or the PostgreSQL version is too old. Uses `createdb` to create the database. If 'rouser' is given, then the function also checks that the user @@ -50,62 +56,35 @@ def create_db(dsn, rouser=None): raise UsageError('Creating new database failed.') with connect(dsn) as conn: - postgres_version = conn.server_version_tuple() - if postgres_version < POSTGRESQL_REQUIRED_VERSION: - LOG.fatal('Minimum supported version of Postgresql is %d.%d. ' - 'Found version %d.%d.', - POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1], - postgres_version[0], postgres_version[1]) - raise UsageError('PostgreSQL server is too old.') + _require_version('PostgreSQL server', + conn.server_version_tuple(), + POSTGRESQL_REQUIRED_VERSION) if rouser is not None: with conn.cursor() as cur: cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s', (rouser, )) if cnt == 0: - LOG.fatal("Web user '%s' does not exists. Create it with:\n" + LOG.fatal("Web user '%s' does not exist. Create it with:\n" "\n createuser %s", rouser, rouser) raise UsageError('Missing read-only user.') + # Create extensions. + with conn.cursor() as cur: + cur.execute('CREATE EXTENSION IF NOT EXISTS hstore') + cur.execute('CREATE EXTENSION IF NOT EXISTS postgis') + conn.commit() - -def setup_extensions(conn): - """ Set up all extensions needed for Nominatim. Also checks that the - versions of the extensions are sufficient. - """ - with conn.cursor() as cur: - cur.execute('CREATE EXTENSION IF NOT EXISTS hstore') - cur.execute('CREATE EXTENSION IF NOT EXISTS postgis') - conn.commit() - - postgis_version = conn.postgis_version_tuple() - if postgis_version < POSTGIS_REQUIRED_VERSION: - LOG.fatal('Minimum supported version of PostGIS is %d.%d. ' - 'Found version %d.%d.', - POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1], - postgis_version[0], postgis_version[1]) - raise UsageError('PostGIS version is too old.') - - -def import_base_data(dsn, sql_dir, ignore_partitions=False): - """ Create and populate the tables with basic static data that provides - the background for geocoding. Data is assumed to not yet exist. - """ - db_utils.execute_file(dsn, sql_dir / 'country_name.sql') - db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz') - - if ignore_partitions: - with connect(dsn) as conn: - with conn.cursor() as cur: - cur.execute('UPDATE country_name SET partition = 0') - conn.commit() + _require_version('PostGIS', + conn.postgis_version_tuple(), + POSTGIS_REQUIRED_VERSION) -def import_osm_data(osm_file, options, drop=False, ignore_errors=False): - """ Import the given OSM file. 'options' contains the list of +def import_osm_data(osm_files, options, drop=False, ignore_errors=False): + """ Import the given OSM files. 'options' contains the list of default settings for osm2pgsql. """ - options['import_file'] = osm_file + options['import_file'] = osm_files options['append'] = False options['threads'] = 1 @@ -113,7 +92,12 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False): # Make some educated guesses about cache size based on the size # of the import file and the available memory. mem = psutil.virtual_memory() - fsize = os.stat(str(osm_file)).st_size + fsize = 0 + if isinstance(osm_files, list): + for fname in osm_files: + fsize += os.stat(str(fname)).st_size + else: + fsize = os.stat(str(osm_files)).st_size options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75, fsize * 2) / 1024 / 1024) + 1 @@ -129,9 +113,8 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False): if drop: conn.drop_table('planet_osm_nodes') - if drop: - if options['flatnode_file']: - Path(options['flatnode_file']).unlink() + if drop and options['flatnode_file']: + Path(options['flatnode_file']).unlink() def create_tables(conn, config, reverse_only=False): @@ -184,7 +167,12 @@ def truncate_data_tables(conn): conn.commit() -_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' + +_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier, + ('osm_type', 'osm_id', 'class', 'type', + 'name', 'admin_level', 'address', + 'extratags', 'geometry'))) + def load_data(dsn, threads): """ Copy data into the word and placex table. @@ -195,12 +183,15 @@ def load_data(dsn, threads): for imod in range(place_threads): conn = DBConnection(dsn) conn.connect() - conn.perform("""INSERT INTO placex ({0}) - SELECT {0} FROM place - WHERE osm_id % {1} = {2} - AND NOT (class='place' and type='houses') - AND ST_IsValid(geometry) - """.format(_COPY_COLUMNS, place_threads, imod)) + conn.perform( + pysql.SQL("""INSERT INTO placex ({columns}) + SELECT {columns} FROM place + WHERE osm_id % {total} = {mod} + AND NOT (class='place' and (type='houses' or type='postcode')) + AND ST_IsValid(geometry) + """).format(columns=_COPY_COLUMNS, + total=pysql.Literal(place_threads), + mod=pysql.Literal(imod))) sel.register(conn, selectors.EVENT_READ, conn) # Address interpolations go into another table. @@ -243,41 +234,9 @@ def create_search_indices(conn, config, drop=False): bad_indices = [row[0] for row in list(cur)] for idx in bad_indices: LOG.info("Drop invalid index %s.", idx) - cur.execute('DROP INDEX "{}"'.format(idx)) + cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx))) conn.commit() sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'indices.sql', drop=drop) - -def create_country_names(conn, config): - """ Create search index for default country names. - """ - - with conn.cursor() as cur: - cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""") - cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""") - cur.execute("""SELECT COUNT(*) FROM - (SELECT getorcreate_country(make_standard_name(country_code), - country_code) FROM country_name WHERE country_code is not null) AS x""") - cur.execute("""SELECT COUNT(*) FROM - (SELECT getorcreate_country(make_standard_name(name->'name'), country_code) - FROM country_name WHERE name ? 'name') AS x""") - sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v), - country_code) FROM (SELECT country_code, skeys(name) - AS k, svals(name) AS v FROM country_name) x WHERE k""" - - languages = config.LANGUAGES - - if languages: - sql_statement = "{} IN (".format(sql_statement) - delim = '' - for language in languages.split(','): - sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language) - delim = ', ' - sql_statement = '{})'.format(sql_statement) - else: - sql_statement = "{} LIKE 'name:%'".format(sql_statement) - sql_statement = "{}) v".format(sql_statement) - cur.execute(sql_statement) - conn.commit()