+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Functions for setting up and importing a new Nominatim database.
"""
+from typing import Tuple, Optional, Union, Sequence, MutableMapping, Any
import logging
import os
+import selectors
import subprocess
-import shutil
from pathlib import Path
import psutil
+from psycopg2 import sql as pysql
-from ..db.connection import connect, get_pg_env
-from ..db import utils as db_utils
-from .exec_utils import run_osm2pgsql
-from ..errors import UsageError
-from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+from nominatim.config import Configuration
+from nominatim.db.connection import connect, get_pg_env, Connection
+from nominatim.db.async_connection import DBConnection
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools.exec_utils import run_osm2pgsql
+from nominatim.errors import UsageError
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \
+ POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
-def create_db(dsn, rouser=None):
- """ Create a new database for the given DSN. Fails when the database
- already exists or the PostgreSQL version is too old.
+def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int, int]) -> None:
+    """ Ensure that `module` is available in at least version `expected`.
+
+        `actual` and `expected` are compared as tuples. When `actual` is
+        lower than `expected`, both versions are logged as a fatal error
+        and a UsageError is raised. Otherwise the function simply returns.
+    """
+    if not actual < expected:
+        return
+
+    min_major, min_minor = expected[0], expected[1]
+    found_major, found_minor = actual[0], actual[1]
+    LOG.fatal('Minimum supported version of %s is %d.%d. '
+              'Found version %d.%d.',
+              module, min_major, min_minor, found_major, found_minor)
+    raise UsageError(f'{module} is too old.')
+
+
+def _require_loaded(extension_name: str, conn: Connection) -> None:
+    """ Check that the given extension is loaded.
+
+        `conn` is the connection to the database that is inspected.
+        When `conn.extension_loaded()` reports the extension as missing,
+        a fatal error is logged and a UsageError is raised.
+    """
+    if not conn.extension_loaded(extension_name):
+        LOG.fatal('Required module %s is not loaded.', extension_name)
+        raise UsageError(f'{extension_name} is not loaded.')
+
+
+def check_existing_database_plugins(dsn: str) -> None:
+    """ Check that the database has the required plugins installed.
+
+        Connects to the database given by `dsn`. A UsageError is raised
+        when the PostgreSQL server or PostGIS are older than the required
+        minimum versions or when the hstore extension is not loaded.
+    """
+    with connect(dsn) as conn:
+        _require_version('PostgreSQL server',
+                         conn.server_version_tuple(),
+                         POSTGRESQL_REQUIRED_VERSION)
+        _require_version('PostGIS',
+                         conn.postgis_version_tuple(),
+                         POSTGIS_REQUIRED_VERSION)
+        _require_loaded('hstore', conn)
+
+
+def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
+ """ Create a new database for Nominatim and populate it with the
+ essential extensions.
+
+ The function fails when the database already exists or Postgresql or
+ PostGIS versions are too old.
+
Uses `createdb` to create the database.
If 'rouser' is given, then the function also checks that the user
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
- postgres_version = conn.server_version_tuple()
- if postgres_version < POSTGRESQL_REQUIRED_VERSION:
- LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
- 'Found version %d.%d.',
- POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1],
- postgres_version[0], postgres_version[1])
- raise UsageError('PostgreSQL server is too old.')
+ _require_version('PostgreSQL server',
+ conn.server_version_tuple(),
+ POSTGRESQL_REQUIRED_VERSION)
if rouser is not None:
with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
- LOG.fatal("Web user '%s' does not exists. Create it with:\n"
+ LOG.fatal("Web user '%s' does not exist. Create it with:\n"
"\n createuser %s", rouser, rouser)
raise UsageError('Missing read-only user.')
+ # Create extensions.
+ with conn.cursor() as cur:
+ cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
+ cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
+ postgis_version = conn.postgis_version_tuple()
+ if postgis_version[0] >= 3:
+ cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
-def setup_extensions(conn):
- """ Set up all extensions needed for Nominatim. Also checks that the
- versions of the extensions are sufficient.
- """
- with conn.cursor() as cur:
- cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
- cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
- conn.commit()
+ conn.commit()
- postgis_version = conn.postgis_version_tuple()
- if postgis_version < POSTGIS_REQUIRED_VERSION:
- LOG.fatal('Minimum supported version of PostGIS is %d.%d. '
- 'Found version %d.%d.',
- POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1],
- postgis_version[0], postgis_version[1])
- raise UsageError('PostGIS version is too old.')
+ _require_version('PostGIS',
+ conn.postgis_version_tuple(),
+ POSTGIS_REQUIRED_VERSION)
-def install_module(src_dir, project_dir, module_dir):
- """ Copy the normalization module from src_dir into the project
- directory under the '/module' directory. If 'module_dir' is set, then
- use the module from there instead and check that it is accessible
- for Postgresql.
-
- The function detects when the installation is run from the
- build directory. It doesn't touch the module in that case.
+def import_osm_data(osm_files: Union[Path, Sequence[Path]],
+                    options: MutableMapping[str, Any],
+                    drop: bool = False, ignore_errors: bool = False) -> None:
+    """ Import the given OSM files. 'options' contains the list of
+        default settings for osm2pgsql.
+
+        `osm_files` may be a single path or any sequence of paths.
+        `options` is modified in place: 'import_file', 'append' and
+        'threads' are always overwritten and 'osm2pgsql_cache' is set
+        to an estimate when neither a flatnode file nor a cache size
+        has been configured.
+
+        Unless `ignore_errors` is set, a UsageError is raised when the
+        place table contains no rows after the import. When `drop` is
+        set, the table planet_osm_nodes and the flatnode file (if one
+        is configured) are removed afterwards.
"""
- if not module_dir:
- module_dir = project_dir / 'module'
+    options['import_file'] = osm_files
+    options['append'] = False
+    options['threads'] = 1
- if not module_dir.exists() or not src_dir.samefile(module_dir):
+    if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
+        # Make some educated guesses about cache size based on the size
+        # of the import file and the available memory.
+        mem = psutil.virtual_memory()
+        # Only a plain path counts as a single file. Everything else is
+        # iterated, so that tuples and other sequences work like lists.
+        single_file = isinstance(osm_files, (str, os.PathLike))
+        files = [osm_files] if single_file else osm_files
+        fsize = sum(os.stat(str(fname)).st_size for fname in files)
+        options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
+                                             fsize * 2) / 1024 / 1024) + 1
- if not module_dir.exists():
- module_dir.mkdir()
+    run_osm2pgsql(options)
- destfile = module_dir / 'nominatim.so'
- shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
- destfile.chmod(0o755)
+    with connect(options['dsn']) as conn:
+        if not ignore_errors:
+            # An empty place table means that osm2pgsql produced nothing.
+            with conn.cursor() as cur:
+                cur.execute('SELECT * FROM place LIMIT 1')
+                if cur.rowcount == 0:
+                    raise UsageError('No data imported by osm2pgsql.')
- LOG.info('Database module installed at %s', str(destfile))
- else:
- LOG.info('Running from build directory. Leaving database module as is.')
- else:
- LOG.info("Using custom path for database module at '%s'", module_dir)
+        if drop:
+            conn.drop_table('planet_osm_nodes')
- return module_dir
+    if drop and options['flatnode_file']:
+        Path(options['flatnode_file']).unlink()
-def check_module_dir_path(conn, path):
- """ Check that the normalisation module can be found and executed
- from the given path.
+def create_tables(conn: Connection, config: Configuration, reverse_only: bool = False) -> None:
+    """ Set up the basic tables by running the SQL script 'tables.sql'.
+
+        The value of `reverse_only` is made available to the script as
+        `db.reverse_only`. When it is True, then the main table for
+        searching will be skipped and only reverse search is possible.
"""
- with conn.cursor() as cur:
- cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
- LANGUAGE c IMMUTABLE STRICT;
- DROP FUNCTION nominatim_test_import_func(text)
- """.format(path))
+    preprocessor = SQLPreprocessor(conn, config)
+    db_settings = preprocessor.env.globals['db']
+    db_settings['reverse_only'] = reverse_only
+
+    preprocessor.run_sql_file(conn, 'tables.sql')
-def import_base_data(dsn, sql_dir, ignore_partitions=False):
- """ Create and populate the tables with basic static data that provides
- the background for geocoding. Data is assumed to not yet exist.
+def create_table_triggers(conn: Connection, config: Configuration) -> None:
+    """ Install the table triggers by running 'table-triggers.sql'.
+
+        The trigger functions must already have been imported with
+        refresh.create_functions() before this function is called.
"""
- db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
- db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'table-triggers.sql')
- if ignore_partitions:
- with connect(dsn) as conn:
- with conn.cursor() as cur:
- cur.execute('UPDATE country_name SET partition = 0')
- conn.commit()
+
+def create_partition_tables(conn: Connection, config: Configuration) -> None:
+    """ Set up the tables that have explicit partitioning by running
+        the SQL script 'partition-tables.src.sql'.
+    """
+    preprocessor = SQLPreprocessor(conn, config)
+    preprocessor.run_sql_file(conn, 'partition-tables.src.sql')
-def import_osm_data(osm_file, options, drop=False):
- """ Import the given OSM file. 'options' contains the list of
- default settings for osm2pgsql.
+def truncate_data_tables(conn: Connection) -> None:
+    """ Truncate all data tables to prepare for a fresh load.
+
+        Empties the fixed list of data tables, the table search_name
+        when it exists and every table whose name matches the pattern
+        'location_road_%'. The sequence seq_place is dropped and
+        created again starting at 100000. All changes are committed
+        at the end.
"""
- options['import_file'] = osm_file
- options['append'] = False
- options['threads'] = 1
+    with conn.cursor() as cur:
+        cur.execute('TRUNCATE placex')
+        cur.execute('TRUNCATE place_addressline')
+        cur.execute('TRUNCATE location_area')
+        cur.execute('TRUNCATE location_area_country')
+        cur.execute('TRUNCATE location_property_tiger')
+        cur.execute('TRUNCATE location_property_osmline')
+        cur.execute('TRUNCATE location_postcode')
+        if conn.table_exists('search_name'):
+            cur.execute('TRUNCATE search_name')
+        cur.execute('DROP SEQUENCE IF EXISTS seq_place')
+        cur.execute('CREATE SEQUENCE seq_place start 100000')
+
+        cur.execute("""SELECT tablename FROM pg_tables
+                       WHERE tablename LIKE 'location_road_%'""")
+
+        # Collect all names first because the cursor is reused for the
+        # TRUNCATE statements below.
+        road_tables = [row[0] for row in cur]
+        for table in road_tables:
+            # Quote the name as an identifier instead of pasting it into
+            # the statement as plain text.
+            cur.execute(pysql.SQL('TRUNCATE {}').format(pysql.Identifier(table)))
- if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
- # Make some educated guesses about cache size based on the size
- # of the import file and the available memory.
- mem = psutil.virtual_memory()
- fsize = os.stat(str(osm_file)).st_size
- options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
- fsize * 2) / 1024 / 1024) + 1
+    conn.commit()
- run_osm2pgsql(options)
- with connect(options['dsn']) as conn:
- with conn.cursor() as cur:
- cur.execute('SELECT * FROM place LIMIT 1')
- if cur.rowcount == 0:
- raise UsageError('No data imported by osm2pgsql.')
+# Comma-separated list of column identifiers that load_data() copies
+# from the place table into placex.
+_COPY_COLUMNS = pysql.SQL(',').join(
+    pysql.Identifier(column)
+    for column in ('osm_type', 'osm_id', 'class', 'type',
+                   'name', 'admin_level', 'address',
+                   'extratags', 'geometry'))
- if drop:
- conn.drop_table('planet_osm_nodes')
- if drop:
- if options['flatnode_file']:
- Path(options['flatnode_file']).unlink()
+def load_data(dsn: str, threads: int) -> None:
+ """ Copy data from the place table into the placex and
+ location_property_osmline tables.
+
+ The rows for placex are inserted over max(1, threads - 1) separate
+ connections, each one handling the rows whose osm_id leaves a
+ particular remainder. One more connection fills
+ location_property_osmline. Once all connections are done, ANALYSE
+ is run on the database.
+ """
+ sel = selectors.DefaultSelector()
+ # Copy data from place to placex in <threads - 1> chunks (at least one).
+ place_threads = max(1, threads - 1)
+ for imod in range(place_threads):
+ conn = DBConnection(dsn)
+ conn.connect()
+ # NOTE(review): presumably perform() only starts the statement and
+ # returns before it has finished - confirm against DBConnection.
+ conn.perform(
+ pysql.SQL("""INSERT INTO placex ({columns})
+ SELECT {columns} FROM place
+ WHERE osm_id % {total} = {mod}
+ AND NOT (class='place' and (type='houses' or type='postcode'))
+ AND ST_IsValid(geometry)
+ """).format(columns=_COPY_COLUMNS,
+ total=pysql.Literal(place_threads),
+ mod=pysql.Literal(imod)))
+ # The connection doubles as the selector key data, so that it can be
+ # retrieved again in the wait loop below.
+ sel.register(conn, selectors.EVENT_READ, conn)
+
+ # Address interpolations go into another table.
+ conn = DBConnection(dsn)
+ conn.connect()
+ conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
+ SELECT osm_id, address, geometry FROM place
+ WHERE class='place' and type='houses' and osm_type='W'
+ and ST_GeometryType(geometry) = 'ST_LineString'
+ """)
+ sel.register(conn, selectors.EVENT_READ, conn)
+
+ # Now wait for all of them to finish.
+ # One dot is printed as progress output for each finished connection.
+ todo = place_threads + 1
+ while todo > 0:
+ for key, _ in sel.select(1):
+ conn = key.data
+ sel.unregister(conn)
+ conn.wait()
+ conn.close()
+ todo -= 1
+ print('.', end='', flush=True)
+ print('\n')
+
+ # Run ANALYSE over a fresh connection after the bulk inserts.
+ with connect(dsn) as syn_conn:
+ with syn_conn.cursor() as cur:
+ cur.execute('ANALYSE')
+
+
+def create_search_indices(conn: Connection, config: Configuration,
+ drop: bool = False, threads: int = 1) -> None:
+ """ Create the search indices by running the SQL script 'indices.sql'.
+
+ Indices that are marked as invalid in pg_index are dropped first.
+ The script is then handed to run_parallel_sql_file() with the
+ number of threads capped at 8. `drop` is passed on to the script
+ as preprocessor parameter.
+ """
+
+ # If index creation failed and left an index invalid, they need to be
+ # cleaned out first, so that the script recreates them.
+ with conn.cursor() as cur:
+ cur.execute("""SELECT relname FROM pg_class, pg_index
+ WHERE pg_index.indisvalid = false
+ AND pg_index.indexrelid = pg_class.oid""")
+ # Read the complete result before the cursor is reused for DROP INDEX.
+ bad_indices = [row[0] for row in list(cur)]
+ for idx in bad_indices:
+ LOG.info("Drop invalid index %s.", idx)
+ cur.execute(pysql.SQL('DROP INDEX {}').format(pysql.Identifier(idx)))
+ conn.commit()
+
+ sql = SQLPreprocessor(conn, config)
+
+ # The script gets its own DSN taken from the configuration.
+ sql.run_parallel_sql_file(config.get_libpq_dsn(),
+ 'indices.sql', min(8, threads), drop=drop)