X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/32683f73c787464e16f2a146d4c08c4041087dd5..b50d6078f5cec2362813d710db35e616e9913ea2:/nominatim/tools/database_import.py diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 00ec95c0..664d3c6b 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -3,20 +3,38 @@ Functions for setting up and importing a new Nominatim database. """ import logging import os +import selectors import subprocess -import shutil from pathlib import Path import psutil +import psycopg2.extras -from ..db.connection import connect, get_pg_env -from ..db import utils as db_utils -from .exec_utils import run_osm2pgsql -from ..errors import UsageError -from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION +from nominatim.db.connection import connect, get_pg_env +from nominatim.db import utils as db_utils +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError +from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION LOG = logging.getLogger() +def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None): + """ Create a new database for Nominatim and populate it with the + essential extensions and data. + """ + LOG.warning('Creating database') + create_db(dsn, rouser) + + LOG.warning('Setting up database') + with connect(dsn) as conn: + setup_extensions(conn) + + LOG.warning('Loading basic data') + import_base_data(dsn, data_dir, no_partitions) + + def create_db(dsn, rouser=None): """ Create a new database for the given DSN. Fails when the database already exists or the PostgreSQL version is too old. 
@@ -70,48 +88,6 @@ def setup_extensions(conn): raise UsageError('PostGIS version is too old.') -def install_module(src_dir, project_dir, module_dir): - """ Copy the normalization module from src_dir into the project - directory under the '/module' directory. If 'module_dir' is set, then - use the module from there instead and check that it is accessible - for Postgresql. - - The function detects when the installation is run from the - build directory. It doesn't touch the module in that case. - """ - if not module_dir: - module_dir = project_dir / 'module' - - if not module_dir.exists() or not src_dir.samefile(module_dir): - - if not module_dir.exists(): - module_dir.mkdir() - - destfile = module_dir / 'nominatim.so' - shutil.copy(str(src_dir / 'nominatim.so'), str(destfile)) - destfile.chmod(0o755) - - LOG.info('Database module installed at %s', str(destfile)) - else: - LOG.info('Running from build directory. Leaving database module as is.') - else: - LOG.info("Using custom path for database module at '%s'", module_dir) - - return module_dir - - -def check_module_dir_path(conn, path): - """ Check that the normalisation module can be found and executed - from the given path. - """ - with conn.cursor() as cur: - cur.execute("""CREATE FUNCTION nominatim_test_import_func(text) - RETURNS text AS '{}/nominatim.so', 'transliteration' - LANGUAGE c IMMUTABLE STRICT; - DROP FUNCTION nominatim_test_import_func(text) - """.format(path)) - - def import_base_data(dsn, sql_dir, ignore_partitions=False): """ Create and populate the tables with basic static data that provides the background for geocoding. Data is assumed to not yet exist. @@ -126,7 +102,7 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False): conn.commit() -def import_osm_data(osm_file, options, drop=False): +def import_osm_data(osm_file, options, drop=False, ignore_errors=False): """ Import the given OSM file. 'options' contains the list of default settings for osm2pgsql. 
""" @@ -145,10 +121,11 @@ def import_osm_data(osm_file, options, drop=False): run_osm2pgsql(options) with connect(options['dsn']) as conn: - with conn.cursor() as cur: - cur.execute('SELECT * FROM place LIMIT 1') - if cur.rowcount == 0: - raise UsageError('No data imported by osm2pgsql.') + if not ignore_errors: + with conn.cursor() as cur: + cur.execute('SELECT * FROM place LIMIT 1') + if cur.rowcount == 0: + raise UsageError('No data imported by osm2pgsql.') if drop: conn.drop_table('planet_osm_nodes') @@ -156,3 +133,155 @@ def import_osm_data(osm_file, options, drop=False): if drop: if options['flatnode_file']: Path(options['flatnode_file']).unlink() + + +def create_tables(conn, config, reverse_only=False): + """ Create the set of basic tables. + When `reverse_only` is True, then the main table for searching will + be skipped and only reverse search is possible. + """ + sql = SQLPreprocessor(conn, config) + sql.env.globals['db']['reverse_only'] = reverse_only + + sql.run_sql_file(conn, 'tables.sql') + + +def create_table_triggers(conn, config): + """ Create the triggers for the tables. The trigger functions must already + have been imported with refresh.create_functions(). + """ + sql = SQLPreprocessor(conn, config) + sql.run_sql_file(conn, 'table-triggers.sql') + + +def create_partition_tables(conn, config): + """ Create tables that have explicit partitioning. + """ + sql = SQLPreprocessor(conn, config) + sql.run_sql_file(conn, 'partition-tables.src.sql') + + +def truncate_data_tables(conn): + """ Truncate all data tables to prepare for a fresh load. 
+ """ + with conn.cursor() as cur: + cur.execute('TRUNCATE placex') + cur.execute('TRUNCATE place_addressline') + cur.execute('TRUNCATE location_area') + cur.execute('TRUNCATE location_area_country') + cur.execute('TRUNCATE location_property_tiger') + cur.execute('TRUNCATE location_property_osmline') + cur.execute('TRUNCATE location_postcode') + if conn.table_exists('search_name'): + cur.execute('TRUNCATE search_name') + cur.execute('DROP SEQUENCE IF EXISTS seq_place') + cur.execute('CREATE SEQUENCE seq_place start 100000') + + cur.execute("""SELECT tablename FROM pg_tables + WHERE tablename LIKE 'location_road_%'""") + + for table in [r[0] for r in list(cur)]: + cur.execute('TRUNCATE ' + table) + + conn.commit() + +_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' + +def load_data(dsn, threads): + """ Copy data from place into the placex and location_property_osmline tables. + """ + sel = selectors.DefaultSelector() + # Copy data from place to placex in chunks. + place_threads = max(1, threads - 1) + for imod in range(place_threads): + conn = DBConnection(dsn) + conn.connect() + conn.perform("""INSERT INTO placex ({0}) + SELECT {0} FROM place + WHERE osm_id % {1} = {2} + AND NOT (class='place' and type='houses') + AND ST_IsValid(geometry) + """.format(_COPY_COLUMNS, place_threads, imod)) + sel.register(conn, selectors.EVENT_READ, conn) + + # Address interpolations go into another table. + conn = DBConnection(dsn) + conn.connect() + conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo) + SELECT osm_id, address, geometry FROM place + WHERE class='place' and type='houses' and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString' + """) + sel.register(conn, selectors.EVENT_READ, conn) + + # Now wait for all of them to finish. 
+ todo = place_threads + 1 + while todo > 0: + for key, _ in sel.select(1): + conn = key.data + sel.unregister(conn) + conn.wait() + conn.close() + todo -= 1 + print('.', end='', flush=True) + print('\n') + + with connect(dsn) as conn: + with conn.cursor() as cur: + cur.execute('ANALYSE') + + +def create_search_indices(conn, config, drop=False): + """ Create the search indices by running 'indices.sql'. + """ + + # If index creation failed and left an index invalid, they need to be + # cleaned out first, so that the script recreates them. + with conn.cursor() as cur: + cur.execute("""SELECT relname FROM pg_class, pg_index + WHERE pg_index.indisvalid = false + AND pg_index.indexrelid = pg_class.oid""") + bad_indices = [row[0] for row in list(cur)] + for idx in bad_indices: + LOG.info("Drop invalid index %s.", idx) + cur.execute('DROP INDEX "{}"'.format(idx)) + conn.commit() + + sql = SQLPreprocessor(conn, config) + + sql.run_sql_file(conn, 'indices.sql', drop=drop) + +def create_country_names(conn, tokenizer, languages=None): + """ Add default country names to search index. `languages` is a comma- + separated list of language codes as used in OSM. If `languages` is not + empty then only name translations for the given languages are added + to the index. + """ + if languages: + languages = languages.split(',') + + def _include_key(key): + return key == 'name' or \ + (key.startswith('name:') \ + and (not languages or key[5:] in languages)) + + with conn.cursor() as cur: + psycopg2.extras.register_hstore(cur) + cur.execute("""SELECT country_code, name FROM country_name + WHERE country_code is not null""") + + with tokenizer.name_analyzer() as analyzer: + for code, name in cur: + names = [code] + if code == 'gb': + names.append('UK') + if code == 'us': + names.append('United States') + + # country names (only in languages as provided) + if name: + names.extend((v for k, v in name.items() if _include_key(k))) + + analyzer.add_country_names(code, names) + + conn.commit()