Functions for setting up and importing a new Nominatim database.
"""
import logging
+import os
+import selectors
import subprocess
-import shutil
+from pathlib import Path
-from ..db.connection import connect, get_pg_env
-from ..db import utils as db_utils
-from ..errors import UsageError
-from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+import psutil
+import psycopg2.extras
+from psycopg2 import sql as pysql
+
+from nominatim.db.connection import connect, get_pg_env
+from nominatim.db import utils as db_utils
+from nominatim.db.async_connection import DBConnection
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools.exec_utils import run_osm2pgsql
+from nominatim.errors import UsageError
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
+def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None):
+ """ Create a new database for Nominatim and populate it with the
+ essential extensions and data.
+ """
+ LOG.warning('Creating database')
+ create_db(dsn, rouser)
+
+ LOG.warning('Setting up database')
+ with connect(dsn) as conn:
+ setup_extensions(conn)
+
+ LOG.warning('Loading basic data')
+ import_base_data(dsn, data_dir, no_partitions)
+
+
def create_db(dsn, rouser=None):
""" Create a new database for the given DSN. Fails when the database
already exists or the PostgreSQL version is too old.
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
- postgres_version = conn.server_version_tuple() # pylint: disable=E1101
+ postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
raise UsageError('PostgreSQL server is too old.')
if rouser is not None:
- with conn.cursor() as cur: # pylint: disable=E1101
+ with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
raise UsageError('PostGIS version is too old.')
-def install_module(src_dir, project_dir, module_dir):
- """ Copy the normalization module from src_dir into the project
- directory under the '/module' directory. If 'module_dir' is set, then
- use the module from there instead and check that it is accessible
- for Postgresql.
+def import_base_data(dsn, sql_dir, ignore_partitions=False):
+ """ Create and populate the tables with basic static data that provides
+ the background for geocoding. Data is assumed to not yet exist.
+ """
+ db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
+ db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
+
+ if ignore_partitions:
+ with connect(dsn) as conn:
+ with conn.cursor() as cur:
+ cur.execute('UPDATE country_name SET partition = 0')
+ conn.commit()
- The function detects when the installation is run from the
- build directory. It doesn't touch the module in that case.
+
+def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
+ """ Import the given OSM file. 'options' contains the list of
+ default settings for osm2pgsql.
"""
- if not module_dir:
- module_dir = project_dir / 'module'
+ options['import_file'] = osm_file
+ options['append'] = False
+ options['threads'] = 1
+
+ if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
+ # Make some educated guesses about cache size based on the size
+ # of the import file and the available memory.
+ mem = psutil.virtual_memory()
+ fsize = os.stat(str(osm_file)).st_size
+ options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
+ fsize * 2) / 1024 / 1024) + 1
+
+ run_osm2pgsql(options)
+
+ with connect(options['dsn']) as conn:
+ if not ignore_errors:
+ with conn.cursor() as cur:
+ cur.execute('SELECT * FROM place LIMIT 1')
+ if cur.rowcount == 0:
+ raise UsageError('No data imported by osm2pgsql.')
- if not module_dir.exists() or not src_dir.samefile(module_dir):
+ if drop:
+ conn.drop_table('planet_osm_nodes')
- if not module_dir.exists():
- module_dir.mkdir()
+ if drop and options['flatnode_file']:
+ Path(options['flatnode_file']).unlink()
- destfile = module_dir / 'nominatim.so'
- shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
- destfile.chmod(0o755)
- LOG.info('Database module installed at %s', str(destfile))
- else:
- LOG.info('Running from build directory. Leaving database module as is.')
- else:
- LOG.info("Using custom path for database module at '%s'", module_dir)
+def create_tables(conn, config, reverse_only=False):
+ """ Create the set of basic tables.
+ When `reverse_only` is True, then the main table for searching will
+ be skipped and only reverse search is possible.
+ """
+ sql = SQLPreprocessor(conn, config)
+ sql.env.globals['db']['reverse_only'] = reverse_only
+
+ sql.run_sql_file(conn, 'tables.sql')
- return module_dir
+
+def create_table_triggers(conn, config):
+ """ Create the triggers for the tables. The trigger functions must already
+ have been imported with refresh.create_functions().
+ """
+ sql = SQLPreprocessor(conn, config)
+ sql.run_sql_file(conn, 'table-triggers.sql')
-def check_module_dir_path(conn, path):
- """ Check that the normalisation module can be found and executed
- from the given path.
+def create_partition_tables(conn, config):
+ """ Create tables that have explicit partitioning.
+ """
+ sql = SQLPreprocessor(conn, config)
+ sql.run_sql_file(conn, 'partition-tables.src.sql')
+
+
+def truncate_data_tables(conn):
+ """ Truncate all data tables to prepare for a fresh load.
"""
with conn.cursor() as cur:
- cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
- LANGUAGE c IMMUTABLE STRICT;
- DROP FUNCTION nominatim_test_import_func(text)
- """.format(path))
+ cur.execute('TRUNCATE placex')
+ cur.execute('TRUNCATE place_addressline')
+ cur.execute('TRUNCATE location_area')
+ cur.execute('TRUNCATE location_area_country')
+ cur.execute('TRUNCATE location_property_tiger')
+ cur.execute('TRUNCATE location_property_osmline')
+ cur.execute('TRUNCATE location_postcode')
+ if conn.table_exists('search_name'):
+ cur.execute('TRUNCATE search_name')
+ cur.execute('DROP SEQUENCE IF EXISTS seq_place')
+ cur.execute('CREATE SEQUENCE seq_place start 100000')
+ cur.execute("""SELECT tablename FROM pg_tables
+ WHERE tablename LIKE 'location_road_%'""")
-def import_base_data(dsn, sql_dir, ignore_partitions=False):
- """ Create and populate the tables with basic static data that provides
- the background for geocoding.
+ for table in [r[0] for r in list(cur)]:
+ cur.execute('TRUNCATE ' + table)
+
+ conn.commit()
+
+
+_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
+ ('osm_type', 'osm_id', 'class', 'type',
+ 'name', 'admin_level', 'address',
+ 'extratags', 'geometry')))
+
+
+def load_data(dsn, threads):
+ """ Copy data into the word and placex table.
"""
- db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
- db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
+ sel = selectors.DefaultSelector()
+ # Then copy data from place to placex in <threads - 1> chunks.
+ place_threads = max(1, threads - 1)
+ for imod in range(place_threads):
+ conn = DBConnection(dsn)
+ conn.connect()
+ conn.perform(
+ pysql.SQL("""INSERT INTO placex ({columns})
+ SELECT {columns} FROM place
+ WHERE osm_id % {total} = {mod}
+ AND NOT (class='place' and (type='houses' or type='postcode'))
+ AND ST_IsValid(geometry)
+ """).format(columns=_COPY_COLUMNS,
+ total=pysql.Literal(place_threads),
+ mod=pysql.Literal(imod)))
+ sel.register(conn, selectors.EVENT_READ, conn)
- if ignore_partitions:
- with connect(dsn) as conn:
- with conn.cursor() as cur: # pylint: disable=E1101
- cur.execute('UPDATE country_name SET partition = 0')
- conn.commit() # pylint: disable=E1101
+ # Address interpolations go into another table.
+ conn = DBConnection(dsn)
+ conn.connect()
+ conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
+ SELECT osm_id, address, geometry FROM place
+ WHERE class='place' and type='houses' and osm_type='W'
+ and ST_GeometryType(geometry) = 'ST_LineString'
+ """)
+ sel.register(conn, selectors.EVENT_READ, conn)
+
+ # Now wait for all of them to finish.
+ todo = place_threads + 1
+ while todo > 0:
+ for key, _ in sel.select(1):
+ conn = key.data
+ sel.unregister(conn)
+ conn.wait()
+ conn.close()
+ todo -= 1
+ print('.', end='', flush=True)
+ print('\n')
+
+ with connect(dsn) as conn:
+ with conn.cursor() as cur:
+ cur.execute('ANALYSE')
+
+
+def create_search_indices(conn, config, drop=False):
+ """ Create tables that have explicit partitioning.
+ """
+
+ # If index creation failed and left an index invalid, they need to be
+ # cleaned out first, so that the script recreates them.
+ with conn.cursor() as cur:
+ cur.execute("""SELECT relname FROM pg_class, pg_index
+ WHERE pg_index.indisvalid = false
+ AND pg_index.indexrelid = pg_class.oid""")
+ bad_indices = [row[0] for row in list(cur)]
+ for idx in bad_indices:
+ LOG.info("Drop invalid index %s.", idx)
+ cur.execute('DROP INDEX "{}"'.format(idx))
+ conn.commit()
+
+ sql = SQLPreprocessor(conn, config)
+
+ sql.run_sql_file(conn, 'indices.sql', drop=drop)
+
+
+def create_country_names(conn, tokenizer, languages=None):
+ """ Add default country names to search index. `languages` is a comma-
+ separated list of language codes as used in OSM. If `languages` is not
+ empty then only name translations for the given languages are added
+ to the index.
+ """
+ if languages:
+ languages = languages.split(',')
+
+ def _include_key(key):
+ return key == 'name' or \
+ (key.startswith('name:') and (not languages or key[5:] in languages))
+
+ with conn.cursor() as cur:
+ psycopg2.extras.register_hstore(cur)
+ cur.execute("""SELECT country_code, name FROM country_name
+ WHERE country_code is not null""")
+
+ with tokenizer.name_analyzer() as analyzer:
+ for code, name in cur:
+ names = {'countrycode': code}
+ if code == 'gb':
+ names['short_name'] = 'UK'
+ if code == 'us':
+ names['short_name'] = 'United States'
+
+ # country names (only in languages as provided)
+ if name:
+ names.update(((k, v) for k, v in name.items() if _include_key(k)))
+
+ analyzer.add_country_names(code, names)
+
+ conn.commit()