X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4fa6c0ad5333e979931d3276a5a9cdcbfd514430..7205491b8495e48c62b28373d1746e77d475582b:/nominatim/db/sql_preprocessor.py

diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py
index c7009b34..839f682d 100644
--- a/nominatim/db/sql_preprocessor.py
+++ b/nominatim/db/sql_preprocessor.py
@@ -1,10 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Preprocessing of SQL files.
 """
+from typing import Set, Dict, Any
 import jinja2
 
+from nominatim.db.connection import Connection
+from nominatim.db.async_connection import WorkerPool
+from nominatim.config import Configuration
 
-def _get_partitions(conn):
+def _get_partitions(conn: Connection) -> Set[int]:
     """ Get the set of partitions currently in use.
     """
     with conn.cursor() as cur:
@@ -16,7 +26,7 @@ def _get_partitions(conn):
     return partitions
 
 
-def _get_tables(conn):
+def _get_tables(conn: Connection) -> Set[str]:
     """ Return the set of tables currently in use.
         Only includes non-partitioned
     """
@@ -26,45 +36,35 @@ def _get_tables(conn):
     return set((row[0] for row in list(cur)))
 
 
-def _setup_tablespace_sql(config):
+def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
     """ Returns a dict with tablespace expressions for the different tablespace
         kinds depending on whether a tablespace is configured or not.
     """
     out = {}
     for subset in ('ADDRESS', 'SEARCH', 'AUX'):
         for kind in ('DATA', 'INDEX'):
-            tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind))
+            tspace = getattr(config, f'TABLESPACE_{subset}_{kind}')
             if tspace:
-                tspace = 'TABLESPACE "{}"'.format(tspace)
-            out['{}_{}'.format(subset.lower, kind.lower())] = tspace
+                tspace = f'TABLESPACE "{tspace}"'
+            out[f'{subset.lower()}_{kind.lower()}'] = tspace
 
     return out
 
 
-def _setup_postgres_sql(conn):
-    """ Set up a dictionary with various Postgresql/Postgis SQL terms which
-        are dependent on the database version in use.
-    """
-    out = {}
-    pg_version = conn.server_version_tuple()
-    # CREATE INDEX IF NOT EXISTS was introduced in PG9.5.
-    # Note that you need to ignore failures on older versions when
-    # using this construct.
-    out['if_index_not_exists'] = ' IF NOT EXISTS ' if pg_version >= (9, 5, 0) else ''
-
-    return out
-
-
-def _setup_postgresql_features(conn):
+def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
     """ Set up a dictionary with various optional Postgresql/Postgis features that
        depend on the database version.
     """
     pg_version = conn.server_version_tuple()
+    postgis_version = conn.postgis_version_tuple()
+    pg11plus = pg_version >= (11, 0, 0)
+    ps3 = postgis_version >= (3, 0)
     return {
-        'has_index_non_key_column' : pg_version >= (11, 0, 0)
+        'has_index_non_key_column': pg11plus,
+        'spgist_geom' : 'SPGIST' if pg11plus and ps3 else 'GIST'
     }
 
 
-class SQLPreprocessor: # pylint: disable=too-few-public-methods
+class SQLPreprocessor:
     """ A environment for preprocessing SQL files from the
         lib-sql directory.
@@ -75,11 +75,11 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
         and follows its syntax.
     """
 
-    def __init__(self, conn, config):
+    def __init__(self, conn: Connection, config: Configuration) -> None:
         self.env = jinja2.Environment(autoescape=False,
                                       loader=jinja2.FileSystemLoader(str(config.lib_dir.sql)))
 
-        db_info = {}
+        db_info: Dict[str, Any] = {}
         db_info['partitions'] = _get_partitions(conn)
         db_info['tables'] = _get_tables(conn)
         db_info['reverse_only'] = 'search_name' not in db_info['tables']
@@ -87,13 +87,22 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
 
         self.env.globals['config'] = config
         self.env.globals['db'] = db_info
-        self.env.globals['sql'] = _setup_postgres_sql(conn)
         self.env.globals['postgres'] = _setup_postgresql_features(conn)
-        self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
-                                         str((config.project_dir / 'module').resolve())
 
-    def run_sql_file(self, conn, name, **kwargs):
+    def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
+        """ Execute the given SQL template string on the connection.
+            The keyword arguments may supply additional parameters
+            for preprocessing.
+        """
+        sql = self.env.from_string(template).render(**kwargs)
+
+        with conn.cursor() as cur:
+            cur.execute(sql)
+        conn.commit()
+
+
+    def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
         """ Execute the given SQL file on the connection.
             The keyword arguments may supply additional parameters
             for preprocessing.
         """
@@ -102,3 +111,21 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
         with conn.cursor() as cur:
             cur.execute(sql)
         conn.commit()
+
+
+    def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
+                              **kwargs: Any) -> None:
+        """ Execute the given SQL files using parallel asynchronous connections.
+            The keyword arguments may supply additional parameters for
+            preprocessing.
+
+            After preprocessing the SQL code is cut at lines containing only
+            '---'. Each chunk is sent to one of the `num_threads` workers.
+        """
+        sql = self.env.get_template(name).render(**kwargs)
+
+        parts = sql.split('\n---\n')
+
+        with WorkerPool(dsn, num_threads) as pool:
+            for part in parts:
+                pool.next_free_worker().perform(part)