Correct some typos

[nominatim.git] / nominatim / db / sql_preprocessor.py
diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py

index 02d782875537649ada45a133debf25323c375a27..839f682d8a8e5db722657751e6b9e4051db9d002 100644 (file)
--- a/nominatim/db/sql_preprocessor.py
+++ b/nominatim/db/sql_preprocessor.py
@@ -1,10 +1,20 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
  """
  Preprocessing of SQL files.
  """
  """
  Preprocessing of SQL files.
  """
+from typing import Set, Dict, Any
  import jinja2
  
  import jinja2
  
+from nominatim.db.connection import Connection
+from nominatim.db.async_connection import WorkerPool
+from nominatim.config import Configuration
  
  
-def _get_partitions(conn):
+def _get_partitions(conn: Connection) -> Set[int]:
      """ Get the set of partitions currently in use.
      """
      with conn.cursor() as cur:
      """ Get the set of partitions currently in use.
      """
      with conn.cursor() as cur:
@@ -16,7 +26,7 @@ def _get_partitions(conn):
      return partitions
  
  
      return partitions
  
  
-def _get_tables(conn):
+def _get_tables(conn: Connection) -> Set[str]:
      """ Return the set of tables currently in use.
          Only includes non-partitioned
      """
      """ Return the set of tables currently in use.
          Only includes non-partitioned
      """
@@ -25,7 +35,36 @@ def _get_tables(conn):
  
          return set((row[0] for row in list(cur)))
  
  
          return set((row[0] for row in list(cur)))
  
-class SQLPreprocessor: # pylint: disable=too-few-public-methods
+
+def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
+    """ Build the tablespace expressions for all tablespace settings.
+
+        The result maps keys of the form '<subset>_<kind>' (lower case,
+        for example 'address_data' or 'aux_index') to the expression
+        `TABLESPACE "<name>"` when the configuration attribute
+        TABLESPACE_<SUBSET>_<KIND> has a non-empty value. An empty
+        setting is passed through unchanged.
+    """
+    def _clause(setting: str) -> str:
+        # Only wrap the name when a tablespace was actually configured.
+        name = getattr(config, setting)
+        return f'TABLESPACE "{name}"' if name else name
+
+    return {f'{subset.lower()}_{kind.lower()}': _clause(f'TABLESPACE_{subset}_{kind}')
+            for subset in ('ADDRESS', 'SEARCH', 'AUX')
+            for kind in ('DATA', 'INDEX')}
+
+
+def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
+    """ Set up a dictionary with various optional Postgresql/Postgis features that
+        depend on the database version.
+
+        The returned dictionary has two entries:
+        'has_index_non_key_column' is true when the connection reports
+        PostgreSQL 11 or later. 'spgist_geom' is 'SPGIST' when in addition
+        PostGIS 3 or later is reported and 'GIST' otherwise.
+    """
+    pg_version = conn.server_version_tuple()
+    postgis_version = conn.postgis_version_tuple()
+    # Compare against the major version only. Python orders a tuple that is
+    # a proper prefix of another one before the longer tuple, so a
+    # two-element version like (11, 0) would fail a test against (11, 0, 0).
+    # A one-element bound gives the same answer for version tuples of any
+    # length.
+    pg11plus = pg_version >= (11,)
+    ps3 = postgis_version >= (3,)
+    return {
+        'has_index_non_key_column': pg11plus,
+        'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST'
+    }
+
+class SQLPreprocessor:
      """ A environment for preprocessing SQL files from the
          lib-sql directory.
  
      """ A environment for preprocessing SQL files from the
          lib-sql directory.
  
@@ -36,22 +75,34 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
          and follows its syntax.
      """
  
          and follows its syntax.
      """
  
-    def __init__(self, conn, config, sqllib_dir):
+    def __init__(self, conn: Connection, config: Configuration) -> None:
+        """ Create the Jinja2 environment used for preprocessing.
+
+            Templates are loaded from the directory `config.lib_dir.sql`.
+            The globals `config`, `db` and `postgres` are made available
+            to all templates. `db` contains the partitions and tables
+            read through `conn`, the flag `reverse_only` (true when there
+            is no table 'search_name') and the tablespace expressions.
+        """
          self.env = jinja2.Environment(autoescape=False,
          self.env = jinja2.Environment(autoescape=False,
-                                      loader=jinja2.FileSystemLoader(str(sqllib_dir)))
+                                      loader=jinja2.FileSystemLoader(str(config.lib_dir.sql)))
  
  
-        db_info = {}
+        db_info: Dict[str, Any] = {}
          db_info['partitions'] = _get_partitions(conn)
          db_info['tables'] = _get_tables(conn)
          db_info['reverse_only'] = 'search_name' not in db_info['tables']
          db_info['partitions'] = _get_partitions(conn)
          db_info['tables'] = _get_tables(conn)
          db_info['reverse_only'] = 'search_name' not in db_info['tables']
+        db_info['tablespace'] = _setup_tablespace_sql(config)
  
          self.env.globals['config'] = config
          self.env.globals['db'] = db_info
  
          self.env.globals['config'] = config
          self.env.globals['db'] = db_info
-        self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \
-                                         str((config.project_dir / 'module').resolve())
+        # Version-dependent feature switches, computed once from the connection.
+        self.env.globals['postgres'] = _setup_postgresql_features(conn)
+
  
  
+    def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
+        """ Execute the given SQL template string on the connection.
+            The keyword arguments may supply additional parameters
+            for preprocessing.
+
+            The template is rendered with the environment of this
+            preprocessor. The resulting SQL is passed to the cursor in a
+            single execute() call and the connection is committed
+            afterwards.
+        """
+        sql = self.env.from_string(template).render(**kwargs)
  
  
-    def run_sql_file(self, conn, name, **kwargs):
+        with conn.cursor() as cur:
+            cur.execute(sql)
+        conn.commit()
+
+
+    def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
          """ Execute the given SQL file on the connection. The keyword arguments
              may supply additional parameters for preprocessing.
          """
          """ Execute the given SQL file on the connection. The keyword arguments
              may supply additional parameters for preprocessing.
          """
@@ -60,3 +111,21 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods
          with conn.cursor() as cur:
              cur.execute(sql)
          conn.commit()
          with conn.cursor() as cur:
              cur.execute(sql)
          conn.commit()
+
+
+    def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
+                              **kwargs: Any) -> None:
+        """ Render the SQL template `name` and run it on a pool of workers.
+
+            The keyword arguments are handed to the template as additional
+            parameters for preprocessing.
+
+            The rendered SQL is cut wherever a line consisting only of
+            '---' separates two chunks. A `WorkerPool` with `num_threads`
+            workers is opened on `dsn` and each chunk is handed to the
+            next free worker.
+        """
+        rendered = self.env.get_template(name).render(**kwargs)
+        chunks = rendered.split('\n---\n')
+
+        with WorkerPool(dsn, num_threads) as workers:
+            for chunk in chunks:
+                next_worker = workers.next_free_worker()
+                next_worker.perform(chunk)