]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/db/async_connection.py
Vagrant and CI tests for Ubuntu 22.04
[nominatim.git] / nominatim / db / async_connection.py
index c5d6872bf0790abfaa10f7b07d2567ba0913663d..285463a50ffd9d7eee8efd1fa931d0bdcc4254f7 100644 (file)
@@ -1,11 +1,15 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
-# This file is part of Nominatim.
-# Copyright (C) 2021 by the Nominatim developer community.
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """ Database helper functions for the indexer.
 """
 import logging
+import select
+import time
+
 import psycopg2
 from psycopg2.extras import wait_select
 
@@ -14,7 +18,7 @@ from psycopg2.extras import wait_select
 try:
     import psycopg2.errors # pylint: disable=no-name-in-module,import-error
     __has_psycopg2_errors__ = True
-except ModuleNotFoundError:
+except ImportError:
     __has_psycopg2_errors__ = False
 
 LOG = logging.getLogger()
@@ -25,22 +29,27 @@ class DeadlockHandler:
         normally.
     """
 
-    def __init__(self, handler):
+    def __init__(self, handler, ignore_sql_errors=False):
+        # `handler` is called without arguments when a deadlock is detected.
+        # With `ignore_sql_errors` set, any other psycopg2.Error is logged
+        # and suppressed instead of being raised.
         self.handler = handler
+        self.ignore_sql_errors = ignore_sql_errors
 
     def __enter__(self):
-        pass
+        # Hand back the handler itself as the target of a `with ... as` clause.
+        return self
 
     def __exit__(self, exc_type, exc_value, traceback):
+        # Returning True suppresses the exception raised inside the with
+        # block, returning False lets it propagate to the caller.
         if __has_psycopg2_errors__:
             if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
                 self.handler()
                 return True
-        else:
-            if exc_type == psycopg2.extensions.TransactionRollbackError:
-                if exc_value.pgcode == '40P01':
-                    self.handler()
-                    return True
+        # Without the psycopg2.errors module, a deadlock is recognised by
+        # the pgcode '40P01' on a TransactionRollbackError.
+        elif exc_type == psycopg2.extensions.TransactionRollbackError \
+             and exc_value.pgcode == '40P01':
+            self.handler()
+            return True
+
+        # Optionally swallow every other database error, leaving only a
+        # log entry behind.
+        if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
+            LOG.info("SQL error ignored: %s", exc_value)
+            return True
+
         return False
 
 
@@ -48,14 +57,15 @@ class DBConnection:
     """ A single non-blocking database connection.
     """
 
-    def __init__(self, dsn):
+    def __init__(self, dsn, cursor_factory=None, ignore_sql_errors=False):
+        """ Set up the connection state and connect to the database
+            given by `dsn`.
+
+            `cursor_factory` is handed on to connect(), which uses it when
+            creating the cursor. `ignore_sql_errors` is handed on to the
+            DeadlockHandler that guards waiting and polling on the
+            connection.
+        """
         self.current_query = None
         self.current_params = None
         self.dsn = dsn
+        self.ignore_sql_errors = ignore_sql_errors
 
         self.conn = None
         self.cursor = None
-        self.connect()
+        self.connect(cursor_factory=cursor_factory)
 
     def close(self):
         """ Close all open connections. Does not wait for pending requests.
@@ -66,7 +76,7 @@ class DBConnection:
 
         self.conn = None
 
-    def connect(self):
+    def connect(self, cursor_factory=None):
         """ (Re)connect to the database. Creates an asynchronous connection
             with JIT and parallel processing disabled. If a connection was
             already open, it is closed and a new connection established.
@@ -76,10 +86,10 @@ class DBConnection:
 
         # Use a dict to hand in the parameters because async is a reserved
         # word in Python3.
-        self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
+        self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
         self.wait()
 
-        self.cursor = self.conn.cursor()
+        self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
         # Disable JIT and parallel workers as they are known to cause problems.
         # Update pg_settings instead of using SET because it does not yield
         # errors on older versions of Postgres where the settings are not
@@ -98,7 +108,7 @@ class DBConnection:
         """ Block until any pending operation is done.
         """
         while True:
-            with DeadlockHandler(self._deadlock_handler):
+            with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
                 wait_select(self.conn)
                 self.current_query = None
                 return
@@ -125,9 +135,82 @@ class DBConnection:
         if self.current_query is None:
             return True
 
-        with DeadlockHandler(self._deadlock_handler):
+        with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
             if self.conn.poll() == psycopg2.extensions.POLL_OK:
                 self.current_query = None
                 return True
 
         return False
+
+
+class WorkerPool:
+    """ A pool of asynchronous database connections.
+
+        The pool may be used as a context manager. On leaving the context
+        it waits for all connections to finish and then closes them.
+    """
+    # Once more than this many workers have been handed out, all
+    # connections of the pool are reconnected.
+    REOPEN_CONNECTIONS_AFTER = 100000
+
+    def __init__(self, dsn, pool_size, ignore_sql_errors=False):
+        """ Open `pool_size` connections to the database given by `dsn`.
+
+            `ignore_sql_errors` is handed on unchanged to every
+            DBConnection of the pool.
+        """
+        self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
+                        for _ in range(pool_size)]
+        self.free_workers = self._yield_free_worker()
+        # Accumulated time in seconds spent blocked in select() while
+        # waiting for a connection to become available.
+        self.wait_time = 0
+
+
+    def finish_all(self):
+        """ Wait for all connections to finish.
+
+            The iterator over free workers is restarted afterwards.
+        """
+        for thread in self.threads:
+            while not thread.is_done():
+                thread.wait()
+
+        self.free_workers = self._yield_free_worker()
+
+    def close(self):
+        """ Close all connections and clear the pool.
+
+            The pool cannot hand out workers anymore afterwards.
+        """
+        for thread in self.threads:
+            thread.close()
+        self.threads = []
+        self.free_workers = None
+
+
+    def next_free_worker(self):
+        """ Get the next free connection.
+
+            Waits until one of the connections of the pool is done
+            when all of them are currently busy.
+        """
+        return next(self.free_workers)
+
+
+    def _yield_free_worker(self):
+        """ Endless generator over connections that have no pending
+            operation.
+        """
+        # The first round checks every connection of the pool. Later
+        # rounds only check the connections reported by select().
+        ready = self.threads
+        command_stat = 0
+        while True:
+            for thread in ready:
+                if thread.is_done():
+                    command_stat += 1
+                    yield thread
+
+            if command_stat > self.REOPEN_CONNECTIONS_AFTER:
+                self._reconnect_threads()
+                ready = self.threads
+                command_stat = 0
+            else:
+                # Block until some connection is reported ready and account
+                # for the time spent waiting.
+                # NOTE(review): select() needs the connection objects to
+                # provide fileno(); DBConnection presumably does - confirm.
+                tstart = time.time()
+                _, ready, _ = select.select([], self.threads, [])
+                self.wait_time += time.time() - tstart
+
+
+    def _reconnect_threads(self):
+        """ Wait for each connection to finish its pending operation and
+            reconnect it.
+        """
+        for thread in self.threads:
+            while not thread.is_done():
+                thread.wait()
+            thread.connect()
+
+
+    def __enter__(self):
+        return self
+
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        # Close the connections even when waiting for the outstanding
+        # operations fails, so that no connection is leaked.
+        try:
+            self.finish_all()
+        finally:
+            self.close()