1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim.
4 # Copyright (C) 2021 by the Nominatim developer community.
5 # For a full list of authors see the git log.
6 """ Database helper functions for the indexer.
13 from psycopg2.extras import wait_select
15 # psycopg2 emits different exceptions pre and post 2.8. Detect if the new error
16 # module is available and adapt the error handling accordingly.
18 import psycopg2.errors # pylint: disable=no-name-in-module,import-error
19 __has_psycopg2_errors__ = True
21 __has_psycopg2_errors__ = False
23 LOG = logging.getLogger()
25 class DeadlockHandler:
26 """ Context manager that catches deadlock exceptions and calls
27 the given handler function. All other exceptions are passed on
31 def __init__(self, handler, ignore_sql_errors=False):
32 self.handler = handler
33 self.ignore_sql_errors = ignore_sql_errors
38 def __exit__(self, exc_type, exc_value, traceback):
39 if __has_psycopg2_errors__:
40 if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101
43 elif exc_type == psycopg2.extensions.TransactionRollbackError \
44 and exc_value.pgcode == '40P01':
48 if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error):
49 LOG.info("SQL error ignored: %s", exc_value)
56 """ A single non-blocking database connection.
59 def __init__(self, dsn, cursor_factory=None, ignore_sql_errors=False):
60 self.current_query = None
61 self.current_params = None
63 self.ignore_sql_errors = ignore_sql_errors
67 self.connect(cursor_factory=cursor_factory)
70 """ Close all open connections. Does not wait for pending requests.
72 if self.conn is not None:
78 def connect(self, cursor_factory=None):
79 """ (Re)connect to the database. Creates an asynchronous connection
80 with JIT and parallel processing disabled. If a connection was
81 already open, it is closed and a new connection established.
82 The caller must ensure that no query is pending before reconnecting.
86 # Use a dict to hand in the parameters because async is a reserved
88 self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
91 self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
92 # Disable JIT and parallel workers as they are known to cause problems.
93 # Update pg_settings instead of using SET because it does not yield
94 # errors on older versions of Postgres where the settings are not
97 """ UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
98 UPDATE pg_settings SET setting = 0
99 WHERE name = 'max_parallel_workers_per_gather';""")
def _deadlock_handler(self):
    """ Log the deadlock together with the remembered parameters and
        send the remembered query to the cursor once more.
    """
    params = self.current_params
    LOG.info("Deadlock detected (params = %s), retry.", str(params))
    self.cursor.execute(self.current_query, params)
107 """ Block until any pending operation is done.
110 with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
111 wait_select(self.conn)
112 self.current_query = None
def perform(self, sql, args=None):
    """ Hand an SQL statement and its parameters to the cursor.

        The statement and parameters are stored in `current_query` and
        `current_params` before the cursor is asked to execute them.
    """
    self.current_query, self.current_params = sql, args
    self.cursor.execute(sql, args)
124 """ File descriptor to wait for. (Makes this class select()able.)
126 return self.conn.fileno()
129 """ Check if the connection is available for a new query.
131 Also checks if the previous query has run into a deadlock.
132 If so, then the previous query is repeated.
134 if self.current_query is None:
137 with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
138 if self.conn.poll() == psycopg2.extensions.POLL_OK:
139 self.current_query = None
146 """ A pool of asynchronous database connections.
148 The pool may be used as a context manager.
150 REOPEN_CONNECTIONS_AFTER = 100000
152 def __init__(self, dsn, pool_size, ignore_sql_errors=False):
153 self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
154 for _ in range(pool_size)]
155 self.free_workers = self._yield_free_worker()
159 def finish_all(self):
160 """ Wait for all connections to finish.
162 for thread in self.threads:
163 while not thread.is_done():
166 self.free_workers = self._yield_free_worker()
169 """ Close all connections and clear the pool.
171 for thread in self.threads:
174 self.free_workers = None
def next_free_worker(self):
    """ Return the next item delivered by the `free_workers` iterator.
    """
    worker = next(self.free_workers)
    return worker
183 def _yield_free_worker(self):
192 if command_stat > self.REOPEN_CONNECTIONS_AFTER:
193 self._reconnect_threads()
198 _, ready, _ = select.select([], self.threads, [])
199 self.wait_time += time.time() - tstart
202 def _reconnect_threads(self):
203 for thread in self.threads:
204 while not thread.is_done():
213 def __exit__(self, exc_type, exc_value, traceback):