git.openstreetmap.org Git - nominatim.git/blob - nominatim/db/async_connection.py
convert word info column to json before copying
[nominatim.git] / nominatim / db / async_connection.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim.
4 # Copyright (C) 2021 by the Nominatim developer community.
5 # For a full list of authors see the git log.
6 """ Database helper functions for the indexer.
7 """
8 import logging
9 import select
10 import time
11
12 import psycopg2
13 from psycopg2.extras import wait_select
14
15 # psycopg2 emits different exceptions pre and post 2.8. Detect if the new error
16 # module is available and adapt the error handling accordingly.
17 try:
18     import psycopg2.errors # pylint: disable=no-name-in-module,import-error
19     __has_psycopg2_errors__ = True
20 except ImportError:
21     __has_psycopg2_errors__ = False
22
23 LOG = logging.getLogger()
24
class DeadlockHandler:
    """ Context manager that intercepts deadlock errors raised by psycopg2
        inside its block and invokes a callback instead of propagating them.

        When `ignore_sql_errors` is set, any other psycopg2 error is logged
        and swallowed as well. Everything else is raised as usual.
    """

    def __init__(self, handler, ignore_sql_errors=False):
        self.ignore_sql_errors = ignore_sql_errors
        self.handler = handler

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Newer psycopg2 versions have a dedicated exception class for
        # deadlocks. Older ones only raise the generic rollback error, so
        # the Postgres error code has to be inspected in addition.
        if __has_psycopg2_errors__:
            deadlocked = exc_type == psycopg2.errors.DeadlockDetected # pylint: disable=E1101
        else:
            deadlocked = exc_type == psycopg2.extensions.TransactionRollbackError \
                         and exc_value.pgcode == '40P01'

        if deadlocked:
            self.handler()
            return True

        if not (self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error)):
            # Not ours to handle: let the exception (if any) propagate.
            return False

        LOG.info("SQL error ignored: %s", exc_value)
        return True
53
54
class DBConnection:
    """ A single non-blocking database connection.

        Holds one asynchronous psycopg2 connection with one cursor and
        remembers the query currently in flight, so that the query can be
        sent again when it was aborted because of a deadlock.
    """

    def __init__(self, dsn, cursor_factory=None, ignore_sql_errors=False):
        """ Set up the state and immediately connect to the database
            described by `dsn`. `cursor_factory` is handed on to the cursor
            creation. With `ignore_sql_errors` set, SQL errors of pending
            queries are logged instead of raised.
        """
        self.current_query = None
        self.current_params = None
        self.dsn = dsn
        self.ignore_sql_errors = ignore_sql_errors

        self.conn = None
        self.cursor = None
        self.connect(cursor_factory=cursor_factory)

    def close(self):
        """ Close all open connections. Does not wait for pending requests.
            May be called repeatedly and on a partially set up connection.
        """
        if self.conn is not None:
            # The cursor is only created after the connection handshake
            # has completed. It may therefore still be missing when
            # connecting has failed half-way.
            if self.cursor is not None:
                self.cursor.close()
                self.cursor = None
            self.conn.close()

        self.conn = None

    def connect(self, cursor_factory=None):
        """ (Re)connect to the database. Creates an asynchronous connection
            with JIT and parallel processing disabled. If a connection was
            already open, it is closed and a new connection established.
            The caller must ensure that no query is pending before reconnecting.
        """
        self.close()

        # Use a dict to hand in the parameters because async is a reserved
        # word in Python3.
        self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
        self.wait()

        self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
        # Disable JIT and parallel workers as they are known to cause problems.
        # Update pg_settings instead of using SET because it does not yield
        # errors on older versions of Postgres where the settings are not
        # implemented.
        self.perform(
            """ UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost';
                UPDATE pg_settings SET setting = 0
                   WHERE name = 'max_parallel_workers_per_gather';""")
        self.wait()

    def _deadlock_handler(self):
        """ Send the query that was last handed to perform() once more.
        """
        LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params))
        self.cursor.execute(self.current_query, self.current_params)

    def wait(self):
        """ Block until any pending operation is done.
        """
        # When the handler swallows an exception (deadlock retry or ignored
        # SQL error), the return statement is never reached and the loop
        # waits again.
        while True:
            with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
                wait_select(self.conn)
                self.current_query = None
                return

    def perform(self, sql, args=None):
        """ Send SQL query to the server. Returns immediately without
            blocking.
        """
        # Remember the query so that it can be repeated after a deadlock.
        self.current_query = sql
        self.current_params = args
        self.cursor.execute(sql, args)

    def fileno(self):
        """ File descriptor to wait for. (Makes this class select()able.)
        """
        return self.conn.fileno()

    def is_done(self):
        """ Check if the connection is available for a new query.

            Also checks if the previous query has run into a deadlock.
            If so, then the previous query is repeated.
        """
        if self.current_query is None:
            return True

        with DeadlockHandler(self._deadlock_handler, self.ignore_sql_errors):
            if self.conn.poll() == psycopg2.extensions.POLL_OK:
                self.current_query = None
                return True

        # Still busy, or an error was handled by the DeadlockHandler.
        return False
143
144
class WorkerPool:
    """ A pool of asynchronous database connections.

        The pool may be used as a context manager. On leaving the context
        all pending queries are finished and the connections closed.
    """
    # Number of workers handed out by the pool before all connections
    # are closed and reopened.
    REOPEN_CONNECTIONS_AFTER = 100000

    def __init__(self, dsn, pool_size, ignore_sql_errors=False):
        """ Open `pool_size` connections to the database described by `dsn`.
            `ignore_sql_errors` is handed on to each connection.
        """
        self.threads = [DBConnection(dsn, ignore_sql_errors=ignore_sql_errors)
                        for _ in range(pool_size)]
        self.free_workers = self._yield_free_worker()
        # Accumulated time in seconds spent in select() waiting for a
        # connection to become ready.
        self.wait_time = 0


    def finish_all(self):
        """ Wait for all connections to finish.
        """
        for thread in self.threads:
            while not thread.is_done():
                thread.wait()

        # Start over with a fresh generator.
        self.free_workers = self._yield_free_worker()

    def close(self):
        """ Close all connections and clear the pool.
        """
        for thread in self.threads:
            thread.close()
        self.threads = []
        self.free_workers = None


    def next_free_worker(self):
        """ Get the next free connection.
        """
        return next(self.free_workers)


    def _yield_free_worker(self):
        """ Endless generator over connections that are ready to take a
            new query.
        """
        ready = self.threads
        command_stat = 0
        while True:
            for thread in ready:
                if thread.is_done():
                    command_stat += 1
                    yield thread

            if command_stat > self.REOPEN_CONNECTIONS_AFTER:
                self._reconnect_threads()
                ready = self.threads
                command_stat = 0
            else:
                # Block in select() and only recheck the connections
                # it reports back.
                tstart = time.time()
                _, ready, _ = select.select([], self.threads, [])
                self.wait_time += time.time() - tstart


    def _reconnect_threads(self):
        """ Let every connection finish its pending query and then
            reopen it.
        """
        for thread in self.threads:
            while not thread.is_done():
                thread.wait()
            thread.connect()


    def __enter__(self):
        return self


    def __exit__(self, exc_type, exc_value, traceback):
        # Waiting for the pending queries may raise. Make sure that the
        # connections are closed in that case as well.
        try:
            self.finish_all()
        finally:
            self.close()