git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tools/replication.py
Merge pull request #3344 from lonvia/osm2pgsql-new-middle
[nominatim.git] / nominatim / tools / replication.py
index d93335b8b7169bcf9746970f4919c0db76066917..edd63e49a15931d289b2fd488737ad8d105dc532 100644 (file)
@@ -18,7 +18,7 @@ import urllib.request as urlrequest
 
 import requests
 from nominatim.db import status
-from nominatim.db.connection import Connection
+from nominatim.db.connection import Connection, connect
 from nominatim.tools.exec_utils import run_osm2pgsql
 from nominatim.errors import UsageError
 
@@ -92,12 +92,14 @@ class UpdateState(Enum):
     NO_CHANGES = 3
 
 
-def update(conn: Connection, options: MutableMapping[str, Any],
+def update(dsn: str, options: MutableMapping[str, Any],
            socket_timeout: int = 60) -> UpdateState:
     """ Update database from the next batch of data. Returns the state of
         updates according to `UpdateState`.
     """
-    startdate, startseq, indexed = status.get_status(conn)
+    with connect(dsn) as conn:
+        startdate, startseq, indexed = status.get_status(conn)
+        conn.commit()
 
     if startseq is None:
         LOG.error("Replication not set up. "
@@ -130,19 +132,37 @@ def update(conn: Connection, options: MutableMapping[str, Any],
         if endseq is None:
             return UpdateState.NO_CHANGES
 
-        # Consume updates with osm2pgsql.
-        options['append'] = True
-        options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
-        run_osm2pgsql(options)
+        with connect(dsn) as conn:
+            run_osm2pgsql_updates(conn, options)
 
-        # Write the current status to the file
-        endstate = repl.get_state_info(endseq)
-        status.set_status(conn, endstate.timestamp if endstate else None,
-                          seq=endseq, indexed=False)
+            # Write the current status to the file
+            endstate = repl.get_state_info(endseq)
+            status.set_status(conn, endstate.timestamp if endstate else None,
+                              seq=endseq, indexed=False)
+            conn.commit()
 
     return UpdateState.UP_TO_DATE
 
 
+def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -> None:
+    """ Run osm2pgsql in append mode.
+    """
+    # Remove any stale deletion marks.
+    with conn.cursor() as cur:
+        cur.execute('TRUNCATE place_to_be_deleted')
+    conn.commit()
+
+    # Consume updates with osm2pgsql.
+    options['append'] = True
+    options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
+    run_osm2pgsql(options)
+
+    # Handle deletions
+    with conn.cursor() as cur:
+        cur.execute('SELECT flush_deleted_places()')
+    conn.commit()
+
+
 def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]:
     """ Returns a ReplicationServer in form of a context manager.
 
@@ -156,25 +176,25 @@ def _make_replication_server(url: str, timeout: int) -> ContextManager[Replicati
             """ Download a resource from the given URL and return a byte sequence
                 of the content.
             """
-            get_params = {
-                'headers': {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"},
-                'timeout': timeout or None,
-                'stream': True
-            }
+            headers = {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
 
             if self.session is not None:
-                return self.session.get(url.get_full_url(), **get_params)
+                return self.session.get(url.get_full_url(),
+                                       headers=headers, timeout=timeout or None,
+                                       stream=True)
 
             @contextmanager
             def _get_url_with_session() -> Iterator[requests.Response]:
                 with requests.Session() as session:
-                    request = session.get(url.get_full_url(), **get_params) # type: ignore
+                    request = session.get(url.get_full_url(),
+                                          headers=headers, timeout=timeout or None,
+                                          stream=True)
                     yield request
 
             return _get_url_with_session()
 
         repl = ReplicationServer(url)
-        repl.open_url = types.MethodType(patched_open_url, repl)
+        setattr(repl, 'open_url', types.MethodType(patched_open_url, repl))
 
         return cast(ContextManager[ReplicationServer], repl)