import requests
from nominatim.db import status
-from nominatim.db.connection import Connection
+from nominatim.db.connection import Connection, connect
from nominatim.tools.exec_utils import run_osm2pgsql
from nominatim.errors import UsageError
LOG = logging.getLogger()
-def init_replication(conn: Connection, base_url: str) -> None:
+def init_replication(conn: Connection, base_url: str,
+ socket_timeout: int = 60) -> None:
""" Set up replication for the server at the given base URL.
"""
LOG.info("Using replication source: %s", base_url)
# margin of error to make sure we get all data
date -= dt.timedelta(hours=3)
- repl = ReplicationServer(base_url)
-
- seq = repl.timestamp_to_sequence(date)
+ with _make_replication_server(base_url, socket_timeout) as repl:
+ seq = repl.timestamp_to_sequence(date)
if seq is None:
LOG.fatal("Cannot reach the configured replication service '%s'.\n"
LOG.warning("Updates initialised at sequence %s (%s)", seq, date)
-def check_for_updates(conn: Connection, base_url: str) -> int:
+def check_for_updates(conn: Connection, base_url: str,
+ socket_timeout: int = 60) -> int:
""" Check if new data is available from the replication service at the
given base URL.
"""
"Please run 'nominatim replication --init' first.")
return 254
- state = ReplicationServer(base_url).get_state_info()
+ with _make_replication_server(base_url, socket_timeout) as repl:
+ state = repl.get_state_info()
if state is None:
LOG.error("Cannot get state for URL %s.", base_url)
NO_CHANGES = 3
-def update(conn: Connection, options: MutableMapping[str, Any],
+def update(dsn: str, options: MutableMapping[str, Any],
socket_timeout: int = 60) -> UpdateState:
""" Update database from the next batch of data. Returns the state of
updates according to `UpdateState`.
"""
- startdate, startseq, indexed = status.get_status(conn)
+ with connect(dsn) as conn:
+ startdate, startseq, indexed = status.get_status(conn)
+ conn.commit()
if startseq is None:
LOG.error("Replication not set up. "
if endseq is None:
return UpdateState.NO_CHANGES
- # Consume updates with osm2pgsql.
- options['append'] = True
- options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
- run_osm2pgsql(options)
+ with connect(dsn) as conn:
+ run_osm2pgsql_updates(conn, options)
- # Write the current status to the file
- endstate = repl.get_state_info(endseq)
- status.set_status(conn, endstate.timestamp if endstate else None,
- seq=endseq, indexed=False)
+ # Write the current status to the file
+ endstate = repl.get_state_info(endseq)
+ status.set_status(conn, endstate.timestamp if endstate else None,
+ seq=endseq, indexed=False)
+ conn.commit()
return UpdateState.UP_TO_DATE
+def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -> None:
+ """ Run osm2pgsql in append mode.
+
+ The table `place_to_be_deleted` is emptied and the change committed
+ before osm2pgsql is started. The keys `append` and `disable_jit` are
+ then set in `options` (the mapping is modified in place) and the
+ mapping is handed to `run_osm2pgsql()`. Afterwards the SQL function
+ `flush_deleted_places()` is called and the result committed.
+ """
+ # Remove any stale deletion marks.
+ with conn.cursor() as cur:
+ cur.execute('TRUNCATE place_to_be_deleted')
+ conn.commit()
+
+ # Consume updates with osm2pgsql.
+ options['append'] = True
+ # `disable_jit` becomes True when the server version tuple is at least
+ # (11, 0); presumably because JIT only exists from that version on -
+ # TODO confirm against run_osm2pgsql().
+ options['disable_jit'] = conn.server_version_tuple() >= (11, 0)
+ run_osm2pgsql(options)
+
+ # Handle deletions
+ with conn.cursor() as cur:
+ cur.execute('SELECT flush_deleted_places()')
+ conn.commit()
+
+
def _make_replication_server(url: str, timeout: int) -> ContextManager[ReplicationServer]:
""" Returns a ReplicationServer in form of a context manager.
""" Download a resource from the given URL and return a byte sequence
of the content.
"""
- get_params = {
- 'headers': {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"},
- 'timeout': timeout or None,
- 'stream': True
- }
+ headers = {"User-Agent" : f"Nominatim (pyosmium/{pyo_version.pyosmium_release})"}
if self.session is not None:
- return self.session.get(url.get_full_url(), **get_params)
+ return self.session.get(url.get_full_url(),
+ headers=headers, timeout=timeout or None,
+ stream=True)
@contextmanager
def _get_url_with_session() -> Iterator[requests.Response]:
with requests.Session() as session:
- request = session.get(url.get_full_url(), **get_params) # type: ignore
+ request = session.get(url.get_full_url(),
+ headers=headers, timeout=timeout or None,
+ stream=True)
yield request
return _get_url_with_session()
repl = ReplicationServer(url)
- repl.open_url = types.MethodType(patched_open_url, repl)
+ setattr(repl, 'open_url', types.MethodType(patched_open_url, repl))
return cast(ContextManager[ReplicationServer], repl)