if ($aResult['init-updates']) {
- // sanity check that the replication URL is correct
- $sBaseState = file_get_contents($sBaseURL.'/state.txt');
- if ($sBaseState === false) {
- echo "\nCannot find state.txt file at the configured replication URL.\n";
- echo "Does the URL point to a directory containing OSM update data?\n\n";
- fail('replication URL not reachable.');
- }
- // sanity check for pyosmium-get-changes
- if (!$sPyosmiumBin) {
- echo "\nNOMINATIM_PYOSMIUM_BINARY not configured.\n";
- echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
- echo "in your local .env file.\n\n";
- fail('NOMINATIM_PYOSMIUM_BINARY not configured');
- }
-
- $aOutput = 0;
- $oCMD = new \Nominatim\Shell($sPyosmiumBin, '--help');
- exec($oCMD->escapedCmd(), $aOutput, $iRet);
-
- if ($iRet != 0) {
- echo "Cannot execute pyosmium-get-changes.\n";
- echo "Make sure you have pyosmium installed correctly\n";
- echo "and have set up NOMINATIM_PYOSMIUM_BINARY to point to pyosmium-get-changes.\n";
- fail('pyosmium-get-changes not found or not usable');
- }
-
- if (!$aResult['no-update-functions']) {
- (clone($oNominatimCmd))->addParams('refresh', '--functions')->run();
- }
-
- $sDatabaseDate = getDatabaseDate($oDB);
- if (!$sDatabaseDate) {
- fail('Cannot determine date of database.');
- }
- $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
-
- // get the appropriate state id
- $aOutput = 0;
- $oCMD = (new \Nominatim\Shell($sPyosmiumBin))
- ->addParams('--start-date', $sWindBack)
- ->addParams('--server', $sBaseURL);
-
- exec($oCMD->escapedCmd(), $aOutput, $iRet);
- if ($iRet != 0 || $aOutput[0] == 'None') {
- fail('Error running pyosmium tools');
- }
-
- $oDB->exec('TRUNCATE import_status');
- $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
- $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
+ $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
- try {
- $oDB->exec($sSQL);
- } catch (\Nominatim\DatabaseError $e) {
- fail('Could not enter sequence into database.');
+ if ($aResult['no-update-functions']) {
+ $oCmd->addParams('--no-update-functions');
}
- echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
+ $oCmd->run();
}
if ($aResult['check-for-updates']) {
@staticmethod
def run(args):
+ try:
+ import osmium # pylint: disable=W0611
+ except ModuleNotFoundError:
+ LOG.fatal("pyosmium not installed. Replication functions not available.\n"
+ "To install pyosmium via pip: pip3 install osmium")
+ return 1
+
+ from .tools import replication, refresh
+
+ conn = connect(args.config.get_libpq_dsn())
+
params = ['update.php']
if args.init:
- params.append('--init-updates')
- if not args.update_functions:
- params.append('--no-update-functions')
- elif args.check_for_updates:
+ LOG.warning("Initialising replication updates")
+ replication.init_replication(conn, args.config.REPLICATION_URL)
+ if args.update_functions:
+ LOG.warning("Create functions")
+ refresh.create_functions(conn, args.config, args.data_dir,
+ True, False)
+ conn.close()
+ return 0
+
+ if args.check_for_updates:
params.append('--check-for-updates')
else:
if args.once:
--- /dev/null
+"""
+Specialised connection and cursor functions.
+"""
+import logging
+
+import psycopg2
+import psycopg2.extensions
+import psycopg2.extras
+
+class _Cursor(psycopg2.extras.DictCursor):
+ """ A cursor returning dict-like objects and providing specialised
+ execution functions.
+ """
+
+ def execute(self, query, args=None): # pylint: disable=W0221
+ """ Query execution that logs the SQL query when debugging is enabled.
+ """
+ logger = logging.getLogger()
+ logger.debug(self.mogrify(query, args).decode('utf-8'))
+
+ super().execute(query, args)
+
+ def scalar(self, sql, args=None):
+ """ Execute query that returns a single value. The value is returned.
+ If the query yields more than one row, a ValueError is raised.
+ """
+ self.execute(sql, args)
+
+ if self.rowcount != 1:
+ raise ValueError("Query did not return a single row.")
+
+ return self.fetchone()[0]
+
+
+class _Connection(psycopg2.extensions.connection):
+ """ A connection that provides the specialised cursor by default and
+ adds convenience functions for administrating the database.
+ """
+
+ def cursor(self, cursor_factory=_Cursor, **kwargs):
+ """ Return a new cursor. By default the specialised cursor is returned.
+ """
+ return super().cursor(cursor_factory=cursor_factory, **kwargs)
+
+ def table_exists(self, table):
+ """ Check that a table with the given name exists in the database.
+ """
+ with self.cursor() as cur:
+ num = cur.scalar("""SELECT count(*) FROM pg_tables
+ WHERE tablename = %s""", (table, ))
+ return num == 1
+
+
+def connect(dsn):
+ """ Open a connection to the database using the specialised connection
+ factory.
+ """
+ return psycopg2.connect(dsn, connection_factory=_Connection)
--- /dev/null
+"""
+Access and helper functions for the status table.
+"""
+import datetime as dt
+import logging
+import re
+
+from ..tools.exec_utils import get_url
+
+LOG = logging.getLogger()
+
+def compute_database_date(conn):
+ """ Determine the date of the database from the newest object in the
+ data base.
+ """
+ # First, find the node with the highest ID in the database
+ with conn.cursor() as cur:
+ osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
+
+ if osmid is None:
+ LOG.fatal("No data found in the database.")
+ raise RuntimeError("No data found in the database.")
+
+ LOG.info("Using node id %d for timestamp lookup", osmid)
+ # Get the node from the API to find the timestamp when it was created.
+ node_url = 'https://www.openstreetmap.org/api/0.6/node/{}/1'.format(osmid)
+ data = get_url(node_url)
+
+ match = re.search(r'timestamp="((\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2}))Z"', data)
+
+ if match is None:
+ LOG.fatal("The node data downloaded from the API does not contain valid data.\n"
+ "URL used: %s", node_url)
+ raise RuntimeError("Bad API data.")
+
+ LOG.debug("Found timestamp %s", match[1])
+
+ return dt.datetime.fromisoformat(match[1]).replace(tzinfo=dt.timezone.utc)
+
+
+def set_status(conn, date, seq=None, indexed=True):
+ """ Replace the current status with the given status.
+ """
+ assert date.tzinfo == dt.timezone.utc
+ with conn.cursor() as cur:
+ cur.execute("TRUNCATE TABLE import_status")
+ cur.execute("""INSERT INTO import_status (lastimportdate, sequence_id, indexed)
+ VALUES (%s, %s, %s)""", (date, seq, indexed))
+
+ conn.commit()
"""
import logging
import subprocess
+import urllib.request as urlrequest
from urllib.parse import urlencode
+from ..version import NOMINATIM_VERSION
+
+LOG = logging.getLogger()
+
def run_legacy_script(script, *args, nominatim_env=None, throw_on_fail=False):
""" Run a Nominatim PHP script with the given arguments.
print(result[content_start + 4:].replace('\\n', '\n'))
return 0
+
+
+def get_url(url):
+ """ Get the contents from the given URL and return it as a UTF-8 string.
+ """
+ headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION}
+
+ try:
+ with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
+ return response.read().decode('utf-8')
+ except:
+ LOG.fatal('Failed to load URL: %s', url)
+ raise
--- /dev/null
+"""
+Functions for updating a database from a replication source.
+"""
+import datetime
+import logging
+
+from osmium.replication.server import ReplicationServer
+
+from ..db import status
+
+LOG = logging.getLogger()
+
+def init_replication(conn, base_url):
+ """ Set up replication for the server at the given base URL.
+ """
+ LOG.info("Using replication source: %s", base_url)
+ date = status.compute_database_date(conn)
+
+ # margin of error to make sure we get all data
+ date -= datetime.timedelta(hours=3)
+
+ repl = ReplicationServer(base_url)
+
+ seq = repl.timestamp_to_sequence(date)
+
+ if seq is None:
+ LOG.fatal("Cannot reach the configured replication service '%s'.\n"
+ "Does the URL point to a directory containing OSM update data?",
+ base_url)
+ raise RuntimeError("Failed to reach replication service")
+
+ status.set_status(conn, date=date, seq=seq)
+
+ LOG.warning("Updates intialised at sequence %s (%s)", seq, date)
--- /dev/null
+"""
+Version information for Nominatim.
+"""
+
+NOMINATIM_VERSION = "3.6.0"
NOMINATIM_HTTP_PROXY_PORT=3128
NOMINATIM_HTTP_PROXY_LOGIN=
NOMINATIM_HTTP_PROXY_PASSWORD=
+# Also set these standard environment variables.
+# HTTP_PROXY="http://user:pass@10.10.1.10:1080"
+# HTTPS_PROXY="http://user:pass@10.10.1.10:1080"
# Location of the osm2pgsql binary.
# When empty, osm2pgsql is expected to reside in the osm2pgsql directory in
drop table if exists import_status;
CREATE TABLE import_status (
- lastimportdate timestamp NOT NULL,
+ lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
);
+import itertools
import sys
from pathlib import Path
sys.path.insert(0, str(SRC_DIR.resolve()))
from nominatim.config import Configuration
+from nominatim.db import connection
class _TestingCursor(psycopg2.extras.DictCursor):
""" Extension to the DictCursor class that provides execution
exported into NOMINATIM_DATABASE_DSN.
"""
name = 'test_nominatim_python_unittest'
- with psycopg2.connect(database='postgres') as conn:
- conn.set_isolation_level(0)
- with conn.cursor() as cur:
- cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
- cur.execute('CREATE DATABASE {}'.format(name))
+ conn = psycopg2.connect(database='postgres')
+
+ conn.set_isolation_level(0)
+ with conn.cursor() as cur:
+ cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+ cur.execute('CREATE DATABASE {}'.format(name))
+
+ conn.close()
monkeypatch.setenv('NOMINATIM_DATABASE_DSN' , 'dbname=' + name)
yield name
- with psycopg2.connect(database='postgres') as conn:
- conn.set_isolation_level(0)
- with conn.cursor() as cur:
- cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+ conn = psycopg2.connect(database='postgres')
+ conn.set_isolation_level(0)
+ with conn.cursor() as cur:
+ cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
+
+ conn.close()
+
+@pytest.fixture
+def temp_db_with_extensions(temp_db):
+ conn = psycopg2.connect(database=temp_db)
+ with conn.cursor() as cur:
+ cur.execute('CREATE EXTENSION hstore; CREATE EXTENSION postgis;')
+ conn.commit()
+ conn.close()
+
+ return temp_db
@pytest.fixture
def temp_db_conn(temp_db):
""" Connection to the test database.
"""
- conn = psycopg2.connect(database=temp_db)
+ conn = connection.connect('dbname=' + temp_db)
yield conn
conn.close()
@pytest.fixture
def def_config():
    """ Configuration object that is created from the settings directory
        of the source tree, without a project directory.
    """
    settings_dir = SRC_DIR.resolve() / 'settings'
    return Configuration(None, settings_dir)
+
+
+@pytest.fixture
+def status_table(temp_db_conn):
+ """ Create an empty version of the status table.
+ """
+ with temp_db_conn.cursor() as cur:
+ cur.execute("""CREATE TABLE import_status (
+ lastimportdate timestamp with time zone NOT NULL,
+ sequence_id integer,
+ indexed boolean
+ )""")
+ temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_table(temp_db_with_extensions, temp_db_conn):
+ """ Create an empty version of the place table.
+ """
+ with temp_db_conn.cursor() as cur:
+ cur.execute("""CREATE TABLE place (
+ osm_id int8 NOT NULL,
+ osm_type char(1) NOT NULL,
+ class text NOT NULL,
+ type text NOT NULL,
+ name hstore,
+ admin_level smallint,
+ address hstore,
+ extratags hstore,
+ geometry Geometry(Geometry,4326) NOT NULL)""")
+ temp_db_conn.commit()
+
+
+@pytest.fixture
+def place_row(place_table, temp_db_cursor):
+ """ A factory for rows in the place table. The table is created as a
+ prerequisite to the fixture.
+ """
+ idseq = itertools.count(1001)
+ def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
+ admin_level=None, address=None, extratags=None, geom=None):
+ temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
+ (osm_id or next(idseq), osm_type, cls, typ, names,
+ admin_level, address, extratags,
+ geom or 'SRID=4326;POINT(0 0 )'))
+
+ return _insert
import nominatim.cli
import nominatim.indexer.indexer
import nominatim.tools.refresh
+import nominatim.tools.replication
def call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='build/module',
(('import', '--continue', 'load-data'), 'setup'),
(('freeze',), 'setup'),
(('special-phrases',), 'specialphrases'),
- (('replication',), 'update'),
(('add-data', '--tiger-data', 'tiger'), 'setup'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('check-database',), 'check_import_finished'),
('importance', ('update.php', '--recompute-importance')),
('website', ('setup.php', '--setup-website')),
])
-def test_refresh_legacy_command(mock_run_legacy, command, params):
+def test_refresh_legacy_command(mock_run_legacy, temp_db, command, params):
assert 0 == call_nominatim('refresh', '--' + command)
assert mock_run_legacy.called == 1
('address-levels', 'load_address_levels_from_file'),
('functions', 'create_functions'),
])
-def test_refresh_command(monkeypatch, command, func):
+def test_refresh_command(monkeypatch, temp_db, command, func):
func_mock = MockParamCapture()
monkeypatch.setattr(nominatim.tools.refresh, func, func_mock)
assert func_mock.called == 1
-def test_refresh_importance_computed_after_wiki_import(mock_run_legacy):
+
+def test_refresh_importance_computed_after_wiki_import(mock_run_legacy, temp_db):
assert 0 == call_nominatim('refresh', '--importance', '--wiki-data')
assert mock_run_legacy.called == 2
assert mock_run_legacy.last_args == ('update.php', '--recompute-importance')
+
+@pytest.mark.parametrize("params,func", [
+ (('--init', '--no-update-functions'), 'init_replication')
+ ])
+def test_replication_command(monkeypatch, temp_db, params, func):
+ func_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.tools.replication, func, func_mock)
+
+ assert 0 == call_nominatim('replication', *params)
+
+ assert func_mock.called == 1
+
+
@pytest.mark.parametrize("params", [
('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),
--- /dev/null
+"""
+Tests for status table manipulation.
+"""
+import datetime as dt
+
+import pytest
+
+import nominatim.db.status
+
+def test_compute_database_date_place_empty(status_table, place_table, temp_db_conn):
+ with pytest.raises(RuntimeError):
+ nominatim.db.status.compute_database_date(temp_db_conn)
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="45673" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+def test_compute_database_date_valid(monkeypatch, status_table, place_row, temp_db_conn):
+ place_row(osm_type='N', osm_id=45673)
+
+ requested_url = []
+ def mock_url(url):
+ requested_url.append(url)
+ return OSM_NODE_DATA
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
+
+ date = nominatim.db.status.compute_database_date(temp_db_conn)
+
+ assert requested_url == ['https://www.openstreetmap.org/api/0.6/node/45673/1']
+ assert date == dt.datetime.fromisoformat('2006-01-27T22:09:10').replace(tzinfo=dt.timezone.utc)
+
+
+def test_compute_database_broken_api(monkeypatch, status_table, place_row, temp_db_conn):
+ place_row(osm_type='N', osm_id=45673)
+
+ requested_url = []
+ def mock_url(url):
+ requested_url.append(url)
+ return '<osm version="0.6" generator="OpenStre'
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", mock_url)
+
+ with pytest.raises(RuntimeError):
+ date = nominatim.db.status.compute_database_date(temp_db_conn)
+
+
+def test_set_status_empty_table(status_table, temp_db_conn, temp_db_cursor):
+ date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+ nominatim.db.status.set_status(temp_db_conn, date=date)
+
+ temp_db_cursor.execute("SELECT * FROM import_status")
+
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone() == [date, None, True]
+
+
+def test_set_status_filled_table(status_table, temp_db_conn, temp_db_cursor):
+ date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+ nominatim.db.status.set_status(temp_db_conn, date=date)
+
+ assert 1 == temp_db_cursor.scalar("SELECT count(*) FROM import_status")
+
+
+ date = dt.datetime.fromordinal(1000100).replace(tzinfo=dt.timezone.utc)
+ nominatim.db.status.set_status(temp_db_conn, date=date, seq=456, indexed=False)
+
+ temp_db_cursor.execute("SELECT * FROM import_status")
+
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone() == [date, 456, False]
--- /dev/null
+"""
+Tests for replication functionality.
+"""
+import datetime as dt
+
+import pytest
+
+import nominatim.tools.replication
+
+OSM_NODE_DATA = """\
+<osm version="0.6" generator="OpenStreetMap server" copyright="OpenStreetMap and contributors" attribution="http://www.openstreetmap.org/copyright" license="http://opendatacommons.org/licenses/odbl/1-0/">
+<node id="100" visible="true" version="1" changeset="2047" timestamp="2006-01-27T22:09:10Z" user="Foo" uid="111" lat="48.7586670" lon="8.1343060">
+</node>
+</osm>
+"""
+
+
+def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+ place_row(osm_type='N', osm_id=100)
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+
+ with pytest.raises(RuntimeError, match="Failed to reach replication service"):
+ nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+
+def test_init_replication_success(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
+ place_row(osm_type='N', osm_id=100)
+
+ monkeypatch.setattr(nominatim.db.status, "get_url", lambda u : OSM_NODE_DATA)
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "timestamp_to_sequence",
+ lambda self, date: 234)
+
+ nominatim.tools.replication.init_replication(temp_db_conn, 'https://test.io')
+
+ temp_db_cursor.execute("SELECT * FROM import_status")
+
+ expected_date = dt.datetime.fromisoformat('2006-01-27T19:09:10').replace(tzinfo=dt.timezone.utc)
+ assert temp_db_cursor.rowcount == 1
+ assert temp_db_cursor.fetchone() == [expected_date, 234, True]