$oNominatimCmd->addParams('--verbose');
}
-$sPyosmiumBin = getSetting('PYOSMIUM_BINARY');
-$sBaseURL = getSetting('REPLICATION_URL');
-
if ($aResult['init-updates']) {
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
}
if ($aResult['index']) {
- (clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run();
+ (clone $oNominatimCmd)
+ ->addParams('index', '--minrank', $aResult['index-rank'])
+ ->addParams('--threads', $aResult['index-instances'])
+ ->run();
}
if ($aResult['update-address-levels']) {
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
- //
- if (strpos($sBaseURL, 'download.geofabrik.de') !== false && getSetting('REPLICATION_UPDATE_INTERVAL') < 86400) {
- fail('Error: Update interval too low for download.geofabrik.de. ' .
- "Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
+ $oCmd = (clone($oNominatimCmd))
+ ->addParams('replication')
+ ->addParams('--threads', $aResult['index-instances']);
+
+ if (!$aResult['import-osmosis-all']) {
+ $oCmd->addParams('--once');
}
- $sImportFile = CONST_InstallDir.'/osmosischange.osc';
-
- $oCMDDownload = (new \Nominatim\Shell($sPyosmiumBin))
- ->addParams('--server', $sBaseURL)
- ->addParams('--outfile', $sImportFile)
- ->addParams('--size', getSetting('REPLICATION_MAX_DIFF'));
-
- $oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
-
- while (true) {
- $fStartTime = time();
- $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
-
- if (!$aLastState['sequence_id']) {
- echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
- exit(1);
- }
-
- echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
-
- $sBatchEnd = $aLastState['lastimportdate'];
- $iEndSequence = $aLastState['sequence_id'];
-
- if ($aLastState['indexed']) {
- // Sleep if the update interval has not yet been reached.
- $fNextUpdate = $aLastState['unix_ts'] + getSetting('REPLICATION_UPDATE_INTERVAL');
- if ($fNextUpdate > $fStartTime) {
- $iSleepTime = $fNextUpdate - $fStartTime;
- echo "Waiting for next update for $iSleepTime sec.";
- sleep($iSleepTime);
- }
-
- // Download the next batch of changes.
- do {
- $fCMDStartTime = time();
- $iNextSeq = (int) $aLastState['sequence_id'];
- unset($aOutput);
-
- $oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
- echo $oCMD->escapedCmd()."\n";
- if (file_exists($sImportFile)) {
- unlink($sImportFile);
- }
- exec($oCMD->escapedCmd(), $aOutput, $iResult);
-
- if ($iResult == 3) {
- $sSleep = getSetting('REPLICATION_RECHECK_INTERVAL');
- echo 'No new updates. Sleeping for '.$sSleep." sec.\n";
- sleep($sSleep);
- } elseif ($iResult != 0) {
- echo 'ERROR: updates failed.';
- exit($iResult);
- } else {
- $iEndSequence = (int)$aOutput[0];
- }
- } while ($iResult);
-
- // get the newest object from the diff file
- $sBatchEnd = 0;
- $iRet = 0;
- $oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
- exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
- if ($iRet == 5) {
- echo "Diff file is empty. skipping import.\n";
- if (!$aResult['import-osmosis-all']) {
- exit(0);
- } else {
- continue;
- }
- }
- if ($iRet != 0) {
- fail('Error getting date from diff file.');
- }
- $sBatchEnd = $sBatchEnd[0];
-
- // Import the file
- $fCMDStartTime = time();
-
-
- echo $oCMDImport->escapedCmd()."\n";
- unset($sJunk);
- $iErrorLevel = $oCMDImport->run();
- if ($iErrorLevel) {
- echo "Error executing osm2pgsql: $iErrorLevel\n";
- exit($iErrorLevel);
- }
-
- // write the update logs
- $iFileSize = filesize($sImportFile);
- $sSQL = 'INSERT INTO import_osmosis_log';
- $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
- $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
- $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
- $sSQL .= date('Y-m-d H:i:s')."','import')";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
-
- // update the status
- $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
- echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
- }
-
- // Index file
- if (!$aResult['no-index']) {
- $fCMDStartTime = time();
-
- $oThisIndexCmd = clone($oNominatimCmd);
- $oThisIndexCmd->addParams('index');
- echo $oThisIndexCmd->escapedCmd()."\n";
- $iErrorLevel = $oThisIndexCmd->run();
- if ($iErrorLevel) {
- echo "Error: $iErrorLevel\n";
- exit($iErrorLevel);
- }
-
- $sSQL = 'INSERT INTO import_osmosis_log';
- $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
- $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
- $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
- $sSQL .= date('Y-m-d H:i:s')."','index')";
- var_Dump($sSQL);
- $oDB->exec($sSQL);
- echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
- } else {
- if ($aResult['import-osmosis-all']) {
- echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
- exit(1);
- }
- }
-
- $fDuration = time() - $fStartTime;
- echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
- if (!$aResult['import-osmosis-all']) exit(0);
+ if ($aResult['no-index']) {
+ $oCmd->addParams('--no-index');
}
+
+ exit($oCmd->run());
}
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
-import sys
+import datetime as dt
import os
+import sys
+import time
import argparse
import logging
from pathlib import Path
from .config import Configuration
from .tools.exec_utils import run_legacy_script, run_api_script
from .db.connection import connect
+from .db import status
LOG = logging.getLogger()
return args.command.run(args)
+
+def _osm2pgsql_options_from_args(args, default_cache, default_threads):
+    """ Set up the standard osm2pgsql options from the command line arguments.
+ """
+ return dict(osm2pgsql=args.osm2pgsql_path,
+ osm2pgsql_cache=args.osm2pgsql_cache or default_cache,
+ osm2pgsql_style=args.config.get_import_style_file(),
+ threads=args.threads or default_threads,
+ dsn=args.config.get_libpq_dsn(),
+ flatnode_file=args.config.FLATNODE_FILE)
+
##### Subcommand classes
#
# Each class needs to implement two functions: add_args() adds the CLI parameters
group.add_argument('--no-index', action='store_false', dest='do_index',
help="""Do not index the new data. Only applicable
together with --once""")
+ group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+ help='Size of cache to be used by osm2pgsql (in MB)')
+
+ @staticmethod
+ def _init_replication(args):
+ from .tools import replication, refresh
+
+ LOG.warning("Initialising replication updates")
+ conn = connect(args.config.get_libpq_dsn())
+ replication.init_replication(conn, base_url=args.config.REPLICATION_URL)
+ if args.update_functions:
+ LOG.warning("Create functions")
+ refresh.create_functions(conn, args.config, args.data_dir,
+ True, False)
+ conn.close()
+ return 0
+
+
+ @staticmethod
+ def _check_for_updates(args):
+ from .tools import replication
+
+ conn = connect(args.config.get_libpq_dsn())
+ ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL)
+ conn.close()
+ return ret
+
+
+ @staticmethod
+ def _update(args):
+ from .tools import replication
+ from .indexer.indexer import Indexer
+
+ params = _osm2pgsql_options_from_args(args, 2000, 1)
+ params.update(base_url=args.config.REPLICATION_URL,
+ update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'),
+ import_file=args.project_dir / 'osmosischange.osc',
+ max_diff_size=args.config.get_int('REPLICATION_MAX_DIFF'),
+ indexed_only=not args.once)
+
+ # Sanity check to not overwhelm the Geofabrik servers.
+ if 'download.geofabrik.de'in params['base_url']\
+ and params['update_interval'] < 86400:
+ LOG.fatal("Update interval too low for download.geofabrik.de.\n"
+ "Please check install documentation "
+ "(https://nominatim.org/release-docs/latest/admin/Import-and-Update#"
+ "setting-up-the-update-process).")
+ raise RuntimeError("Invalid replication update interval setting.")
+
+ if not args.once:
+ if not args.do_index:
+ LOG.fatal("Indexing cannot be disabled when running updates continuously.")
+ raise RuntimeError("Bad arguments.")
+ recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
+
+ while True:
+ conn = connect(args.config.get_libpq_dsn())
+ start = dt.datetime.now(dt.timezone.utc)
+ state = replication.update(conn, params)
+ status.log_status(conn, start, 'import')
+ conn.close()
+
+ if state is not replication.UpdateState.NO_CHANGES and args.do_index:
+ start = dt.datetime.now(dt.timezone.utc)
+ indexer = Indexer(args.config.get_libpq_dsn(),
+ args.threads or 1)
+ indexer.index_boundaries(0, 30)
+ indexer.index_by_rank(0, 30)
+
+ conn = connect(args.config.get_libpq_dsn())
+ status.set_indexed(conn, True)
+ status.log_status(conn, start, 'index')
+ conn.close()
+
+ if args.once:
+ break
+
+ if state is replication.UpdateState.NO_CHANGES:
+ LOG.warning("No new changes. Sleeping for %d sec.", recheck_interval)
+ time.sleep(recheck_interval)
+
+ return state.value
@staticmethod
def run(args):
"To install pyosmium via pip: pip3 install osmium")
return 1
- from .tools import replication, refresh
-
- conn = connect(args.config.get_libpq_dsn())
-
- params = ['update.php']
if args.init:
- LOG.warning("Initialising replication updates")
- replication.init_replication(conn, args.config.REPLICATION_URL)
- if args.update_functions:
- LOG.warning("Create functions")
- refresh.create_functions(conn, args.config, args.data_dir,
- True, False)
- conn.close()
- return 0
+ return UpdateReplication._init_replication(args)
if args.check_for_updates:
- ret = replication.check_for_updates(conn, args.config.REPLICATION_URL)
- conn.close()
- return ret
-
- if args.once:
- params.append('--import-osmosis')
- else:
- params.append('--import-osmosis-all')
- if not args.do_index:
- params.append('--no-index')
-
- return run_legacy_script(*params, nominatim_env=args)
+ return UpdateReplication._check_for_updates(args)
+ return UpdateReplication._update(args)
class UpdateAddData:
"""\
if not args.boundaries_only:
indexer.index_by_rank(args.minrank, args.maxrank)
- if not args.no_boundaries and not args.boundaries_only:
- indexer.update_status_table()
+ if not args.no_boundaries and not args.boundaries_only \
+ and args.minrank == 0 and args.maxrank == 30:
+ conn = connect(args.config.get_libpq_dsn())
+ status.set_indexed(conn, True)
+ conn.close()
return 0
def run(args):
from .tools import refresh
- conn = connect(args.config.get_libpq_dsn())
-
if args.postcodes:
LOG.warning("Update postcodes centroid")
+ conn = connect(args.config.get_libpq_dsn())
refresh.update_postcodes(conn, args.data_dir)
+ conn.close()
if args.word_counts:
LOG.warning('Recompute frequency of full-word search terms')
+ conn = connect(args.config.get_libpq_dsn())
refresh.recompute_word_counts(conn, args.data_dir)
+ conn.close()
if args.address_levels:
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
LOG.warning('Updating address levels from %s', cfg)
+ conn = connect(args.config.get_libpq_dsn())
refresh.load_address_levels_from_file(conn, cfg)
+ conn.close()
if args.functions:
LOG.warning('Create functions')
+ conn = connect(args.config.get_libpq_dsn())
refresh.create_functions(conn, args.config, args.data_dir,
args.diffs, args.enable_debug_statements)
+ conn.close()
if args.wiki_data:
run_legacy_script('setup.php', '--import-wikipedia-articles',
run_legacy_script('setup.php', '--setup-website',
nominatim_env=args, throw_on_fail=True)
- conn.close()
-
return 0
"""
Nominatim configuration accessor.
"""
+import logging
import os
+from pathlib import Path
from dotenv import dotenv_values
+LOG = logging.getLogger()
+
class Configuration:
""" Load and manage the project configuration.
def __init__(self, project_dir, config_dir):
self.project_dir = project_dir
+ self.config_dir = config_dir
self._config = dotenv_values(str((config_dir / 'env.defaults').resolve()))
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
return os.environ.get(name) or self._config[name]
def get_bool(self, name):
- """ Return the given configuration parameters as a boolean.
+ """ Return the given configuration parameter as a boolean.
Values of '1', 'yes' and 'true' are accepted as truthy values,
everything else is interpreted as false.
"""
return self.__getattr__(name).lower() in ('1', 'yes', 'true')
+
+ def get_int(self, name):
+ """ Return the given configuration parameter as an int.
+ """
+ try:
+ return int(self.__getattr__(name))
+ except ValueError:
+ LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name)
+ raise
+
+
def get_libpq_dsn(self):
""" Get configured database DSN converted into the key/value format
understood by libpq and psycopg.
"""
dsn = self.DATABASE_DSN
+ def quote_param(param):
+ key, val = param.split('=')
+ val = val.replace('\\', '\\\\').replace("'", "\\'")
+ if ' ' in val:
+ val = "'" + val + "'"
+ return key + '=' + val
+
if dsn.startswith('pgsql:'):
# Old PHP DSN format. Convert before returning.
- return dsn[6:].replace(';', ' ')
+ return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
return dsn
+
+ def get_import_style_file(self):
+    """ Return the import style file as a path object. Translates the
+        name of the standard styles automatically into a file in the
+        config directory.
+ """
+ style = self.__getattr__('IMPORT_STYLE')
+
+ if style in ('admin', 'street', 'address', 'full', 'extratags'):
+ return self.config_dir / 'import-{}.style'.format(style)
+
+ return Path(style)
+
+
def get_os_env(self):
""" Return a copy of the OS environment with the Nominatim configuration
merged in.
"""
-Access and helper functions for the status table.
+Access and helper functions for the status and status log table.
"""
import datetime as dt
import logging
row = cur.fetchone()
return row['lastimportdate'], row['sequence_id'], row['indexed']
+
+
+def set_indexed(conn, state):
+ """ Set the indexed flag in the status table to the given state.
+ """
+ with conn.cursor() as cur:
+ cur.execute("UPDATE import_status SET indexed = %s", (state, ))
+ conn.commit()
+
+
+def log_status(conn, start, event, batchsize=None):
+ """ Write a new status line to the `import_osmosis_log` table.
+ """
+ with conn.cursor() as cur:
+ cur.execute("""INSERT INTO import_osmosis_log
+ (batchend, batchseq, batchsize, starttime, endtime, event)
+ SELECT lastimportdate, sequence_id, %s, %s, now(), %s FROM import_status""",
+ (batchsize, start, event))
Helper functions for executing external programs.
"""
import logging
+import os
import subprocess
import urllib.request as urlrequest
from urllib.parse import urlencode
+from psycopg2.extensions import parse_dsn
+
from ..version import NOMINATIM_VERSION
LOG = logging.getLogger()
return 0
+def run_osm2pgsql(options):
+ """ Run osm2pgsql with the given options.
+ """
+ env = os.environ
+ cmd = [options['osm2pgsql'],
+ '--hstore', '--latlon', '--slim',
+ '--with-forward-dependencies', 'false',
+ '--log-progress', 'true',
+ '--number-processes', str(options['threads']),
+ '--cache', str(options['osm2pgsql_cache']),
+ '--output', 'gazetteer',
+ '--style', str(options['osm2pgsql_style'])
+ ]
+ if options['append']:
+ cmd.append('--append')
+
+ if options['flatnode_file']:
+ cmd.extend(('--flat-nodes', options['flatnode_file']))
+
+ dsn = parse_dsn(options['dsn'])
+ if 'password' in dsn:
+ env['PGPASSWORD'] = dsn['password']
+ if 'dbname' in dsn:
+ cmd.extend(('-d', dsn['dbname']))
+ if 'user' in dsn:
+ cmd.extend(('--username', dsn['user']))
+ for param in ('host', 'port'):
+ if param in dsn:
+ cmd.extend(('--' + param, dsn[param]))
+
+ cmd.append(str(options['import_file']))
+
+ subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
+
+
def get_url(url):
""" Get the contents from the given URL and return it as a UTF-8 string.
"""
"""
Functions for updating a database from a replication source.
"""
-import datetime
+import datetime as dt
+from enum import Enum
import logging
+import time
from osmium.replication.server import ReplicationServer
+from osmium import WriteHandler
from ..db import status
+from .exec_utils import run_osm2pgsql
LOG = logging.getLogger()
date = status.compute_database_date(conn)
# margin of error to make sure we get all data
- date -= datetime.timedelta(hours=3)
+ date -= dt.timedelta(hours=3)
repl = ReplicationServer(base_url)
if state.sequence <= seq:
LOG.warning("Database is up to date.")
- return 1
+ return 2
LOG.warning("New data available (%i => %i).", seq, state.sequence)
return 0
+
+class UpdateState(Enum):
+ """ Possible states after an update has run.
+ """
+
+ UP_TO_DATE = 0
+ MORE_PENDING = 2
+ NO_CHANGES = 3
+
+
+def update(conn, options):
+ """ Update database from the next batch of data. Returns the state of
+ updates according to `UpdateState`.
+ """
+ startdate, startseq, indexed = status.get_status(conn)
+
+ if startseq is None:
+ LOG.error("Replication not set up. "
+ "Please run 'nominatim replication --init' first.")
+ raise RuntimeError("Replication not set up.")
+
+ if not indexed and options['indexed_only']:
+ LOG.info("Skipping update. There is data that needs indexing.")
+ return UpdateState.MORE_PENDING
+
+ last_since_update = dt.datetime.now(dt.timezone.utc) - startdate
+ update_interval = dt.timedelta(seconds=options['update_interval'])
+ if last_since_update < update_interval:
+ duration = (update_interval - last_since_update).seconds
+ LOG.warning("Sleeping for %s sec before next update.", duration)
+ time.sleep(duration)
+
+ if options['import_file'].exists():
+ options['import_file'].unlink()
+
+ # Read updates into file.
+ repl = ReplicationServer(options['base_url'])
+
+ outhandler = WriteHandler(str(options['import_file']))
+ endseq = repl.apply_diffs(outhandler, startseq,
+ max_size=options['max_diff_size'] * 1024)
+ outhandler.close()
+
+ if endseq is None:
+ return UpdateState.NO_CHANGES
+
+ # Consume updates with osm2pgsql.
+ options['append'] = True
+ run_osm2pgsql(options)
+
+ # Write the current status to the file
+ endstate = repl.get_state_info(endseq)
+ status.set_status(conn, endstate.timestamp, seq=endseq, indexed=False)
+
+ return UpdateState.UP_TO_DATE
@pytest.fixture
def status_table(temp_db_conn):
- """ Create an empty version of the status table.
+ """ Create an empty version of the status table and
+ the status logging table.
"""
with temp_db_conn.cursor() as cur:
cur.execute("""CREATE TABLE import_status (
sequence_id integer,
indexed boolean
)""")
+ cur.execute("""CREATE TABLE import_osmosis_log (
+ batchend timestamp,
+ batchseq integer,
+ batchsize bigint,
+ starttime timestamp,
+ endtime timestamp,
+ event text
+ )""")
temp_db_conn.commit()
"""
Tests for command line interface wrapper.
+
+These tests just check that the various command line parameters route to the
+correct functionality. They use a lot of monkeypatching to avoid executing
+the actual functions.
"""
import psycopg2
import pytest
+import time
import nominatim.cli
import nominatim.indexer.indexer
""" Mock that records the parameters with which a function was called
as well as the number of calls.
"""
- def __init__(self):
+ def __init__(self, retval=0):
self.called = 0
- self.return_value = 0
+ self.return_value = retval
def __call__(self, *args, **kwargs):
self.called += 1
assert func_mock.called == 1
+def test_replication_update_bad_interval(monkeypatch, temp_db):
+ monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx')
+
+ with pytest.raises(ValueError):
+ call_nominatim('replication')
+
+
+def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db):
+ monkeypatch.setenv('NOMINATIM_REPLICATION_URL',
+ 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates')
+
+ with pytest.raises(RuntimeError, match='Invalid replication.*'):
+ call_nominatim('replication')
+
+
+@pytest.mark.parametrize("state, retval", [
+ (nominatim.tools.replication.UpdateState.UP_TO_DATE, 0),
+ (nominatim.tools.replication.UpdateState.NO_CHANGES, 3)
+ ])
+def test_replication_update_once_no_index(monkeypatch, temp_db, status_table, state, retval):
+ func_mock = MockParamCapture(retval=state)
+ monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock)
+
+ assert retval == call_nominatim('replication', '--once', '--no-index')
+
+
+def test_replication_update_continuous(monkeypatch, status_table):
+ states = [nominatim.tools.replication.UpdateState.UP_TO_DATE,
+ nominatim.tools.replication.UpdateState.UP_TO_DATE]
+ monkeypatch.setattr(nominatim.tools.replication, 'update',
+ lambda *args, **kwargs: states.pop())
+
+ index_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+ with pytest.raises(IndexError):
+ call_nominatim('replication')
+
+ assert index_mock.called == 4
+
+
+def test_replication_update_continuous_no_change(monkeypatch, status_table):
+ states = [nominatim.tools.replication.UpdateState.NO_CHANGES,
+ nominatim.tools.replication.UpdateState.UP_TO_DATE]
+ monkeypatch.setattr(nominatim.tools.replication, 'update',
+ lambda *args, **kwargs: states.pop())
+
+ index_mock = MockParamCapture()
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock)
+ monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock)
+
+ sleep_mock = MockParamCapture()
+ monkeypatch.setattr(time, 'sleep', sleep_mock)
+
+ with pytest.raises(IndexError):
+ call_nominatim('replication')
+
+ assert index_mock.called == 2
+ assert sleep_mock.called == 1
+ assert sleep_mock.last_args[0] == 60
+
+
@pytest.mark.parametrize("params", [
('search', '--query', 'new'),
('reverse', '--lat', '0', '--lon', '0'),
assert config.get_libpq_dsn() == 'dbname=gis password=foo host=localhost'
+@pytest.mark.parametrize("val,expect", [('foo bar', "'foo bar'"),
+ ("xy'z", "xy\\'z"),
+ ])
+def test_get_libpq_dsn_convert_php_special_chars(monkeypatch, val, expect):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_DATABASE_DSN',
+ 'pgsql:dbname=gis;password={}'.format(val))
+
+ assert config.get_libpq_dsn() == "dbname=gis password={}".format(expect)
+
+
def test_get_libpq_dsn_convert_libpq(monkeypatch):
config = Configuration(None, DEFCFG_DIR)
assert config.DATABASE_MODULE_PATH == ''
assert config.get_bool('DATABASE_MODULE_PATH') == False
+
+
+@pytest.mark.parametrize("value,result", [('0', 0), ('1', 1),
+ ('85762513444', 85762513444)])
+def test_get_int_success(monkeypatch, value, result):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+ assert config.get_int('FOOBAR') == result
+
+
+@pytest.mark.parametrize("value", ['1b', 'fg', '0x23'])
+def test_get_int_bad_values(monkeypatch, value):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_FOOBAR', value)
+
+ with pytest.raises(ValueError):
+ config.get_int('FOOBAR')
+
+
+def test_get_int_empty():
+ config = Configuration(None, DEFCFG_DIR)
+
+ assert config.DATABASE_MODULE_PATH == ''
+
+ with pytest.raises(ValueError):
+ config.get_int('DATABASE_MODULE_PATH')
+
+
+def test_get_import_style_intern(monkeypatch):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'street')
+
+ expected = DEFCFG_DIR / 'import-street.style'
+
+ assert config.get_import_style_file() == expected
+
+
+@pytest.mark.parametrize("value", ['custom', '/foo/bar.stye'])
+def test_get_import_style_intern(monkeypatch, value):
+ config = Configuration(None, DEFCFG_DIR)
+
+ monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
+
+ assert str(config.get_import_style_file()) == value
assert nominatim.db.status.get_status(temp_db_conn) == \
(date, 667, False)
+
+
+@pytest.mark.parametrize("old_state", [True, False])
+@pytest.mark.parametrize("new_state", [True, False])
+def test_set_indexed(status_table, temp_db_conn, temp_db_cursor, old_state, new_state):
+ date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+ nominatim.db.status.set_status(temp_db_conn, date=date, indexed=old_state)
+ nominatim.db.status.set_indexed(temp_db_conn, new_state)
+
+ assert temp_db_cursor.scalar("SELECT indexed FROM import_status") == new_state
+
+
+def test_set_indexed_empty_status(status_table, temp_db_conn, temp_db_cursor):
+ nominatim.db.status.set_indexed(temp_db_conn, True)
+
+ assert temp_db_cursor.scalar("SELECT count(*) FROM import_status") == 0
+
+
+def text_log_status(status_table, temp_db_conn):
+ date = dt.datetime.fromordinal(1000000).replace(tzinfo=dt.timezone.utc)
+ start = dt.datetime.now() - dt.timedelta(hours=1)
+ nominatim.db.status.set_status(temp_db_conn, date=date, seq=56)
+ nominatim.db.status.log_status(temp_db_conn, start, 'index')
+
+ assert temp_db_cursor.scalar("SELECT count(*) FROM import_osmosis_log") == 1
+ assert temp_db_cursor.scalar("SELECT seq FROM import_osmosis_log") == 56
+ assert temp_db_cursor.scalar("SELECT date FROM import_osmosis_log") == date
extra_env = dict(SCRIPT_FILENAME=str(tmp_project_dir / 'website' / 'test.php'))
assert 0 == exec_utils.run_api_script('badname', tmp_project_dir,
extra_env=extra_env)
+
+
+### run_osm2pgsql
+
+def test_run_osm2pgsql():
+ exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
+ dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
+ osm2pgsql_style='./my.style',
+ import_file='foo.bar'))
Tests for replication functionality.
"""
import datetime as dt
+import time
import pytest
from osmium.replication.server import OsmosisState
</osm>
"""
+### init replication
def test_init_replication_bad_base_url(monkeypatch, status_table, place_row, temp_db_conn, temp_db_cursor):
place_row(osm_type='N', osm_id=100)
assert temp_db_cursor.fetchone() == [expected_date, 234, True]
+### checking for updates
+
def test_check_for_updates_empty_status_table(status_table, temp_db_conn):
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
def test_check_for_updates_seq_not_set(status_table, temp_db_conn):
- status.set_status(temp_db_conn, dt.datetime.now().replace(tzinfo=dt.timezone.utc))
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc))
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 254
def test_check_for_updates_no_state(monkeypatch, status_table, temp_db_conn):
- status.set_status(temp_db_conn,
- dt.datetime.now().replace(tzinfo=dt.timezone.utc),
- seq=345)
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=345)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
"get_state_info", lambda self: None)
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == 253
-@pytest.mark.parametrize("server_sequence,result", [(344, 1), (345, 1), (346, 0)])
+@pytest.mark.parametrize("server_sequence,result", [(344, 2), (345, 2), (346, 0)])
def test_check_for_updates_no_new_data(monkeypatch, status_table, temp_db_conn,
server_sequence, result):
- date = dt.datetime.now().replace(tzinfo=dt.timezone.utc)
+ date = dt.datetime.now(dt.timezone.utc)
status.set_status(temp_db_conn, date, seq=345)
monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
lambda self: OsmosisState(server_sequence, date))
assert nominatim.tools.replication.check_for_updates(temp_db_conn, 'https://test.io') == result
+
+
+### updating
+
+@pytest.fixture
+def update_options(tmpdir):
+ return dict(base_url='https://test.io',
+ indexed_only=False,
+ update_interval=3600,
+ import_file=tmpdir / 'foo.osm',
+ max_diff_size=1)
+
+def test_update_empty_status_table(status_table, temp_db_conn):
+ with pytest.raises(RuntimeError):
+ nominatim.tools.replication.update(temp_db_conn, {})
+
+
+def test_update_already_indexed(status_table, temp_db_conn):
+ status.set_status(temp_db_conn, dt.datetime.now(dt.timezone.utc), seq=34, indexed=False)
+
+ assert nominatim.tools.replication.update(temp_db_conn, dict(indexed_only=True)) \
+ == nominatim.tools.replication.UpdateState.MORE_PENDING
+
+
+def test_update_no_data_no_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+ date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(days=1)
+ status.set_status(temp_db_conn, date, seq=34)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "apply_diffs",
+ lambda *args, **kwargs: None)
+
+ sleeptime = []
+ monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+ assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+ == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+ assert not sleeptime
+
+
+def test_update_no_data_sleep(monkeypatch, status_table, temp_db_conn, update_options):
+ date = dt.datetime.now(dt.timezone.utc) - dt.timedelta(minutes=30)
+ status.set_status(temp_db_conn, date, seq=34)
+
+ monkeypatch.setattr(nominatim.tools.replication.ReplicationServer,
+ "apply_diffs",
+ lambda *args, **kwargs: None)
+
+ sleeptime = []
+ monkeypatch.setattr(time, 'sleep', lambda s: sleeptime.append(s))
+
+ assert nominatim.tools.replication.update(temp_db_conn, update_options) \
+ == nominatim.tools.replication.UpdateState.NO_CHANGES
+
+ assert len(sleeptime) == 1
+ assert sleeptime[0] < 3600
+ assert sleeptime[0] > 0
+++ /dev/null
-#!/usr/bin/env python3
-
-import osmium
-import sys
-import datetime
-
-
-class Datecounter(osmium.SimpleHandler):
-
- filedate = None
-
- def date(self, o):
- ts = o.timestamp
- if self.filedate is None or ts > self.filedate:
- self.filedate = ts
-
- node = date
- way = date
- relation = date
-
-
-if __name__ == '__main__':
- if len(sys.argv) != 2:
- print("Usage: python osm_file_date.py <osmfile>")
- sys.exit(-1)
-
- h = Datecounter()
-
- h.apply_file(sys.argv[1])
-
- if h.filedate is None:
- exit(5)
-
- print(h.filedate)