This adds a new dependency on the Python psutil package.
--- /dev/null
+[MASTER]
+
+extension-pkg-whitelist=osmium
+
+[MESSAGES CONTROL]
+
+[TYPECHECK]
+
+# closing added here because it sometimes triggers a false positive with
+# 'with' statements.
+ignored-classes=NominatimArgs,closing
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
- COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim
+ COMMAND ${PYLINT} nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")
* [Python 3](https://www.python.org/) (3.5+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
+ * [psutil](https://github.com/giampaolo/psutil)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
- ( PHP-cgi (for running queries from the command line)
+ * PHP-cgi (for running queries from the command line)
For running continuous updates:
return join(' ', $aEscaped);
}
- public function run($bExitOnFail = False)
+ public function run($bExitOnFail = false)
{
$sCmd = $this->escapedCmd();
// $aEnv does not need escaping, proc_open seems to handle it fine
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
$bDidSomething = true;
- (clone($oNominatimCmd))->addParams('transition', '--setup-db')->run(true);
+ $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
+
+ if ($aCMDResult['no-partitions'] ?? false) {
+ $oCmd->addParams('--no-partitions');
+ }
+
+ $oCmd->run(true);
}
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
$bDidSomething = true;
- $oSetup->importData($aCMDResult['osm-file']);
+ $oCmd = (clone($oNominatimCmd))
+ ->addParams('transition', '--import-data')
+ ->addParams('--osm-file', $aCMDResult['osm-file']);
+ if ($aCMDResult['drop'] ?? false) {
+ $oCmd->addParams('--drop');
+ }
+
+ $oCmd->run(true);
}
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
}
}
- public function importData($sOSMFile)
- {
- info('Import data');
-
- if (!file_exists(getOsm2pgsqlBinary())) {
- echo "Check NOMINATIM_OSM2PGSQL_BINARY in your local .env file.\n";
- echo "Normally you should not need to set this manually.\n";
- fail("osm2pgsql not found in '".getOsm2pgsqlBinary()."'");
- }
-
- $oCmd = new \Nominatim\Shell(getOsm2pgsqlBinary());
- $oCmd->addParams('--style', getImportStyle());
-
- if (getSetting('FLATNODE_FILE')) {
- $oCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
- }
- if (getSetting('TABLESPACE_OSM_DATA')) {
- $oCmd->addParams('--tablespace-slim-data', getSetting('TABLESPACE_OSM_DATA'));
- }
- if (getSetting('TABLESPACE_OSM_INDEX')) {
- $oCmd->addParams('--tablespace-slim-index', getSetting('TABLESPACE_OSM_INDEX'));
- }
- if (getSetting('TABLESPACE_PLACE_DATA')) {
- $oCmd->addParams('--tablespace-main-data', getSetting('TABLESPACE_PLACE_DATA'));
- }
- if (getSetting('TABLESPACE_PLACE_INDEX')) {
- $oCmd->addParams('--tablespace-main-index', getSetting('TABLESPACE_PLACE_INDEX'));
- }
- $oCmd->addParams('--latlong', '--slim', '--create');
- $oCmd->addParams('--output', 'gazetteer');
- $oCmd->addParams('--hstore');
- $oCmd->addParams('--number-processes', 1);
- $oCmd->addParams('--with-forward-dependencies', 'false');
- $oCmd->addParams('--log-progress', 'true');
- $oCmd->addParams('--cache', $this->iCacheMemory);
- $oCmd->addParams('--port', $this->aDSNInfo['port']);
-
- if (isset($this->aDSNInfo['username'])) {
- $oCmd->addParams('--username', $this->aDSNInfo['username']);
- }
- if (isset($this->aDSNInfo['password'])) {
- $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
- }
- if (isset($this->aDSNInfo['hostspec'])) {
- $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
- }
- $oCmd->addParams('--database', $this->aDSNInfo['database']);
- $oCmd->addParams($sOSMFile);
- $oCmd->run();
-
- if (!$this->sIgnoreErrors && !$this->db()->getRow('select * from place limit 1')) {
- fail('No Data');
- }
-
- if ($this->bDrop) {
- $this->dropTable('planet_osm_nodes');
- $this->removeFlatnodeFile();
- }
- }
-
public function createFunctions()
{
info('Create Functions');
"""
-class NominatimArgs:
+class NominatimArgs: # pylint: disable=too-few-public-methods
""" Customized namespace class for the nominatim command line tool
to receive the command-line arguments.
"""
osm2pgsql_style=self.config.get_import_style_file(),
threads=self.threads or default_threads,
dsn=self.config.get_libpq_dsn(),
- flatnode_file=self.config.FLATNODE_FILE)
-
+ flatnode_file=self.config.FLATNODE_FILE,
+ tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA,
+ slim_index=self.config.TABLESPACE_OSM_INDEX,
+ main_data=self.config.TABLESPACE_PLACE_DATA,
+ main_index=self.config.TABLESPACE_PLACE_INDEX
+ )
+ )
"""
Implementation of the 'index' subcommand.
"""
-import os
+import psutil
from ..db import status
from ..db.connection import connect
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
-def _num_system_cpus():
- try:
- cpus = len(os.sched_getaffinity(0))
- except NotImplementedError:
- cpus = None
-
- return cpus or os.cpu_count()
-
class UpdateIndex:
"""\
from ..indexer.indexer import Indexer
indexer = Indexer(args.config.get_libpq_dsn(),
- args.threads or _num_system_cpus() or 1)
+ args.threads or psutil.cpu_count() or 1)
if not args.no_boundaries:
indexer.index_boundaries(args.minrank, args.maxrank)
This module will be removed as soon as the transition phase is over.
"""
import logging
+from pathlib import Path
from ..db.connection import connect
+from ..errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
help='Create nominatim db')
group.add_argument('--setup-db', action='store_true',
help='Build a blank nominatim db')
+ group.add_argument('--import-data', action='store_true',
+ help='Import a osm file')
group = parser.add_argument_group('Options')
group.add_argument('--no-partitions', action='store_true',
help='Do not partition search indices')
+ group.add_argument('--osm-file', metavar='FILE',
+ help='File to import')
+ group.add_argument('--drop', action='store_true',
+ help='Drop tables needed for updates, making the database readonly')
+ group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+ help='Size of cache to be used by osm2pgsql (in MB)')
@staticmethod
def run(args):
database_import.import_base_data(args.config.get_libpq_dsn(),
args.data_dir, args.no_partitions)
+
+ if args.import_data:
+ LOG.warning('Import data')
+ if not args.osm_file:
+ raise UsageError('Missing required --osm-file argument')
+ database_import.import_osm_data(Path(args.osm_file),
+ args.osm2pgsql_options(0, 1),
+ drop=args.drop)
return True
+ def drop_table(self, name, if_exists=True):
+ """ Drop the table with the given name.
+ Set `if_exists` to False if a non-existant table should raise
+ an exception instead of just being ignored.
+ """
+ with self.cursor() as cur:
+ cur.execute("""DROP TABLE {} "{}"
+ """.format('IF EXISTS' if if_exists else '', name))
+ self.commit()
+
+
def server_version_tuple(self):
""" Return the server version as a tuple of (major, minor).
Converts correctly for pre-10 and post-10 PostgreSQL versions.
Functions for setting up and importing a new Nominatim database.
"""
import logging
+import os
import subprocess
import shutil
+from pathlib import Path
+
+import psutil
from ..db.connection import connect, get_pg_env
from ..db import utils as db_utils
+from .exec_utils import run_osm2pgsql
from ..errors import UsageError
from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
raise UsageError('Creating new database failed.')
with connect(dsn) as conn:
- postgres_version = conn.server_version_tuple() # pylint: disable=E1101
+ postgres_version = conn.server_version_tuple()
if postgres_version < POSTGRESQL_REQUIRED_VERSION:
LOG.fatal('Minimum supported version of Postgresql is %d.%d. '
'Found version %d.%d.',
raise UsageError('PostgreSQL server is too old.')
if rouser is not None:
- with conn.cursor() as cur: # pylint: disable=E1101
+ with conn.cursor() as cur:
cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s',
(rouser, ))
if cnt == 0:
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
- the background for geocoding.
+ the background for geocoding. Data is assumed to not yet exist.
"""
db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions:
with connect(dsn) as conn:
- with conn.cursor() as cur: # pylint: disable=E1101
+ with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0')
- conn.commit() # pylint: disable=E1101
+ conn.commit()
+
+
+def import_osm_data(osm_file, options, drop=False):
+ """ Import the given OSM file. 'options' contains the list of
+ default settings for osm2pgsql.
+ """
+ options['import_file'] = osm_file
+ options['append'] = False
+ options['threads'] = 1
+
+ if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0:
+ # Make some educated guesses about cache size based on the size
+ # of the import file and the available memory.
+ mem = psutil.virtual_memory()
+ fsize = os.stat(str(osm_file)).st_size
+ options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
+ fsize * 2) / 1024 / 1024) + 1
+
+ run_osm2pgsql(options)
+
+ with connect(options['dsn']) as conn:
+ with conn.cursor() as cur:
+ cur.execute('SELECT * FROM place LIMIT 1')
+ if cur.rowcount == 0:
+ raise UsageError('No data imported by osm2pgsql.')
+
+ if drop:
+ conn.drop_table('planet_osm_nodes')
+
+ if drop:
+ if options['flatnode_file']:
+ Path(options['flatnode_file']).unlink()
]
if options['append']:
cmd.append('--append')
+ else:
+ cmd.append('--create')
if options['flatnode_file']:
cmd.extend(('--flat-nodes', options['flatnode_file']))
+ for key, param in (('slim_data', '--tablespace-slim-data'),
+ ('slim_index', '--tablespace-slim-index'),
+ ('main_data', '--tablespace-main-data'),
+ ('main_index', '--tablespace-main-index')):
+ if options['tablespaces'][key]:
+ cmd.extend((param, options['tablespaces'][key]))
+
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
temp_db_conn.commit()
-
+@pytest.fixture
+def osm2pgsql_options(temp_db):
+ """ Return a default option dictionary for osm2pgsql calls in tests.
+ `echo` stands in for the osm2pgsql binary, the DSN points to the
+ temporary test database and neither a flatnode file nor any
+ tablespace is set.
+ """
+ return dict(osm2pgsql='echo',
+ osm2pgsql_cache=10,
+ osm2pgsql_style='style.file',
+ threads=1,
+ dsn='dbname=' + temp_db,
+ flatnode_file='',
+ tablespaces=dict(slim_data='', slim_index='',
+ main_data='', main_index=''))
monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
return mock
+
@pytest.fixture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
return get_mock
+
def test_cli_help(capsys):
""" Running nominatim tool without arguments prints help.
"""
Tests for specialised connection and cursor classes.
"""
import pytest
+import psycopg2
from nominatim.db.connection import connect, get_pg_env
assert db.index_exists('some_index', table='bar') == False
+def test_drop_table_existing(db, temp_db_cursor):
+ """ drop_table() removes a table that exists. """
+ temp_db_cursor.execute('CREATE TABLE dummy (id INT)')
+
+ assert db.table_exists('dummy')
+ db.drop_table('dummy')
+ assert not db.table_exists('dummy')
+
+
+def test_drop_table_non_existsing(db):
+ """ Dropping a missing table with the default if_exists=True
+ must not raise.
+ """
+ db.drop_table('dfkjgjriogjigjgjrdghehtre')
+
+
+def test_drop_table_non_existing_force(db):
+ """ With if_exists=False, dropping a missing table raises a
+ psycopg2 ProgrammingError.
+ """
+ with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'):
+ db.drop_table('dfkjgjriogjigjgjrdghehtre', if_exists=False)
+
def test_connection_server_version_tuple(db):
ver = db.server_version_tuple()
import pytest
import psycopg2
import sys
+from pathlib import Path
from nominatim.tools import database_import
from nominatim.errors import UsageError
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0
assert temp_db_cursor.scalar('SELECT count(*) FROM country_name WHERE partition != 0') == 0
+
+
+def test_import_osm_data_simple(temp_db_cursor,osm2pgsql_options):
+ """ The import passes its data check when the place table
+ contains a row.
+ """
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ temp_db_cursor.execute('INSERT INTO place values (1)')
+
+ database_import.import_osm_data('file.pdf', osm2pgsql_options)
+
+
+def test_import_osm_data_simple_no_data(temp_db_cursor,osm2pgsql_options):
+ """ An empty place table after the import results in a UsageError. """
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+
+ with pytest.raises(UsageError, match='No data.*'):
+ database_import.import_osm_data('file.pdf', osm2pgsql_options)
+
+
+def test_import_osm_data_drop(temp_db_conn, temp_db_cursor, tmp_path, osm2pgsql_options):
+ """ With drop=True the configured flatnode file and the
+ planet_osm_nodes table are removed after the import.
+ """
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ temp_db_cursor.execute('CREATE TABLE planet_osm_nodes (id INT)')
+ temp_db_cursor.execute('INSERT INTO place values (1)')
+
+ # Create a real file so that its removal can be checked afterwards.
+ flatfile = tmp_path / 'flatfile'
+ flatfile.write_text('touch')
+
+ osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
+
+ database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
+
+ assert not flatfile.exists()
+ assert not temp_db_conn.table_exists('planet_osm_nodes')
+
+
+def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options):
+ """ A cache size of 0 without a flatnode file makes the import
+ estimate the cache size from the import file.
+ """
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ temp_db_cursor.execute('INSERT INTO place values (1)')
+
+ osm2pgsql_options['osm2pgsql_cache'] = 0
+
+ # The test module itself serves as an existing import file to stat.
+ database_import.import_osm_data(Path(__file__), osm2pgsql_options)
### run_osm2pgsql
-def test_run_osm2pgsql():
- exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None,
- dsn='dbname=foobar', threads=1, osm2pgsql_cache=500,
- osm2pgsql_style='./my.style',
- import_file='foo.bar'))
+def test_run_osm2pgsql(osm2pgsql_options):
+ osm2pgsql_options['append'] = False
+ osm2pgsql_options['import_file'] = 'foo.bar'
+ osm2pgsql_options['tablespaces']['osm_data'] = 'extra'
+ exec_utils.run_osm2pgsql(osm2pgsql_options)
+
+
+def test_run_osm2pgsql_disable_jit(osm2pgsql_options):
+ """ Run osm2pgsql in append mode with the disable_jit option set. """
+ osm2pgsql_options['append'] = True
+ osm2pgsql_options['import_file'] = 'foo.bar'
+ osm2pgsql_options['disable_jit'] = True
+ exec_utils.run_osm2pgsql(osm2pgsql_options)
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
- pip3 install --user psycopg2 python-dotenv
+ pip3 install --user psycopg2 python-dotenv psutil
#
python3-pip python3-setuptools python3-devel \
expat-devel zlib-devel
- pip3 install --user psycopg2 python-dotenv
+ pip3 install --user psycopg2 python-dotenv psutil
#
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl python3-pip \
- python3-psycopg2 git
+ python3-psycopg2 python3-psutil git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl python3-dotenv \
- python3-psycopg2 git
+ python3-psycopg2 python3-psutil git
#
# System Configuration