+++ /dev/null
-<?php
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/setup_functions.php');
-
-ini_set('memory_limit', '800M');
-
-// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
-$aCMDOptions
-= array(
- 'Import / update / index osm data',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
- array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
-
- array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
- array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
- array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
-
- array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
- array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
- array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
- array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
-
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- );
-
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-setupHTTPProxy();
-
-date_default_timezone_set('Etc/UTC');
-
-$oDB = new Nominatim\DB();
-$oDB->connect();
-$fPostgresVersion = $oDB->getPostgresVersion();
-
-$aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
-if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) {
- $aDSNInfo['port'] = 5432;
-}
-
-// cache memory to be used by osm2pgsql, should not be more than the available memory
-$iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
-if ($iCacheMemory + 500 > getTotalMemoryMB()) {
- $iCacheMemory = getCacheMemoryMB();
- echo "WARNING: resetting cache memory to $iCacheMemory\n";
-}
-
-$oOsm2pgsqlCmd = (new \Nominatim\Shell(getOsm2pgsqlBinary()))
- ->addParams('--hstore')
- ->addParams('--latlong')
- ->addParams('--append')
- ->addParams('--slim')
- ->addParams('--with-forward-dependencies', 'false')
- ->addParams('--log-progress', 'true')
- ->addParams('--number-processes', 1)
- ->addParams('--cache', $iCacheMemory)
- ->addParams('--output', 'gazetteer')
- ->addParams('--style', getImportStyle())
- ->addParams('--database', $aDSNInfo['database'])
- ->addParams('--port', $aDSNInfo['port']);
-
-if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
- $oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']);
-}
-if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
- $oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']);
-}
-if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
- $oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
-}
-if (getSetting('FLATNODE_FILE')) {
- $oOsm2pgsqlCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
-}
-if ($fPostgresVersion >= 11.0) {
- $oOsm2pgsqlCmd->addEnvPair(
- 'PGOPTIONS',
- '-c jit=off -c max_parallel_workers_per_gather=0'
- );
-}
-
-if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
- // import diffs and files directly (e.g. from osmosis --rri)
- $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
-
- if (!file_exists($sNextFile)) {
- fail("Cannot open $sNextFile\n");
- }
-
- // Import the file
- $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile);
- echo $oCMD->escapedCmd()."\n";
- $iRet = $oCMD->run();
-
- if ($iRet) {
- fail("Error from osm2pgsql, $iRet\n");
- }
-
- // Don't update the import status - we don't know what this file contains
-}
-
-$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
-$bHaveDiff = false;
-$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
-$sContentURL = '';
-if (isset($aResult['import-node']) && $aResult['import-node']) {
- if ($bUseOSMApi) {
- $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
- } else {
- $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
- }
-}
-
-if (isset($aResult['import-way']) && $aResult['import-way']) {
- if ($bUseOSMApi) {
- $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
- } else {
- $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;';
- }
-}
-
-if (isset($aResult['import-relation']) && $aResult['import-relation']) {
- if ($bUseOSMApi) {
- $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
- } else {
- $sContentURL = 'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;';
- }
-}
-
-if ($sContentURL) {
- file_put_contents($sTemporaryFile, file_get_contents($sContentURL));
- $bHaveDiff = true;
-}
-
-if ($bHaveDiff) {
- // import generated change file
-
- $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile);
- echo $oCMD->escapedCmd()."\n";
-
- $iRet = $oCMD->run();
- if ($iRet) {
- fail("osm2pgsql exited with error level $iRet\n");
- }
-}
#
# No need to document the functions each time.
# pylint: disable=C0111
-# Using non-top-level imports to make pyosmium optional for replication only.
-# pylint: disable=E0012,C0415
-class UpdateAddData:
- """\
- Add additional data from a file or an online source.
-
- Data is only imported, not indexed. You need to call `nominatim index`
- to complete the process.
- """
-
- @staticmethod
- def add_args(parser):
- group_name = parser.add_argument_group('Source')
- group = group_name.add_mutually_exclusive_group(required=True)
- group.add_argument('--file', metavar='FILE',
- help='Import data from an OSM file')
- group.add_argument('--diff', metavar='FILE',
- help='Import data from an OSM diff file')
- group.add_argument('--node', metavar='ID', type=int,
- help='Import a single node from the API')
- group.add_argument('--way', metavar='ID', type=int,
- help='Import a single way from the API')
- group.add_argument('--relation', metavar='ID', type=int,
- help='Import a single relation from the API')
- group.add_argument('--tiger-data', metavar='DIR',
- help='Add housenumbers from the US TIGER census database.')
- group = parser.add_argument_group('Extra arguments')
- group.add_argument('--use-main-api', action='store_true',
- help='Use OSM API instead of Overpass to download objects')
-
- @staticmethod
- def run(args):
- from nominatim.tokenizer import factory as tokenizer_factory
- from nominatim.tools import tiger_data
-
- if args.tiger_data:
- tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
- return tiger_data.add_tiger_data(args.tiger_data,
- args.config, args.threads or 1,
- tokenizer)
-
- params = ['update.php']
- if args.file:
- params.extend(('--import-file', args.file))
- elif args.diff:
- params.extend(('--import-diff', args.diff))
- elif args.node:
- params.extend(('--import-node', args.node))
- elif args.way:
- params.extend(('--import-way', args.way))
- elif args.relation:
- params.extend(('--import-relation', args.relation))
- if args.use_main_api:
- params.append('--use-main-api')
- return run_legacy_script(*params, nominatim_env=args)
-
-
class QueryExport:
"""\
Export addresses as CSV file from the database.
parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)
- parser.add_subcommand('add-data', UpdateAddData)
+ parser.add_subcommand('add-data', clicmd.UpdateAddData)
parser.add_subcommand('index', clicmd.UpdateIndex)
parser.add_subcommand('refresh', clicmd.UpdateRefresh())
from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
from nominatim.clicmd.index import UpdateIndex
from nominatim.clicmd.refresh import UpdateRefresh
+from nominatim.clicmd.add_data import UpdateAddData
from nominatim.clicmd.admin import AdminFuncs
from nominatim.clicmd.freeze import SetupFreeze
from nominatim.clicmd.special_phrases import ImportSpecialPhrases
--- /dev/null
+"""
+Implementation of the 'add-data' subcommand.
+"""
+import logging
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid importing modules that may not be needed.
+# pylint: disable=E0012,C0415
+
+LOG = logging.getLogger()
+
+class UpdateAddData:
+ """\
+ Add additional data from a file or an online source.
+
+ Data is only imported, not indexed. You need to call `nominatim index`
+ to complete the process.
+ """
+
+ @staticmethod
+ def add_args(parser):
+ group_name = parser.add_argument_group('Source')
+ group = group_name.add_mutually_exclusive_group(required=True)
+ group.add_argument('--file', metavar='FILE',
+ help='Import data from an OSM file or diff file')
+ group.add_argument('--diff', metavar='FILE',
+ help='Import data from an OSM diff file (deprecated: use --file)')
+ group.add_argument('--node', metavar='ID', type=int,
+ help='Import a single node from the API')
+ group.add_argument('--way', metavar='ID', type=int,
+ help='Import a single way from the API')
+ group.add_argument('--relation', metavar='ID', type=int,
+ help='Import a single relation from the API')
+ group.add_argument('--tiger-data', metavar='DIR',
+ help='Add housenumbers from the US TIGER census database.')
+ group = parser.add_argument_group('Extra arguments')
+ group.add_argument('--use-main-api', action='store_true',
+ help='Use OSM API instead of Overpass to download objects')
+ group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
+ help='Size of cache to be used by osm2pgsql (in MB)')
+ group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
+ help='Set timeout for file downloads.')
+
+ @staticmethod
+ def run(args):
+ from nominatim.tokenizer import factory as tokenizer_factory
+ from nominatim.tools import tiger_data, add_osm_data
+
+ if args.tiger_data:
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+ return tiger_data.add_tiger_data(args.tiger_data,
+ args.config, args.threads or 1,
+ tokenizer)
+
+ osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
+ if args.file or args.diff:
+ return add_osm_data.add_data_from_file(args.file or args.diff,
+ osm2pgsql_params)
+
+ if args.node:
+ return add_osm_data.add_osm_object('node', args.node,
+ args.use_main_api,
+ osm2pgsql_params)
+
+ if args.way:
+ return add_osm_data.add_osm_object('way', args.way,
+ args.use_main_api,
+ osm2pgsql_params)
+
+ if args.relation:
+ return add_osm_data.add_osm_object('relation', args.relation,
+ args.use_main_api,
+ osm2pgsql_params)
+
+ return 0
--- /dev/null
+"""
+Function to add additional OSM data from a file or the API into the database.
+"""
+from pathlib import Path
+import logging
+import urllib
+
+from nominatim.tools.exec_utils import run_osm2pgsql, get_url
+
+LOG = logging.getLogger()
+
+def add_data_from_file(fname, options):
+ """ Adds data from a OSM file to the database. The file may be a normal
+ OSM file or a diff file in all formats supported by libosmium.
+ """
+ options['import_file'] = Path(fname)
+ options['append'] = True
+ run_osm2pgsql(options)
+
+ # No status update. We don't know where the file came from.
+ return 0
+
+
+def add_osm_object(osm_type, osm_id, use_main_api, options):
+ """ Add or update a single OSM object from the latest version of the
+ API.
+ """
+ if use_main_api:
+ base_url = f'https://www.openstreetmap.org/api/0.6/{osm_type}/{osm_id}'
+ if osm_type in ('way', 'relation'):
+ base_url += '/full'
+ else:
+ # use Overpass API
+ if osm_type == 'node':
+ data = f'node({osm_id});out meta;'
+ elif osm_type == 'way':
+ data = f'(way({osm_id});>;);out meta;'
+ else:
+ data = f'(rel(id:{osm_id});>;);out meta;'
+ base_url = 'https://overpass-api.de/api/interpreter?' \
+ + urllib.parse.urlencode({'data': data})
+
+ options['append'] = True
+ options['import_data'] = get_url(base_url).encode('utf-8')
+
+ run_osm2pgsql(options)
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
- cmd.append(str(options['import_file']))
+ if 'import_data' in options:
+ cmd.extend(('-r', 'xml', '-'))
+ else:
+ cmd.append(str(options['import_file']))
- subprocess.run(cmd, cwd=options.get('cwd', '.'), env=env, check=True)
+ subprocess.run(cmd, cwd=options.get('cwd', '.'),
+ input=options.get('import_data'),
+ env=env, check=True)
def get_url(url):
import nominatim.clicmd.setup
import nominatim.indexer.indexer
import nominatim.tools.admin
+import nominatim.tools.add_osm_data
import nominatim.tools.check_database
import nominatim.tools.database_import
import nominatim.tools.freeze
@pytest.mark.parametrize("command,script", [
- (('add-data', '--file', 'foo.osm'), 'update'),
(('export',), 'export')
])
def test_legacy_commands_simple(self, mock_run_legacy, command, script):
assert mock.called == 1
- @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'),
- ('node', 12), ('way', 8), ('relation', 32)])
- def test_add_data_command(self, mock_run_legacy, name, oid):
+ @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc')])
+ def test_add_data_file_command(self, mock_func_factory, name, oid):
+ mock_run_legacy = mock_func_factory(nominatim.tools.add_osm_data, 'add_data_from_file')
+ assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0
+
+ assert mock_run_legacy.called == 1
+
+
+ # Each single-object option (--node, --way, --relation) must result in
+ # exactly one call to add_osm_object() and an exit code of 0.
+ @pytest.mark.parametrize("name,oid", [('node', 12), ('way', 8), ('relation', 32)])
+ def test_add_data_object_command(self, mock_func_factory, name, oid):
+ # Presumably swaps add_osm_object for a call-counting stub so that nothing
+ # is downloaded or imported - confirm against the mock_func_factory fixture.
+ mock_run_legacy = mock_func_factory(nominatim.tools.add_osm_data, 'add_osm_object')
assert self.call_nominatim('add-data', '--' + name, str(oid)) == 0
assert mock_run_legacy.called == 1
- assert mock_run_legacy.last_args == ('update.php', '--import-' + name, oid)
def test_serve_command(self, mock_func_factory):