From: Sarah Hoffmann Date: Sat, 14 Aug 2021 19:42:21 +0000 (+0200) Subject: allow multiple files for the import command X-Git-Tag: v4.0.0~42^2~4 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/87dedde5d65becff99b9ff3c4b467cf4f9381403 allow multiple files for the import command The files are forwarded to osm2pgsql which is now able to merge them correctly. --- diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 878c8826..2fc1587b 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -27,8 +27,9 @@ class SetupAll: def add_args(parser): group_name = parser.add_argument_group('Required arguments') group = group_name.add_mutually_exclusive_group(required=True) - group.add_argument('--osm-file', metavar='FILE', - help='OSM file to be imported.') + group.add_argument('--osm-file', metavar='FILE', action='append', + help='OSM file to be imported' + ' (repeat for importing multiple files.') group.add_argument('--continue', dest='continue_at', choices=['load-data', 'indexing', 'db-postprocess'], help='Continue an import that was interrupted') @@ -56,9 +57,12 @@ class SetupAll: from ..indexer.indexer import Indexer from ..tokenizer import factory as tokenizer_factory - if args.osm_file and not Path(args.osm_file).is_file(): - LOG.fatal("OSM file '%s' does not exist.", args.osm_file) - raise UsageError('Cannot access file.') + if args.osm_file: + files = [Path(f) for f in args.osm_file] + for fname in files: + if not fname.is_file(): + LOG.fatal("OSM file '%s' does not exist.", fname) + raise UsageError('Cannot access file.') if args.continue_at is None: database_import.setup_database_skeleton(args.config.get_libpq_dsn(), @@ -67,7 +71,7 @@ class SetupAll: rouser=args.config.DATABASE_WEBUSER) LOG.warning('Importing OSM data file') - database_import.import_osm_data(Path(args.osm_file), + database_import.import_osm_data(files, args.osm2pgsql_options(0, 1), drop=args.no_updates, ignore_errors=args.ignore_errors) diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index a4d7220f..0dd93490 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -103,11 +103,11 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False): conn.commit() -def import_osm_data(osm_file, options, drop=False, ignore_errors=False): - """ Import the given OSM file. 'options' contains the list of +def import_osm_data(osm_files, options, drop=False, ignore_errors=False): + """ Import the given OSM files. 'options' contains the list of default settings for osm2pgsql. """ - options['import_file'] = osm_file + options['import_file'] = osm_files options['append'] = False options['threads'] = 1 @@ -115,7 +115,12 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False): # Make some educated guesses about cache size based on the size # of the import file and the available memory. mem = psutil.virtual_memory() - fsize = os.stat(str(osm_file)).st_size + fsize = 0 + if isinstance(osm_files, list): + for fname in osm_files: + fsize += os.stat(str(fname)).st_size + else: + fsize = os.stat(str(osm_files)).st_size options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75, fsize * 2) / 1024 / 1024) + 1 diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 6177b15f..cb39ad48 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -130,6 +130,9 @@ def run_osm2pgsql(options): if 'import_data' in options: cmd.extend(('-r', 'xml', '-')) + elif isinstance(options['import_file'], list): + for fname in options['import_file']: + cmd.append(str(fname)) else: cmd.append(str(options['import_file'])) diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py index 2291c166..aa90f8db 100644 --- a/test/python/test_tools_database_import.py +++ b/test/python/test_tools_database_import.py @@ -98,14 +98,25 @@ def test_import_base_data_ignore_partitions(dsn, src_dir, temp_db_with_extension def test_import_osm_data_simple(table_factory, osm2pgsql_options): table_factory('place', content=((1, ), )) - database_import.import_osm_data('file.pdf', osm2pgsql_options) + database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options) + + +def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options): + table_factory('place', content=((1, ), )) + osm2pgsql_options['osm2pgsql_cache'] = 0 + + files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm'] + for f in files: + f.write_text('test') + + database_import.import_osm_data(files, osm2pgsql_options) def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options): table_factory('place') with pytest.raises(UsageError, match='No data.*'): - database_import.import_osm_data('file.pdf', osm2pgsql_options) + database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options) def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options): @@ -117,7 +128,7 @@ def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_o osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) - database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True) + database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, drop=True) assert not flatfile.exists() assert not temp_db_conn.table_exists('planet_osm_nodes')