git.openstreetmap.org Git - nominatim.git/commitdiff
allow multiple files for the import command
author Sarah Hoffmann <lonvia@denofr.de>
Sat, 14 Aug 2021 19:42:21 +0000 (21:42 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Sat, 14 Aug 2021 19:42:21 +0000 (21:42 +0200)
The files are forwarded to osm2pgsql which is now able to merge
them correctly.

nominatim/clicmd/setup.py
nominatim/tools/database_import.py
nominatim/tools/exec_utils.py
test/python/test_tools_database_import.py

index 878c88260602a691052fff87156115b1196410bd..2fc1587b11e2f1b119e3cfd93d7fde8a4a9bb46e 100644 (file)
@@ -27,8 +27,9 @@ class SetupAll:
     def add_args(parser):
         group_name = parser.add_argument_group('Required arguments')
         group = group_name.add_mutually_exclusive_group(required=True)
-        group.add_argument('--osm-file', metavar='FILE',
-                           help='OSM file to be imported.')
+        group.add_argument('--osm-file', metavar='FILE', action='append',
+                           help='OSM file to be imported'
+                                ' (repeat for importing multiple files.')
         group.add_argument('--continue', dest='continue_at',
                            choices=['load-data', 'indexing', 'db-postprocess'],
                            help='Continue an import that was interrupted')
@@ -56,9 +57,12 @@ class SetupAll:
         from ..indexer.indexer import Indexer
         from ..tokenizer import factory as tokenizer_factory
 
-        if args.osm_file and not Path(args.osm_file).is_file():
-            LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
-            raise UsageError('Cannot access file.')
+        if args.osm_file:
+            files = [Path(f) for f in args.osm_file]
+            for fname in files:
+                if not fname.is_file():
+                    LOG.fatal("OSM file '%s' does not exist.", fname)
+                    raise UsageError('Cannot access file.')
 
         if args.continue_at is None:
             database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
@@ -67,7 +71,7 @@ class SetupAll:
                                                     rouser=args.config.DATABASE_WEBUSER)
 
             LOG.warning('Importing OSM data file')
-            database_import.import_osm_data(Path(args.osm_file),
+            database_import.import_osm_data(files,
                                             args.osm2pgsql_options(0, 1),
                                             drop=args.no_updates,
                                             ignore_errors=args.ignore_errors)
index a4d7220fb8c73bcf7f38f1370c028122fb020b39..0dd93490d7b4c8c316bdd5635a02a3b1c9a6c6da 100644 (file)
@@ -103,11 +103,11 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False):
             conn.commit()
 
 
-def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
-    """ Import the given OSM file. 'options' contains the list of
+def import_osm_data(osm_files, options, drop=False, ignore_errors=False):
+    """ Import the given OSM files. 'options' contains the list of
         default settings for osm2pgsql.
     """
-    options['import_file'] = osm_file
+    options['import_file'] = osm_files
     options['append'] = False
     options['threads'] = 1
 
@@ -115,7 +115,12 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False):
         # Make some educated guesses about cache size based on the size
         # of the import file and the available memory.
         mem = psutil.virtual_memory()
-        fsize = os.stat(str(osm_file)).st_size
+        fsize = 0
+        if isinstance(osm_files, list):
+            for fname in osm_files:
+                fsize += os.stat(str(fname)).st_size
+        else:
+            fsize = os.stat(str(osm_files)).st_size
         options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75,
                                              fsize * 2) / 1024 / 1024) + 1
 
index 6177b15f000e4e429c9d8e4f27384d546abd49c1..cb39ad486a128557a1c6b503844676ea80ee6ed4 100644 (file)
@@ -130,6 +130,9 @@ def run_osm2pgsql(options):
 
     if 'import_data' in options:
         cmd.extend(('-r', 'xml', '-'))
+    elif isinstance(options['import_file'], list):
+        for fname in options['import_file']:
+            cmd.append(str(fname))
     else:
         cmd.append(str(options['import_file']))
 
index 2291c166bfed34fdce3b833ae1f8f43ea2dd8938..aa90f8dba02ed3aaf3380362122dcb6029ce1f47 100644 (file)
@@ -98,14 +98,25 @@ def test_import_base_data_ignore_partitions(dsn, src_dir, temp_db_with_extension
 def test_import_osm_data_simple(table_factory, osm2pgsql_options):
     table_factory('place', content=((1, ), ))
 
-    database_import.import_osm_data('file.pdf', osm2pgsql_options)
+    database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
+
+
+def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options):
+    table_factory('place', content=((1, ), ))
+    osm2pgsql_options['osm2pgsql_cache'] = 0
+
+    files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
+    for f in files:
+        f.write_text('test')
+
+    database_import.import_osm_data(files, osm2pgsql_options)
 
 
 def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options):
     table_factory('place')
 
     with pytest.raises(UsageError, match='No data.*'):
-        database_import.import_osm_data('file.pdf', osm2pgsql_options)
+        database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
 
 
 def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_options):
@@ -117,7 +128,7 @@ def test_import_osm_data_drop(table_factory, temp_db_conn, tmp_path, osm2pgsql_o
 
     osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
 
-    database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True)
+    database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, drop=True)
 
     assert not flatfile.exists()
     assert not temp_db_conn.table_exists('planet_osm_nodes')