From dd03aeb966c2a101759d447a0fa00340812c89da Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 26 Feb 2021 16:14:29 +0100 Subject: [PATCH] bdd: use python library where possible Replace calls to PHP scripts with direct calls into the nominatim Python library where possible. This speed up tests quite a bit. --- lib-php/admin/setup.php | 4 +++ nominatim/cli.py | 14 +++++----- nominatim/clicmd/setup.py | 3 ++- nominatim/clicmd/transition.py | 5 +++- nominatim/tools/database_import.py | 11 ++++---- test/bdd/steps/nominatim_environment.py | 36 +++++++++++++++++-------- test/bdd/steps/steps_db_ops.py | 16 ++++++----- test/bdd/steps/steps_osm_data.py | 5 ++-- 8 files changed, 61 insertions(+), 33 deletions(-) diff --git a/lib-php/admin/setup.php b/lib-php/admin/setup.php index 6493460d..3aa6b828 100644 --- a/lib-php/admin/setup.php +++ b/lib-php/admin/setup.php @@ -69,7 +69,11 @@ $iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1 function run($oCmd) { global $iInstances; + global $aCMDResult; $oCmd->addParams('--threads', $iInstances); + if ($aCMDResult['ignore-errors'] ?? false) { + $oCmd->addParams('--ignore-errors'); + } $oCmd->run(true); } diff --git a/nominatim/cli.py b/nominatim/cli.py index 35c6c1f0..7459711f 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -75,12 +75,14 @@ class CommandlineParser: setattr(args, arg, Path(kwargs[arg])) args.project_dir = Path(args.project_dir).resolve() - logging.basicConfig(stream=sys.stderr, - format='%(asctime)s: %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - level=max(4 - args.verbose, 1) * 10) - - args.config = Configuration(args.project_dir, args.config_dir) + if 'cli_args' not in kwargs: + logging.basicConfig(stream=sys.stderr, + format='%(asctime)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=max(4 - args.verbose, 1) * 10) + + args.config = Configuration(args.project_dir, args.config_dir, + environ=kwargs.get('environ', os.environ)) log = logging.getLogger() log.warning('Using project directory: %s', str(args.project_dir)) diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 8f717cbb..f8229b39 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -75,7 +75,8 @@ class SetupAll: LOG.warning('Importing OSM data file') database_import.import_osm_data(Path(args.osm_file), args.osm2pgsql_options(0, 1), - drop=args.no_updates) + drop=args.no_updates, + ignore_errors=args.ignore_errors) LOG.warning('Create functions (1st pass)') with connect(args.config.get_libpq_dsn()) as conn: diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py index 1d78062e..210eec70 100644 --- a/nominatim/clicmd/transition.py +++ b/nominatim/clicmd/transition.py @@ -48,6 +48,8 @@ class AdminTransition: help='Size of cache to be used by osm2pgsql (in MB)') group.add_argument('--no-analyse', action='store_true', help='Do not perform analyse operations during index') + group.add_argument('--ignore-errors', action='store_true', + help="Ignore certain erros on import.") @staticmethod def run(args): @@ -75,7 +77,8 @@ class AdminTransition: raise UsageError('Missing required --osm-file argument') database_import.import_osm_data(Path(args.osm_file), args.osm2pgsql_options(0, 1), - drop=args.drop) + drop=args.drop, + ignore_errors=args.ignore_errors) if args.load_data: LOG.warning('Load data') diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 6e65e73a..7d71386f 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -145,7 +145,7 @@ def import_base_data(dsn, sql_dir, ignore_partitions=False): conn.commit() -def import_osm_data(osm_file, options, drop=False): +def import_osm_data(osm_file, options, drop=False, ignore_errors=False): """ Import the given OSM file. 'options' contains the list of default settings for osm2pgsql. """ @@ -164,10 +164,11 @@ def import_osm_data(osm_file, options, drop=False): run_osm2pgsql(options) with connect(options['dsn']) as conn: - with conn.cursor() as cur: - cur.execute('SELECT * FROM place LIMIT 1') - if cur.rowcount == 0: - raise UsageError('No data imported by osm2pgsql.') + if not ignore_errors: + with conn.cursor() as cur: + cur.execute('SELECT * FROM place LIMIT 1') + if cur.rowcount == 0: + raise UsageError('No data imported by osm2pgsql.') if drop: conn.drop_table('planet_osm_nodes') diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 811faf5c..168334b1 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -7,6 +7,7 @@ import psycopg2.extras sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve())) +from nominatim import cli from nominatim.config import Configuration from nominatim.tools import refresh from steps.utils import run_script @@ -88,18 +89,18 @@ class NominatimEnvironment: self.test_env['NOMINATIM_FLATNODE_FILE'] = '' self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' - self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data' - self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql' - self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings' - self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module' - self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql' - self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim' + self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve()) + self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve()) + self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve()) + self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve()) + self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve()) + self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve()) if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path else: # avoid module being copied into the temporary environment - self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module' + self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve()) if self.website_dir is not None: self.website_dir.cleanup() @@ -182,9 +183,9 @@ class NominatimEnvironment: self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve()) try: - self.run_setup_script('all', osm_file=self.api_test_file) + self.run_nominatim('import', '--osm-file', str(self.api_test_file)) self.run_setup_script('import-tiger-data') - self.run_setup_script('drop') + self.run_nominatim('freeze') phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve()) run_script(['psql', '-d', self.api_test_db, '-f', phrase_file]) @@ -249,12 +250,25 @@ class NominatimEnvironment: """ with db.cursor() as cur: while True: - self.run_update_script('index') + self.run_nominatim('index') cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1") if cur.rowcount == 0: return + def run_nominatim(self, *cmdline): + """ Run the nominatim command-line tool via the library. + """ + cli.nominatim(module_dir='', + osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'), + phplib_dir=str(self.src_dir / 'lib-php'), + sqllib_dir=str(self.src_dir / 'lib-sql'), + data_dir=str(self.src_dir / 'data'), + config_dir=str(self.src_dir / 'settings'), + cli_args=cmdline, + phpcgi_path='', + environ=self.test_env) + def run_setup_script(self, *args, **kwargs): """ Run the Nominatim setup script with the given arguments. """ @@ -285,7 +299,7 @@ class NominatimEnvironment: """ Copy data from place to the placex and location_property_osmline tables invoking the appropriate triggers. """ - self.run_setup_script('create-functions', 'create-partition-functions') + self.run_nominatim('refresh', '--functions', '--no-diff-updates') with db.cursor() as cur: cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type, diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py index c549f3eb..9d443b43 100644 --- a/test/bdd/steps/steps_db_ops.py +++ b/test/bdd/steps/steps_db_ops.py @@ -5,6 +5,7 @@ import psycopg2.extras from place_inserter import PlaceColumn from table_compare import NominatimID, DBRow +from nominatim.indexer.indexer import Indexer def check_database_integrity(context): """ Check some generic constraints on the tables. @@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context): """ Import data previously set up in the place table. """ context.nominatim.copy_from_place(context.db) - context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse') + context.nominatim.run_setup_script('calculate-postcodes') + + # Call directly as the refresh function does not include postcodes. + indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1) + indexer.index_full(analyse=False) + check_database_integrity(context) @when("updating places") @@ -93,8 +99,7 @@ def update_place_table(context): """ Update the place table with the given data. Also runs all triggers related to updates and reindexes the new data. """ - context.nominatim.run_setup_script( - 'create-functions', 'create-partition-functions', 'enable-diff-updates') + context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: for row in context.table: PlaceColumn(context).add_row(row, False).db_insert(cur) @@ -106,7 +111,7 @@ def update_place_table(context): def update_postcodes(context): """ Rerun the calculation of postcodes. """ - context.nominatim.run_update_script('calculate-postcodes') + context.nominatim.run_nominatim('refresh', '--postcodes') @when("marking for delete (?P.*)") def delete_places(context, oids): @@ -114,8 +119,7 @@ def delete_places(context, oids): separated by commas. Also runs all triggers related to updates and reindexes the new data. """ - context.nominatim.run_setup_script( - 'create-functions', 'create-partition-functions', 'enable-diff-updates') + context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: for oid in oids.split(','): NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 3858198b..844fb274 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -75,9 +75,8 @@ def update_from_osm_file(context): The data is expected as attached text in OPL format. """ context.nominatim.copy_from_place(context.db) - context.nominatim.run_setup_script('index', 'index-noanalyse') - context.nominatim.run_setup_script('create-functions', 'create-partition-functions', - 'enable-diff-updates') + context.nominatim.run_nominatim('index') + context.nominatim.run_nominatim('refresh', '--functions') # create an OSM file and import it fname = write_opl_file(context.text, context.osm) -- 2.39.5