X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/faa85ded50bf8ab4e772d0352f316c25d2a0f510..0af8dac3d35a94afe0b6ad775f3226d8d147501d:/test/bdd/steps/nominatim_environment.py diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 4c973358..ba19bb48 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -1,20 +1,28 @@ -import logging -import os +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2024 by the Nominatim developer community. +# For a full list of authors see the git log. from pathlib import Path -import subprocess +import importlib import tempfile -import psycopg2 -import psycopg2.extras +import psycopg +from psycopg import sql as pysql -LOG = logging.getLogger(__name__) +from nominatim_db import cli +from nominatim_db.config import Configuration +from nominatim_db.db.connection import Connection, register_hstore, execute_scalar +from nominatim_db.tools import refresh +from nominatim_db.tokenizer import factory as tokenizer_factory +from steps.utils import run_script class NominatimEnvironment: """ Collects all functions for the execution of Nominatim functions. """ def __init__(self, config): - self.build_dir = Path(config['BUILDDIR']).resolve() self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve() self.db_host = config['DB_HOST'] self.db_port = config['DB_PORT'] @@ -23,21 +31,27 @@ class NominatimEnvironment: self.template_db = config['TEMPLATE_DB'] self.test_db = config['TEST_DB'] self.api_test_db = config['API_TEST_DB'] - self.server_module_path = config['SERVER_MODULE_PATH'] + self.api_test_file = config['API_TEST_FILE'] + self.tokenizer = config['TOKENIZER'] + self.import_style = config['STYLE'] self.reuse_template = not config['REMOVE_TEMPLATE'] self.keep_scenario_db = config['KEEP_TEST_DB'] - self.code_coverage_path = config['PHPCOV'] - self.code_coverage_id = 1 - self.test_env = None + self.default_config = Configuration(None).get_os_env() + self.test_env = None self.template_db_done = False + self.api_db_done = False self.website_dir = None + if not hasattr(self, f"create_api_request_func_{config['API_ENGINE']}"): + raise RuntimeError(f"Unknown API engine '{config['API_ENGINE']}'") + self.api_engine = getattr(self, f"create_api_request_func_{config['API_ENGINE']}")() + def connect_database(self, dbname): """ Return a connection to the database with the given name. Uses configured host, user and port. """ - dbargs = {'database': dbname} + dbargs = {'dbname': dbname, 'row_factory': psycopg.rows.dict_row} if self.db_host: dbargs['host'] = self.db_host if self.db_port: @@ -46,16 +60,8 @@ class NominatimEnvironment: dbargs['user'] = self.db_user if self.db_pass: dbargs['password'] = self.db_pass - conn = psycopg2.connect(**dbargs) - return conn + return psycopg.connect(**dbargs) - def next_code_coverage_file(self): - """ Generate the next name for a coverage file. - """ - fn = Path(self.code_coverage_path) / "{:06d}.cov".format(self.code_coverage_id) - self.code_coverage_id += 1 - - return fn.resolve() def write_nominatim_config(self, dbname): """ Set up a custom test configuration that connects to the given @@ -63,7 +69,10 @@ class NominatimEnvironment: be picked up by dotenv and creates a project directory with the appropriate website scripts. """ - dsn = 'pgsql:dbname={}'.format(dbname) + if dbname.startswith('sqlite:'): + dsn = 'sqlite:dbname={}'.format(dbname[7:]) + else: + dsn = 'pgsql:dbname={}'.format(dbname) if self.db_host: dsn += ';host=' + self.db_host if self.db_port: @@ -73,35 +82,54 @@ class NominatimEnvironment: if self.db_pass: dsn += ';password=' + self.db_pass - if self.website_dir is not None \ - and self.test_env is not None \ - and dsn == self.test_env['NOMINATIM_DATABASE_DSN']: - return # environment already set uo - - self.test_env = os.environ + self.test_env = dict(self.default_config) self.test_env['NOMINATIM_DATABASE_DSN'] = dsn + self.test_env['NOMINATIM_LANGUAGES'] = 'en,de,fr,ja' self.test_env['NOMINATIM_FLATNODE_FILE'] = '' self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' - - if self.server_module_path: - self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path + self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve()) + self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve()) + self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve()) + if self.tokenizer is not None: + self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer + if self.import_style is not None: + self.test_env['NOMINATIM_IMPORT_STYLE'] = self.import_style if self.website_dir is not None: self.website_dir.cleanup() self.website_dir = tempfile.TemporaryDirectory() - self.run_setup_script('setup-website') + + + def get_test_config(self): + cfg = Configuration(Path(self.website_dir.name), environ=self.test_env) + return cfg + + def get_libpq_dsn(self): + dsn = self.test_env['NOMINATIM_DATABASE_DSN'] + + def quote_param(param): + key, val = param.split('=') + val = val.replace('\\', '\\\\').replace("'", "\\'") + if ' ' in val: + val = "'" + val + "'" + return key + '=' + val + + if dsn.startswith('pgsql:'): + # Old PHP DSN format. Convert before returning. + return ' '.join([quote_param(p) for p in dsn[6:].split(';')]) + + return dsn def db_drop_database(self, name): """ Drop the database with the given name. """ - conn = self.connect_database('postgres') - conn.set_isolation_level(0) - cur = conn.cursor() - cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) - conn.close() + with self.connect_database('postgres') as conn: + conn.autocommit = True + conn.execute(pysql.SQL('DROP DATABASE IF EXISTS') + + pysql.Identifier(name)) def setup_template_db(self): """ Setup a template database that already contains common test data. @@ -113,50 +141,23 @@ class NominatimEnvironment: self.template_db_done = True - if self.reuse_template: - # check that the template is there - conn = self.connect_database('postgres') - cur = conn.cursor() - cur.execute('select count(*) from pg_database where datname = %s', - (self.template_db,)) - if cur.fetchone()[0] == 1: - return - conn.close() - else: - # just in case... make sure a previous table has been dropped - self.db_drop_database(self.template_db) - - try: - # call the first part of database setup - self.write_nominatim_config(self.template_db) - self.run_setup_script('create-db', 'setup-db') - # remove external data to speed up indexing for tests - conn = self.connect_database(self.template_db) - cur = conn.cursor() - cur.execute("""select tablename from pg_tables - where tablename in ('gb_postcode', 'us_postcode')""") - for t in cur: - conn.cursor().execute('TRUNCATE TABLE {}'.format(t[0])) - conn.commit() - conn.close() - - # execute osm2pgsql import on an empty file to get the right tables - with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd: - fd.write(b'') - fd.flush() - self.run_setup_script('import-data', - 'ignore-errors', - 'create-functions', - 'create-tables', - 'create-partition-tables', - 'create-partition-functions', - 'load-data', - 'create-search-indices', - osm_file=fd.name, - osm2pgsql_cache='200') - except: - self.db_drop_database(self.template_db) - raise + self.write_nominatim_config(self.template_db) + + if not self._reuse_or_drop_db(self.template_db): + try: + # execute nominatim import on an empty file to get the right tables + with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd: + fd.write(b'') + fd.flush() + self.run_nominatim('import', '--osm-file', fd.name, + '--osm2pgsql-cache', '1', + '--ignore-errors', + '--offline', '--index-noanalyse') + except: + self.db_drop_database(self.template_db) + raise + + self.run_nominatim('refresh', '--functions') def setup_api_db(self): @@ -164,61 +165,167 @@ class NominatimEnvironment: """ self.write_nominatim_config(self.api_test_db) + if self.api_test_db.startswith('sqlite:'): + return + + if not self.api_db_done: + self.api_db_done = True + + if not self._reuse_or_drop_db(self.api_test_db): + testdata = (Path(__file__) / '..' / '..' / '..' / 'testdb').resolve() + self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata) + simp_file = Path(self.website_dir.name) / 'secondary_importance.sql.gz' + simp_file.symlink_to(testdata / 'secondary_importance.sql.gz') + + try: + self.run_nominatim('import', '--osm-file', str(self.api_test_file)) + self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger')) + self.run_nominatim('freeze') + + csv_path = str(testdata / 'full_en_phrases_test.csv') + self.run_nominatim('special-phrases', '--import-from-csv', csv_path) + except: + self.db_drop_database(self.api_test_db) + raise + + tokenizer_factory.get_tokenizer_for_db(self.get_test_config()) + + def setup_unknown_db(self): """ Setup a test against a non-existing database. """ - self.write_nominatim_config('UNKNOWN_DATABASE_NAME') + # The tokenizer needs an existing database to function. + # So start with the usual database + class _Context: + db = None + + context = _Context() + self.setup_db(context) + tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False) + + # Then drop the DB again + self.teardown_db(context, force_drop=True) def setup_db(self, context): """ Setup a test against a fresh, empty test database. """ self.setup_template_db() + with self.connect_database(self.template_db) as conn: + conn.autocommit = True + conn.execute(pysql.SQL('DROP DATABASE IF EXISTS') + + pysql.Identifier(self.test_db)) + conn.execute(pysql.SQL('CREATE DATABASE {} TEMPLATE = {}').format( + pysql.Identifier(self.test_db), + pysql.Identifier(self.template_db))) + self.write_nominatim_config(self.test_db) - conn = self.connect_database(self.template_db) - conn.set_isolation_level(0) - cur = conn.cursor() - cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db)) - cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db)) - conn.close() context.db = self.connect_database(self.test_db) - psycopg2.extras.register_hstore(context.db, globally=False) + context.db.autocommit = True + register_hstore(context.db) - def teardown_db(self, context): + def teardown_db(self, context, force_drop=False): """ Remove the test database, if it exists. """ - if 'db' in context: + if hasattr(context, 'db'): context.db.close() - if not self.keep_scenario_db: + if force_drop or not self.keep_scenario_db: self.db_drop_database(self.test_db) - def run_setup_script(self, *args, **kwargs): - """ Run the Nominatim setup script with the given arguments. + def _reuse_or_drop_db(self, name): + """ Check for the existence of the given DB. If reuse is enabled, + then the function checks for existnce and returns True if the + database is already there. Otherwise an existing database is + dropped and always false returned. """ - self.run_nominatim_script('setup', *args, **kwargs) + if self.reuse_template: + with self.connect_database('postgres') as conn: + num = execute_scalar(conn, + 'select count(*) from pg_database where datname = %s', + (name,)) + if num == 1: + return True + else: + self.db_drop_database(name) - def run_update_script(self, *args, **kwargs): - """ Run the Nominatim update script with the given arguments. - """ - self.run_nominatim_script('update', *args, **kwargs) + return False - def run_nominatim_script(self, script, *args, **kwargs): - """ Run one of the Nominatim utility scripts with the given arguments. + + def reindex_placex(self, db): + """ Run the indexing step until all data in the placex has + been processed. Indexing during updates can produce more data + to index under some circumstances. That is why indexing may have + to be run multiple times. """ - cmd = ['/usr/bin/env', 'php', '-Cq'] - cmd.append((Path(self.build_dir) / 'utils' / '{}.php'.format(script)).resolve()) - cmd.extend(['--' + x for x in args]) - for k, v in kwargs.items(): - cmd.extend(('--' + k.replace('_', '-'), str(v))) + self.run_nominatim('index') + + def run_nominatim(self, *cmdline): + """ Run the nominatim command-line tool via the library. + """ if self.website_dir is not None: - cwd = self.website_dir.name - else: - cwd = self.build_dir - - proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - (outp, outerr) = proc.communicate() - outerr = outerr.decode('utf-8').replace('\\n', '\n') - LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr) - assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr) + cmdline = list(cmdline) + ['--project-dir', self.website_dir.name] + + cli.nominatim(osm2pgsql_path=None, + cli_args=cmdline, + environ=self.test_env) + + + def copy_from_place(self, db): + """ Copy data from place to the placex and location_property_osmline + tables invoking the appropriate triggers. + """ + self.run_nominatim('refresh', '--functions', '--no-diff-updates') + + with db.cursor() as cur: + cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type, + name, admin_level, address, + extratags, geometry) + SELECT osm_type, osm_id, class, type, + name, admin_level, address, + extratags, geometry + FROM place + WHERE not (class='place' and type='houses' and osm_type='W')""") + cur.execute("""INSERT INTO location_property_osmline (osm_id, address, linegeo) + SELECT osm_id, address, geometry + FROM place + WHERE class='place' and type='houses' + and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString'""") + + + def create_api_request_func_starlette(self): + import nominatim_api.server.starlette.server + from asgi_lifespan import LifespanManager + import httpx + + async def _request(endpoint, params, project_dir, environ, http_headers): + app = nominatim_api.server.starlette.server.get_application(project_dir, environ) + + async with LifespanManager(app): + async with httpx.AsyncClient(app=app, base_url="http://nominatim.test") as client: + response = await client.get(f"/{endpoint}", params=params, + headers=http_headers) + + return response.text, response.status_code + + return _request + + + def create_api_request_func_falcon(self): + import nominatim_api.server.falcon.server + import falcon.testing + + async def _request(endpoint, params, project_dir, environ, http_headers): + app = nominatim_api.server.falcon.server.get_application(project_dir, environ) + + async with falcon.testing.ASGIConductor(app) as conductor: + response = await conductor.get(f"/{endpoint}", params=params, + headers=http_headers) + + return response.text, response.status_code + + return _request + + +