]> git.openstreetmap.org Git - nominatim.git/blobdiff - test/bdd/steps/nominatim_environment.py
Merge pull request #3493 from lonvia/clean-up-bdd-tests
[nominatim.git] / test / bdd / steps / nominatim_environment.py
index 6381e4b4a2da8865c89e711910458a2a7fb13d74..c4b055885d1a61211190d227e47eaef57d9d206c 100644 (file)
@@ -1,15 +1,21 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2024 by the Nominatim developer community.
+# For a full list of authors see the git log.
 from pathlib import Path
-import sys
+import importlib
 import tempfile
 
-import psycopg2
-import psycopg2.extras
+import psycopg
+from psycopg import sql as pysql
 
-sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
-
-from nominatim import cli
-from nominatim.config import Configuration
-from nominatim.tools import refresh
+from nominatim_db import cli
+from nominatim_db.config import Configuration
+from nominatim_db.db.connection import Connection, register_hstore, execute_scalar
+from nominatim_db.tools import refresh
+from nominatim_db.tokenizer import factory as tokenizer_factory
 from steps.utils import run_script
 
 class NominatimEnvironment:
@@ -17,7 +23,6 @@ class NominatimEnvironment:
     """
 
     def __init__(self, config):
-        self.build_dir = Path(config['BUILDDIR']).resolve()
         self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve()
         self.db_host = config['DB_HOST']
         self.db_port = config['DB_PORT']
@@ -27,23 +32,32 @@ class NominatimEnvironment:
         self.test_db = config['TEST_DB']
         self.api_test_db = config['API_TEST_DB']
         self.api_test_file = config['API_TEST_FILE']
+        self.tokenizer = config['TOKENIZER']
+        self.import_style = config['STYLE']
         self.server_module_path = config['SERVER_MODULE_PATH']
         self.reuse_template = not config['REMOVE_TEMPLATE']
         self.keep_scenario_db = config['KEEP_TEST_DB']
-        self.code_coverage_path = config['PHPCOV']
-        self.code_coverage_id = 1
 
-        self.default_config = Configuration(None, self.src_dir / 'settings').get_os_env()
+        self.default_config = Configuration(None).get_os_env()
         self.test_env = None
         self.template_db_done = False
         self.api_db_done = False
         self.website_dir = None
 
+        self.api_engine = None
+        if config['API_ENGINE'] != 'php':
+            if not hasattr(self, f"create_api_request_func_{config['API_ENGINE']}"):
+                raise RuntimeError(f"Unknown API engine '{config['API_ENGINE']}'")
+            self.api_engine = getattr(self, f"create_api_request_func_{config['API_ENGINE']}")()
+
+        if self.tokenizer == 'legacy' and self.server_module_path is None:
+            raise RuntimeError("You must set -DSERVER_MODULE_PATH when testing the legacy tokenizer.")
+
     def connect_database(self, dbname):
         """ Return a connection to the database with the given name.
             Uses configured host, user and port.
         """
-        dbargs = {'database': dbname}
+        dbargs = {'dbname': dbname, 'row_factory': psycopg.rows.dict_row}
         if self.db_host:
             dbargs['host'] = self.db_host
         if self.db_port:
@@ -52,16 +66,8 @@ class NominatimEnvironment:
             dbargs['user'] = self.db_user
         if self.db_pass:
             dbargs['password'] = self.db_pass
-        conn = psycopg2.connect(**dbargs)
-        return conn
+        return psycopg.connect(**dbargs)
 
-    def next_code_coverage_file(self):
-        """ Generate the next name for a coverage file.
-        """
-        fn = Path(self.code_coverage_path) / "{:06d}.cov".format(self.code_coverage_id)
-        self.code_coverage_id += 1
-
-        return fn.resolve()
 
     def write_nominatim_config(self, dbname):
         """ Set up a custom test configuration that connects to the given
@@ -69,7 +75,10 @@ class NominatimEnvironment:
             be picked up by dotenv and creates a project directory with the
             appropriate website scripts.
         """
-        dsn = 'pgsql:dbname={}'.format(dbname)
+        if dbname.startswith('sqlite:'):
+            dsn = 'sqlite:dbname={}'.format(dbname[7:])
+        else:
+            dsn = 'pgsql:dbname={}'.format(dbname)
         if self.db_host:
             dsn += ';host=' + self.db_host
         if self.db_port:
@@ -79,36 +88,42 @@ class NominatimEnvironment:
         if self.db_pass:
             dsn += ';password=' + self.db_pass
 
-        if self.website_dir is not None \
-           and self.test_env is not None \
-           and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
-            return # environment already set uo
-
         self.test_env = dict(self.default_config)
         self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
+        self.test_env['NOMINATIM_LANGUAGES'] = 'en,de,fr,ja'
         self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
         self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
         self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
         self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
         self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
         self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
-        self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
-        self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
-        self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
+        if self.tokenizer is not None:
+            self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
+        if self.import_style is not None:
+            self.test_env['NOMINATIM_IMPORT_STYLE'] = self.import_style
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
-        else:
-            # avoid module being copied into the temporary environment
-            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
 
         if self.website_dir is not None:
             self.website_dir.cleanup()
 
         self.website_dir = tempfile.TemporaryDirectory()
-        cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env)
-        cfg.lib_dir.php = self.src_dir / 'lib-php'
-        refresh.setup_website(Path(self.website_dir.name) / 'website', cfg)
+
+        try:
+            conn = self.connect_database(dbname)
+        except:
+            conn = False
+        refresh.setup_website(Path(self.website_dir.name) / 'website',
+                              self.get_test_config(), conn)
+        if conn:
+            conn.close()
+
+
+    def get_test_config(self):
+        cfg = Configuration(Path(self.website_dir.name), environ=self.test_env)
+        cfg.set_libdirs(module=self.server_module_path)
+        return cfg
 
     def get_libpq_dsn(self):
         dsn = self.test_env['NOMINATIM_DATABASE_DSN']
@@ -130,11 +145,10 @@ class NominatimEnvironment:
     def db_drop_database(self, name):
         """ Drop the database with the given name.
         """
-        conn = self.connect_database('postgres')
-        conn.set_isolation_level(0)
-        cur = conn.cursor()
-        cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
-        conn.close()
+        with self.connect_database('postgres') as conn:
+            conn.autocommit = True
+            conn.execute(pysql.SQL('DROP DATABASE IF EXISTS')
+                         +  pysql.Identifier(name))
 
     def setup_template_db(self):
         """ Setup a template database that already contains common test data.
@@ -146,22 +160,23 @@ class NominatimEnvironment:
 
         self.template_db_done = True
 
-        if self._reuse_or_drop_db(self.template_db):
-            return
-
         self.write_nominatim_config(self.template_db)
 
-        try:
-            # execute nominatim import on an empty file to get the right tables
-            with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
-                fd.write(b'<osm version="0.6"></osm>')
-                fd.flush()
-                self.run_nominatim('import', '--osm-file', fd.name,
-                                             '--osm2pgsql-cache', '1',
-                                             '--ignore-errors')
-        except:
-            self.db_drop_database(self.template_db)
-            raise
+        if not self._reuse_or_drop_db(self.template_db):
+            try:
+                # execute nominatim import on an empty file to get the right tables
+                with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
+                    fd.write(b'<osm version="0.6"></osm>')
+                    fd.flush()
+                    self.run_nominatim('import', '--osm-file', fd.name,
+                                                 '--osm2pgsql-cache', '1',
+                                                 '--ignore-errors',
+                                                 '--offline', '--index-noanalyse')
+            except:
+                self.db_drop_database(self.template_db)
+                raise
+
+        self.run_nominatim('refresh', '--functions')
 
 
     def setup_api_db(self):
@@ -169,90 +184,104 @@ class NominatimEnvironment:
         """
         self.write_nominatim_config(self.api_test_db)
 
-        if self.api_db_done:
+        if self.api_test_db.startswith('sqlite:'):
             return
 
-        self.api_db_done = True
+        if not self.api_db_done:
+            self.api_db_done = True
 
-        if self._reuse_or_drop_db(self.api_test_db):
-            return
+            if not self._reuse_or_drop_db(self.api_test_db):
+                testdata = (Path(__file__) / '..' / '..' / '..' / 'testdb').resolve()
+                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata)
+                simp_file = Path(self.website_dir.name) / 'secondary_importance.sql.gz'
+                simp_file.symlink_to(testdata / 'secondary_importance.sql.gz')
 
-        testdata = Path('__file__') / '..' / '..' / 'testdb'
-        self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+                try:
+                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
+                    self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger'))
+                    self.run_nominatim('freeze')
 
-        try:
-            self.run_nominatim('import', '--osm-file', str(self.api_test_file))
-            self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
-            self.run_nominatim('freeze')
+                    if self.tokenizer == 'legacy':
+                        phrase_file = str(testdata / 'specialphrases_testdb.sql')
+                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+                    else:
+                        csv_path = str(testdata / 'full_en_phrases_test.csv')
+                        self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
+                except:
+                    self.db_drop_database(self.api_test_db)
+                    raise
 
-            phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
-            run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
-        except:
-            self.db_drop_database(self.api_test_db)
-            raise
+        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
 
 
     def setup_unknown_db(self):
         """ Setup a test against a non-existing database.
         """
-        self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
+        # The tokenizer needs an existing database to function.
+        # So start with the usual database
+        class _Context:
+            db = None
+
+        context = _Context()
+        self.setup_db(context)
+        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+
+        # Then drop the DB again
+        self.teardown_db(context, force_drop=True)
 
     def setup_db(self, context):
         """ Setup a test against a fresh, empty test database.
         """
         self.setup_template_db()
+        with self.connect_database(self.template_db) as conn:
+            conn.autocommit = True
+            conn.execute(pysql.SQL('DROP DATABASE IF EXISTS')
+                                   + pysql.Identifier(self.test_db))
+            conn.execute(pysql.SQL('CREATE DATABASE {} TEMPLATE = {}').format(
+                           pysql.Identifier(self.test_db),
+                           pysql.Identifier(self.template_db)))
+
         self.write_nominatim_config(self.test_db)
-        conn = self.connect_database(self.template_db)
-        conn.set_isolation_level(0)
-        cur = conn.cursor()
-        cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
-        cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
-        conn.close()
         context.db = self.connect_database(self.test_db)
         context.db.autocommit = True
-        psycopg2.extras.register_hstore(context.db, globally=False)
+        register_hstore(context.db)
 
-    def teardown_db(self, context):
+    def teardown_db(self, context, force_drop=False):
         """ Remove the test database, if it exists.
         """
-        if 'db' in context:
+        if hasattr(context, 'db'):
             context.db.close()
 
-        if not self.keep_scenario_db:
+        if force_drop or not self.keep_scenario_db:
             self.db_drop_database(self.test_db)
 
     def _reuse_or_drop_db(self, name):
-        """ Check for the existance of the given DB. If reuse is enabled,
-            then the function checks for existance and returns True if the
+        """ Check for the existence of the given DB. If reuse is enabled,
+            then the function checks for existnce and returns True if the
             database is already there. Otherwise an existing database is
             dropped and always false returned.
         """
         if self.reuse_template:
-            conn = self.connect_database('postgres')
-            with conn.cursor() as cur:
-                cur.execute('select count(*) from pg_database where datname = %s',
-                            (name,))
-                if cur.fetchone()[0] == 1:
+            with self.connect_database('postgres') as conn:
+                num = execute_scalar(conn,
+                                     'select count(*) from pg_database where datname = %s',
+                                     (name,))
+                if num == 1:
                     return True
-            conn.close()
         else:
             self.db_drop_database(name)
 
         return False
 
+
     def reindex_placex(self, db):
         """ Run the indexing step until all data in the placex has
             been processed. Indexing during updates can produce more data
             to index under some circumstances. That is why indexing may have
             to be run multiple times.
         """
-        with db.cursor() as cur:
-            while True:
-                self.run_nominatim('index')
+        self.run_nominatim('index')
 
-                cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
-                if cur.rowcount == 0:
-                    return
 
     def run_nominatim(self, *cmdline):
         """ Run the nominatim command-line tool via the library.
@@ -260,14 +289,9 @@ class NominatimEnvironment:
         if self.website_dir is not None:
             cmdline = list(cmdline) + ['--project-dir', self.website_dir.name]
 
-        cli.nominatim(module_dir='',
-                      osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
-                      phplib_dir=str(self.src_dir / 'lib-php'),
-                      sqllib_dir=str(self.src_dir / 'lib-sql'),
-                      data_dir=str(self.src_dir / 'data'),
-                      config_dir=str(self.src_dir / 'settings'),
+        cli.nominatim(module_dir=self.server_module_path,
+                      osm2pgsql_path=None,
                       cli_args=cmdline,
-                      phpcgi_path='',
                       environ=self.test_env)
 
 
@@ -292,3 +316,40 @@ class NominatimEnvironment:
                               WHERE class='place' and type='houses'
                                     and osm_type='W'
                                     and ST_GeometryType(geometry) = 'ST_LineString'""")
+
+
+    def create_api_request_func_starlette(self):
+        import nominatim_api.server.starlette.server
+        from asgi_lifespan import LifespanManager
+        import httpx
+
+        async def _request(endpoint, params, project_dir, environ, http_headers):
+            app = nominatim_api.server.starlette.server.get_application(project_dir, environ)
+
+            async with LifespanManager(app):
+                async with httpx.AsyncClient(app=app, base_url="http://nominatim.test") as client:
+                    response = await client.get(f"/{endpoint}", params=params,
+                                                headers=http_headers)
+
+            return response.text, response.status_code
+
+        return _request
+
+
+    def create_api_request_func_falcon(self):
+        import nominatim_api.server.falcon.server
+        import falcon.testing
+
+        async def _request(endpoint, params, project_dir, environ, http_headers):
+            app = nominatim_api.server.falcon.server.get_application(project_dir, environ)
+
+            async with falcon.testing.ASGIConductor(app) as conductor:
+                response = await conductor.get(f"/{endpoint}", params=params,
+                                               headers=http_headers)
+
+            return response.text, response.status_code
+
+        return _request
+
+
+