git.openstreetmap.org Git - nominatim.git/blobdiff - test/bdd/steps/nominatim_environment.py
use context management when processing Tiger data
[nominatim.git] / test / bdd / steps / nominatim_environment.py
index 4c9733585fde4bf386b312f5c71fbee9117d6d26..70a03e6ec31e37a148f1ea0255c3b283913704b5 100644 (file)
@@ -1,13 +1,24 @@
-import logging
-import os
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 from pathlib import Path
 from pathlib import Path
-import subprocess
+import sys
 import tempfile
 
 import psycopg2
 import psycopg2.extras
 
 import tempfile
 
 import psycopg2
 import psycopg2.extras
 
-LOG = logging.getLogger(__name__)
+sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
+
+from nominatim import cli
+from nominatim.config import Configuration
+from nominatim.db.connection import _Connection
+from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
+from steps.utils import run_script
 
 class NominatimEnvironment:
     """ Collects all functions for the execution of Nominatim functions.
 
 class NominatimEnvironment:
     """ Collects all functions for the execution of Nominatim functions.
@@ -23,14 +34,18 @@ class NominatimEnvironment:
         self.template_db = config['TEMPLATE_DB']
         self.test_db = config['TEST_DB']
         self.api_test_db = config['API_TEST_DB']
         self.template_db = config['TEMPLATE_DB']
         self.test_db = config['TEST_DB']
         self.api_test_db = config['API_TEST_DB']
+        self.api_test_file = config['API_TEST_FILE']
+        self.tokenizer = config['TOKENIZER']
         self.server_module_path = config['SERVER_MODULE_PATH']
         self.reuse_template = not config['REMOVE_TEMPLATE']
         self.keep_scenario_db = config['KEEP_TEST_DB']
         self.code_coverage_path = config['PHPCOV']
         self.code_coverage_id = 1
         self.server_module_path = config['SERVER_MODULE_PATH']
         self.reuse_template = not config['REMOVE_TEMPLATE']
         self.keep_scenario_db = config['KEEP_TEST_DB']
         self.code_coverage_path = config['PHPCOV']
         self.code_coverage_id = 1
-        self.test_env = None
 
 
+        self.default_config = Configuration(None, self.src_dir / 'settings').get_os_env()
+        self.test_env = None
         self.template_db_done = False
         self.template_db_done = False
+        self.api_db_done = False
         self.website_dir = None
 
     def connect_database(self, dbname):
         self.website_dir = None
 
     def connect_database(self, dbname):
@@ -46,7 +61,7 @@ class NominatimEnvironment:
             dbargs['user'] = self.db_user
         if self.db_pass:
             dbargs['password'] = self.db_pass
             dbargs['user'] = self.db_user
         if self.db_pass:
             dbargs['password'] = self.db_pass
-        conn = psycopg2.connect(**dbargs)
+        conn = psycopg2.connect(connection_factory=_Connection, **dbargs)
         return conn
 
     def next_code_coverage_file(self):
         return conn
 
     def next_code_coverage_file(self):
@@ -78,20 +93,64 @@ class NominatimEnvironment:
            and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
             return # environment already set up
 
            and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
             return # environment already set up
 
-        self.test_env = os.environ
+        self.test_env = dict(self.default_config)
         self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
         self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
         self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
         self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
         self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
         self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
         self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
         self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
+        self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
+        self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
+        self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
+        self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
+        self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
+        self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
+        if self.tokenizer is not None:
+            self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
 
         if self.server_module_path:
             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
+        else:
+            # avoid module being copied into the temporary environment
+            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
 
         if self.website_dir is not None:
             self.website_dir.cleanup()
 
         self.website_dir = tempfile.TemporaryDirectory()
 
         if self.website_dir is not None:
             self.website_dir.cleanup()
 
         self.website_dir = tempfile.TemporaryDirectory()
-        self.run_setup_script('setup-website')
+
+        try:
+            conn = self.connect_database(dbname)
+        except:
+            conn = False
+        refresh.setup_website(Path(self.website_dir.name) / 'website',
+                              self.get_test_config(), conn)
+
+
+    def get_test_config(self):
+        """ Build a Configuration object for the current test setup.
+
+            The temporary website directory is passed as first argument
+            and the 'settings' directory of the source tree as second
+            argument, with the test environment as `environ`. The library
+            directories are then pointed at the build and source trees.
+        """
+        project_dir = Path(self.website_dir.name)
+        test_config = Configuration(project_dir, self.src_dir / 'settings',
+                                    environ=self.test_env)
+
+        libdirs = {
+            'module': self.build_dir / 'module',
+            'osm2pgsql': self.build_dir / 'osm2pgsql' / 'osm2pgsql',
+            'php': self.src_dir / 'lib-php',
+            'sql': self.src_dir / 'lib-sql',
+            'data': self.src_dir / 'data',
+        }
+        test_config.set_libdirs(**libdirs)
+
+        return test_config
+
+    def get_libpq_dsn(self):
+        """ Return the DSN of the test database in libpq keyword/value
+            format.
+
+            The DSN is read from NOMINATIM_DATABASE_DSN of the test
+            environment. A DSN in the old PHP format ('pgsql:k=v;k=v')
+            is converted, anything else is returned unchanged.
+        """
+        dsn = self.test_env['NOMINATIM_DATABASE_DSN']
+
+        def quote_param(param):
+            # Split on the first '=' only, the value may contain more of them.
+            key, val = param.split('=', 1)
+            val = val.replace('\\', '\\\\').replace("'", "\\'")
+            # libpq requires quotes around empty values and values with spaces.
+            if not val or ' ' in val:
+                val = "'" + val + "'"
+            return key + '=' + val
+
+        if dsn.startswith('pgsql:'):
+            # Old PHP DSN format. Convert before returning.
+            # Empty segments (e.g. from a trailing ';') are skipped.
+            return ' '.join([quote_param(p) for p in dsn[6:].split(';') if p])
+
+        return dsn
 
 
     def db_drop_database(self, name):
 
 
     def db_drop_database(self, name):
@@ -113,47 +172,19 @@ class NominatimEnvironment:
 
         self.template_db_done = True
 
 
         self.template_db_done = True
 
-        if self.reuse_template:
-            # check that the template is there
-            conn = self.connect_database('postgres')
-            cur = conn.cursor()
-            cur.execute('select count(*) from pg_database where datname = %s',
-                        (self.template_db,))
-            if cur.fetchone()[0] == 1:
-                return
-            conn.close()
-        else:
-            # just in case... make sure a previous table has been dropped
-            self.db_drop_database(self.template_db)
+        if self._reuse_or_drop_db(self.template_db):
+            return
 
 
-        try:
-            # call the first part of database setup
-            self.write_nominatim_config(self.template_db)
-            self.run_setup_script('create-db', 'setup-db')
-            # remove external data to speed up indexing for tests
-            conn = self.connect_database(self.template_db)
-            cur = conn.cursor()
-            cur.execute("""select tablename from pg_tables
-                           where tablename in ('gb_postcode', 'us_postcode')""")
-            for t in cur:
-                conn.cursor().execute('TRUNCATE TABLE {}'.format(t[0]))
-            conn.commit()
-            conn.close()
+        self.write_nominatim_config(self.template_db)
 
 
-            # execute osm2pgsql import on an empty file to get the right tables
+        try:
+            # execute nominatim import on an empty file to get the right tables
             with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
                 fd.write(b'<osm version="0.6"></osm>')
                 fd.flush()
             with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
                 fd.write(b'<osm version="0.6"></osm>')
                 fd.flush()
-                self.run_setup_script('import-data',
-                                      'ignore-errors',
-                                      'create-functions',
-                                      'create-tables',
-                                      'create-partition-tables',
-                                      'create-partition-functions',
-                                      'load-data',
-                                      'create-search-indices',
-                                      osm_file=fd.name,
-                                      osm2pgsql_cache='200')
+                self.run_nominatim('import', '--osm-file', fd.name,
+                                             '--osm2pgsql-cache', '1',
+                                             '--ignore-errors')
         except:
             self.db_drop_database(self.template_db)
             raise
         except:
             self.db_drop_database(self.template_db)
             raise
@@ -164,61 +195,138 @@ class NominatimEnvironment:
         """
         self.write_nominatim_config(self.api_test_db)
 
         """
         self.write_nominatim_config(self.api_test_db)
 
+        if not self.api_db_done:
+            self.api_db_done = True
+
+            if not self._reuse_or_drop_db(self.api_test_db):
+                testdata = Path('__file__') / '..' / '..' / 'testdb'
+                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+
+                try:
+                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
+                    self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+                    self.run_nominatim('freeze')
+
+                    if self.tokenizer == 'legacy':
+                        phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
+                    else:
+                        csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
+                        self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
+                except:
+                    self.db_drop_database(self.api_test_db)
+                    raise
+
+        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
+
+
     def setup_unknown_db(self):
         """ Setup a test against a non-existing database.
         """
     def setup_unknown_db(self):
         """ Setup a test against a non-existing database.
         """
-        self.write_nominatim_config('UNKNOWN_DATABASE_NAME')
+        # The tokenizer needs an existing database to function.
+        # So start with the usual database
+        class _Context:
+            db = None
+
+        context = _Context()
+        self.setup_db(context)
+        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+
+        # Then drop the DB again
+        self.teardown_db(context, force_drop=True)
 
     def setup_db(self, context):
         """ Setup a test against a fresh, empty test database.
         """
         self.setup_template_db()
 
     def setup_db(self, context):
         """ Setup a test against a fresh, empty test database.
         """
         self.setup_template_db()
-        self.write_nominatim_config(self.test_db)
         conn = self.connect_database(self.template_db)
         conn.set_isolation_level(0)
         cur = conn.cursor()
         cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
         cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
         conn.close()
         conn = self.connect_database(self.template_db)
         conn.set_isolation_level(0)
         cur = conn.cursor()
         cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
         cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
         conn.close()
+        self.write_nominatim_config(self.test_db)
         context.db = self.connect_database(self.test_db)
         context.db = self.connect_database(self.test_db)
+        context.db.autocommit = True
         psycopg2.extras.register_hstore(context.db, globally=False)
 
         psycopg2.extras.register_hstore(context.db, globally=False)
 
-    def teardown_db(self, context):
+    def teardown_db(self, context, force_drop=False):
         """ Remove the test database, if it exists.
         """
         """ Remove the test database, if it exists.
         """
-        if 'db' in context:
+        if hasattr(context, 'db'):
             context.db.close()
 
             context.db.close()
 
-        if not self.keep_scenario_db:
+        if force_drop or not self.keep_scenario_db:
             self.db_drop_database(self.test_db)
 
             self.db_drop_database(self.test_db)
 
-    def run_setup_script(self, *args, **kwargs):
-        """ Run the Nominatim setup script with the given arguments.
+    def _reuse_or_drop_db(self, name):
+        """ Check for the existence of the given DB. If reuse is enabled,
+            then the function checks for existence and returns True if the
+            database is already there. Otherwise an existing database is
+            dropped and always False returned.
+
+            The connection used for the check is closed on every path,
+            including the early return.
         """
         """
-        self.run_nominatim_script('setup', *args, **kwargs)
+        if self.reuse_template:
+            conn = self.connect_database('postgres')
+            try:
+                with conn.cursor() as cur:
+                    cur.execute('select count(*) from pg_database where datname = %s',
+                                (name,))
+                    if cur.fetchone()[0] == 1:
+                        return True
+            finally:
+                # close even when returning early or when the query fails
+                conn.close()
+        else:
+            self.db_drop_database(name)
 
 
-    def run_update_script(self, *args, **kwargs):
-        """ Run the Nominatim update script with the given arguments.
-        """
-        self.run_nominatim_script('update', *args, **kwargs)
+        return False
 
 
-    def run_nominatim_script(self, script, *args, **kwargs):
-        """ Run one of the Nominatim utility scripts with the given arguments.
+    def reindex_placex(self, db):
+        """ Run the indexing step until all data in the placex has
+            been processed. Indexing during updates can produce more data
+            to index under some circumstances. That is why indexing may have
+            to be run multiple times.
         """
         """
-        cmd = ['/usr/bin/env', 'php', '-Cq']
-        cmd.append((Path(self.build_dir) / 'utils' / '{}.php'.format(script)).resolve())
-        cmd.extend(['--' + x for x in args])
-        for k, v in kwargs.items():
-            cmd.extend(('--' + k.replace('_', '-'), str(v)))
+        with db.cursor() as cur:
+            while True:
+                self.run_nominatim('index')
+
+                cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
+                if cur.rowcount == 0:
+                    return
 
 
+    def run_nominatim(self, *cmdline):
+        """ Run the nominatim command-line tool via the library.
+        """
         if self.website_dir is not None:
         if self.website_dir is not None:
-            cwd = self.website_dir.name
-        else:
-            cwd = self.build_dir
-
-        proc = subprocess.Popen(cmd, cwd=cwd, env=self.test_env,
-                                stdout=subprocess.PIPE, stderr=subprocess.PIPE)
-        (outp, outerr) = proc.communicate()
-        outerr = outerr.decode('utf-8').replace('\\n', '\n')
-        LOG.debug("run_nominatim_script: %s\n%s\n%s", cmd, outp, outerr)
-        assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)
+            cmdline = list(cmdline) + ['--project-dir', self.website_dir.name]
+
+        cli.nominatim(module_dir='',
+                      osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
+                      phplib_dir=str(self.src_dir / 'lib-php'),
+                      sqllib_dir=str(self.src_dir / 'lib-sql'),
+                      data_dir=str(self.src_dir / 'data'),
+                      config_dir=str(self.src_dir / 'settings'),
+                      cli_args=cmdline,
+                      phpcgi_path='',
+                      environ=self.test_env)
+
+
+    def copy_from_place(self, db):
+        """ Fill the placex and location_property_osmline tables with
+            the content of the place table, so that the appropriate
+            triggers are invoked. The SQL functions are refreshed first.
+        """
+        self.run_nominatim('refresh', '--functions', '--no-diff-updates')
+
+        # Everything except interpolation ways goes into placex.
+        placex_sql = """INSERT INTO placex (osm_type, osm_id, class, type,
+                                               name, admin_level, address,
+                                               extratags, geometry)
+                             SELECT osm_type, osm_id, class, type,
+                                    name, admin_level, address,
+                                    extratags, geometry
+                               FROM place
+                               WHERE not (class='place' and type='houses' and osm_type='W')"""
+        # Interpolation ways (place=houses linestrings) get their own table.
+        osmline_sql = """INSERT INTO location_property_osmline (osm_id, address, linegeo)
+                             SELECT osm_id, address, geometry
+                               FROM place
+                              WHERE class='place' and type='houses'
+                                    and osm_type='W'
+                                    and ST_GeometryType(geometry) = 'ST_LineString'"""
+
+        with db.cursor() as cur:
+            for statement in (placex_sql, osmline_sql):
+                cur.execute(statement)