]> git.openstreetmap.org Git - nominatim.git/blob - test/bdd/steps/nominatim_environment.py
Merge pull request #3346 from lonvia/reduce-artificial-importance
[nominatim.git] / test / bdd / steps / nominatim_environment.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 from pathlib import Path
8 import importlib
9 import sys
10 import tempfile
11
12 import psycopg2
13 import psycopg2.extras
14
15 sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
16
17 from nominatim import cli
18 from nominatim.config import Configuration
19 from nominatim.db.connection import Connection
20 from nominatim.tools import refresh
21 from nominatim.tokenizer import factory as tokenizer_factory
22 from steps.utils import run_script
23
class NominatimEnvironment:
    """ Collects all functions for the execution of Nominatim functions.

        The environment keeps track of the databases used by the BDD tests
        (template database, per-scenario test database, API test database),
        writes the matching Nominatim configuration into a temporary project
        directory and runs the nominatim command-line tool against it.
    """

    def __init__(self, config):
        """ Read the test settings from `config`, a mapping that must provide
            all the keys accessed below (BUILDDIR, DB_HOST, ... API_ENGINE).

            Raises a RuntimeError when API_ENGINE is neither 'php' nor the
            suffix of one of the `create_api_request_func_*` methods.
        """
        self.build_dir = Path(config['BUILDDIR']).resolve()
        self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve()
        self.db_host = config['DB_HOST']
        self.db_port = config['DB_PORT']
        self.db_user = config['DB_USER']
        self.db_pass = config['DB_PASS']
        self.template_db = config['TEMPLATE_DB']
        self.test_db = config['TEST_DB']
        self.api_test_db = config['API_TEST_DB']
        self.api_test_file = config['API_TEST_FILE']
        self.tokenizer = config['TOKENIZER']
        self.import_style = config['STYLE']
        self.server_module_path = config['SERVER_MODULE_PATH']
        self.reuse_template = not config['REMOVE_TEMPLATE']
        self.keep_scenario_db = config['KEEP_TEST_DB']
        self.code_coverage_path = config['PHPCOV']
        self.code_coverage_id = 1

        self.default_config = Configuration(None).get_os_env()
        self.test_env = None
        self.template_db_done = False
        self.api_db_done = False
        self.website_dir = None

        # For every engine other than 'php' a request function is created
        # by the method named after the engine.
        self.api_engine = None
        engine = config['API_ENGINE']
        if engine != 'php':
            factory = getattr(self, f"create_api_request_func_{engine}", None)
            if factory is None:
                raise RuntimeError(f"Unknown API engine '{engine}'")
            self.api_engine = factory()

    def connect_database(self, dbname):
        """ Return a connection to the database with the given name.
            Uses configured host, user and port.
        """
        dbargs = {'database': dbname}
        if self.db_host:
            dbargs['host'] = self.db_host
        if self.db_port:
            dbargs['port'] = self.db_port
        if self.db_user:
            dbargs['user'] = self.db_user
        if self.db_pass:
            dbargs['password'] = self.db_pass
        return psycopg2.connect(connection_factory=Connection, **dbargs)

    def next_code_coverage_file(self):
        """ Generate the next name for a coverage file.

            Returns the resolved path of a file in the coverage directory
            named after the current coverage id (six digits, zero-padded)
            and advances the id by one.
        """
        fn = Path(self.code_coverage_path) / f"{self.code_coverage_id:06d}.cov"
        self.code_coverage_id += 1

        return fn.resolve()

    def write_nominatim_config(self, dbname):
        """ Set up a custom test configuration that connects to the given
            database. This sets up the environment variables so that they can
            be picked up by dotenv and creates a project directory with the
            appropriate website scripts.

            A `dbname` starting with 'sqlite:' results in an sqlite DSN,
            everything else in a pgsql DSN.
        """
        if dbname.startswith('sqlite:'):
            dsn = 'sqlite:dbname={}'.format(dbname[7:])
        else:
            dsn = 'pgsql:dbname={}'.format(dbname)
        # f-strings instead of '+' so that non-string settings (for example
        # an integer port) do not raise a TypeError.
        if self.db_host:
            dsn += f';host={self.db_host}'
        if self.db_port:
            dsn += f';port={self.db_port}'
        if self.db_user:
            dsn += f';user={self.db_user}'
        if self.db_pass:
            dsn += f';password={self.db_pass}'

        self.test_env = dict(self.default_config)
        self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
        self.test_env['NOMINATIM_LANGUAGES'] = 'en,de,fr,ja'
        self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
        self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
        self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
        self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
        self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
        self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
        self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
        self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
        if self.tokenizer is not None:
            self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
        if self.import_style is not None:
            # Overrides the 'full' default set above.
            self.test_env['NOMINATIM_IMPORT_STYLE'] = self.import_style

        if self.server_module_path:
            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
        else:
            # avoid module being copied into the temporary environment
            self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())

        # Every configuration gets its own fresh project directory.
        if self.website_dir is not None:
            self.website_dir.cleanup()

        self.website_dir = tempfile.TemporaryDirectory()

        try:
            conn = self.connect_database(dbname)
        except Exception:
            # Best effort only: this function is also called before the
            # database has been created (see setup_template_db()). The
            # website is then set up without a connection.
            conn = False

        try:
            refresh.setup_website(Path(self.website_dir.name) / 'website',
                                  self.get_test_config(), conn)
        finally:
            # The connection is only needed for the website setup.
            if conn is not False:
                conn.close()

    def get_test_config(self):
        """ Return a Configuration for the current project directory and
            test environment, with the library directories pointing into
            the build directory.
        """
        cfg = Configuration(Path(self.website_dir.name), environ=self.test_env)
        cfg.set_libdirs(module=self.build_dir / 'module',
                        osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql')
        return cfg

    def get_libpq_dsn(self):
        """ Return the DSN of the current test configuration.

            A DSN in the old PHP format ('pgsql:key=value;key=value') is
            converted into a space-separated libpq connection string.
            Any other DSN is returned unchanged.
        """
        dsn = self.test_env['NOMINATIM_DATABASE_DSN']

        def quote_param(param):
            # Split at the first '=' only, the value may contain more of them.
            key, _, val = param.partition('=')
            val = val.replace('\\', '\\\\').replace("'", "\\'")
            # Empty values and values with spaces must be quoted for libpq.
            if not val or ' ' in val:
                val = "'" + val + "'"
            return key + '=' + val

        if dsn.startswith('pgsql:'):
            # Old PHP DSN format. Convert before returning.
            # Empty segments (e.g. from a trailing ';') are dropped.
            return ' '.join(quote_param(p) for p in dsn[6:].split(';') if p)

        return dsn

    def db_drop_database(self, name):
        """ Drop the database with the given name.
        """
        conn = self.connect_database('postgres')
        try:
            # DROP DATABASE cannot run inside a transaction block.
            conn.set_isolation_level(0)
            with conn.cursor() as cur:
                cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
        finally:
            conn.close()

    def setup_template_db(self):
        """ Setup a template database that already contains common test data.
            Having a template database speeds up tests considerably but at
            the price that the tests sometimes run with stale data.

            The work is only done on the first call. When the import fails,
            the template database is dropped again and the error re-raised.
        """
        if self.template_db_done:
            return

        self.template_db_done = True

        self.write_nominatim_config(self.template_db)

        if not self._reuse_or_drop_db(self.template_db):
            try:
                # execute nominatim import on an empty file to get the right tables
                with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
                    fd.write(b'<osm version="0.6"></osm>')
                    fd.flush()
                    self.run_nominatim('import', '--osm-file', fd.name,
                                                 '--osm2pgsql-cache', '1',
                                                 '--ignore-errors',
                                                 '--offline', '--index-noanalyse')
            except BaseException:
                # Never leave a half-imported template behind, not even
                # when the run was interrupted.
                self.db_drop_database(self.template_db)
                raise

        self.run_nominatim('refresh', '--functions')

    def setup_api_db(self):
        """ Setup a test against the API test database.

            Nothing is imported for sqlite databases. Otherwise the database
            is imported on first use unless an existing one may be reused.
            When the import fails, the database is dropped again and the
            error re-raised.
        """
        self.write_nominatim_config(self.api_test_db)

        if self.api_test_db.startswith('sqlite:'):
            return

        if not self.api_db_done:
            self.api_db_done = True

            if not self._reuse_or_drop_db(self.api_test_db):
                testdata = (Path(__file__) / '..' / '..' / '..' / 'testdb').resolve()
                self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata)
                simp_file = Path(self.website_dir.name) / 'secondary_importance.sql.gz'
                simp_file.symlink_to(testdata / 'secondary_importance.sql.gz')

                try:
                    self.run_nominatim('import', '--osm-file', str(self.api_test_file))
                    self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger'))
                    self.run_nominatim('freeze')

                    if self.tokenizer == 'legacy':
                        phrase_file = str(testdata / 'specialphrases_testdb.sql')
                        run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
                    else:
                        csv_path = str(testdata / 'full_en_phrases_test.csv')
                        self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
                except BaseException:
                    # Never leave a half-imported database behind, not even
                    # when the run was interrupted.
                    self.db_drop_database(self.api_test_db)
                    raise

        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())

    def setup_unknown_db(self):
        """ Setup a test against a non-existing database.
        """
        # The tokenizer needs an existing database to function.
        # So start with the usual database
        class _Context:
            db = None

        context = _Context()
        self.setup_db(context)
        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)

        # Then drop the DB again
        self.teardown_db(context, force_drop=True)

    def setup_db(self, context):
        """ Setup a test against a fresh, empty test database.

            The test database is recreated from the template database and
            an autocommit connection to it is saved in `context.db`.
        """
        self.setup_template_db()
        conn = self.connect_database(self.template_db)
        try:
            # DROP/CREATE DATABASE cannot run inside a transaction block.
            conn.set_isolation_level(0)
            with conn.cursor() as cur:
                cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
                cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
        finally:
            conn.close()
        self.write_nominatim_config(self.test_db)
        context.db = self.connect_database(self.test_db)
        context.db.autocommit = True
        psycopg2.extras.register_hstore(context.db, globally=False)

    def teardown_db(self, context, force_drop=False):
        """ Remove the test database, if it exists.

            Closes the connection saved in `context.db`, if there is one.
            The database itself is only dropped when `force_drop` is set
            or when scenario databases are not to be kept.
        """
        # The attribute may exist but still be None when the setup failed
        # before a connection was opened.
        db = getattr(context, 'db', None)
        if db is not None:
            db.close()

        if force_drop or not self.keep_scenario_db:
            self.db_drop_database(self.test_db)

    def _reuse_or_drop_db(self, name):
        """ Check for the existence of the given DB. If reuse is enabled,
            then the function checks for existence and returns True if the
            database is already there. Otherwise an existing database is
            dropped and always false returned.
        """
        if not self.reuse_template:
            self.db_drop_database(name)
            return False

        conn = self.connect_database('postgres')
        try:
            with conn.cursor() as cur:
                cur.execute('select count(*) from pg_database where datname = %s',
                            (name,))
                return cur.fetchone()[0] == 1
        finally:
            # Close the connection on every path, including the early
            # return for an existing database.
            conn.close()

    def reindex_placex(self, db):
        """ Run the indexing step until all data in the placex has
            been processed. Indexing during updates can produce more data
            to index under some circumstances. That is why indexing may have
            to be run multiple times.
        """
        with db.cursor() as cur:
            while True:
                self.run_nominatim('index')

                cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
                if cur.rowcount == 0:
                    return

    def run_nominatim(self, *cmdline):
        """ Run the nominatim command-line tool via the library.

            When a project directory has been set up, it is appended to
            the command line as '--project-dir'.
        """
        if self.website_dir is not None:
            cmdline = list(cmdline) + ['--project-dir', self.website_dir.name]

        cli.nominatim(module_dir='',
                      osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                      cli_args=cmdline,
                      environ=self.test_env)

    def copy_from_place(self, db):
        """ Copy data from place to the placex and location_property_osmline
            tables invoking the appropriate triggers.
        """
        self.run_nominatim('refresh', '--functions', '--no-diff-updates')

        with db.cursor() as cur:
            cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,
                                               name, admin_level, address,
                                               extratags, geometry)
                             SELECT osm_type, osm_id, class, type,
                                    name, admin_level, address,
                                    extratags, geometry
                               FROM place
                               WHERE not (class='place' and type='houses' and osm_type='W')""")
            cur.execute("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
                             SELECT osm_id, address, geometry
                               FROM place
                              WHERE class='place' and type='houses'
                                    and osm_type='W'
                                    and ST_GeometryType(geometry) = 'ST_LineString'""")

    def create_api_request_func_starlette(self):
        """ Return an async function that sends a GET request to the
            starlette version of the API and returns a tuple of
            response text and HTTP status code.
        """
        import nominatim.server.starlette.server
        from asgi_lifespan import LifespanManager
        import httpx

        async def _request(endpoint, params, project_dir, environ, http_headers):
            app = nominatim.server.starlette.server.get_application(project_dir, environ)

            async with LifespanManager(app):
                # NOTE(review): newer httpx releases deprecate the `app=`
                # shortcut in favour of an explicit ASGI transport. Confirm
                # against the httpx version used for testing.
                async with httpx.AsyncClient(app=app, base_url="http://nominatim.test") as client:
                    response = await client.get(f"/{endpoint}", params=params,
                                                headers=http_headers)

            return response.text, response.status_code

        return _request

    def create_api_request_func_falcon(self):
        """ Return an async function that sends a GET request to the
            falcon version of the API and returns a tuple of
            response text and HTTP status code.
        """
        import nominatim.server.falcon.server
        import falcon.testing

        async def _request(endpoint, params, project_dir, environ, http_headers):
            app = nominatim.server.falcon.server.get_application(project_dir, environ)

            async with falcon.testing.ASGIConductor(app) as conductor:
                response = await conductor.get(f"/{endpoint}", params=params,
                                               headers=http_headers)

            return response.text, response.status_code

        return _request
372
373
374