]> git.openstreetmap.org Git - nominatim.git/blob - test/bdd/steps/nominatim_environment.py
Merge pull request #2428 from lonvia/rename-icu-tokenizer
[nominatim.git] / test / bdd / steps / nominatim_environment.py
1 from pathlib import Path
2 import sys
3 import tempfile
4
5 import psycopg2
6 import psycopg2.extras
7
8 sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve()))
9
10 from nominatim import cli
11 from nominatim.config import Configuration
12 from nominatim.db.connection import _Connection
13 from nominatim.tools import refresh
14 from nominatim.tokenizer import factory as tokenizer_factory
15 from steps.utils import run_script
16
17 class NominatimEnvironment:
18     """ Collects all functions for the execution of Nominatim functions.
19     """
20
21     def __init__(self, config):
22         self.build_dir = Path(config['BUILDDIR']).resolve()
23         self.src_dir = (Path(__file__) / '..' / '..' / '..' / '..').resolve()
24         self.db_host = config['DB_HOST']
25         self.db_port = config['DB_PORT']
26         self.db_user = config['DB_USER']
27         self.db_pass = config['DB_PASS']
28         self.template_db = config['TEMPLATE_DB']
29         self.test_db = config['TEST_DB']
30         self.api_test_db = config['API_TEST_DB']
31         self.api_test_file = config['API_TEST_FILE']
32         self.tokenizer = config['TOKENIZER']
33         self.server_module_path = config['SERVER_MODULE_PATH']
34         self.reuse_template = not config['REMOVE_TEMPLATE']
35         self.keep_scenario_db = config['KEEP_TEST_DB']
36         self.code_coverage_path = config['PHPCOV']
37         self.code_coverage_id = 1
38
39         self.default_config = Configuration(None, self.src_dir / 'settings').get_os_env()
40         self.test_env = None
41         self.template_db_done = False
42         self.api_db_done = False
43         self.website_dir = None
44
45     def connect_database(self, dbname):
46         """ Return a connection to the database with the given name.
47             Uses configured host, user and port.
48         """
49         dbargs = {'database': dbname}
50         if self.db_host:
51             dbargs['host'] = self.db_host
52         if self.db_port:
53             dbargs['port'] = self.db_port
54         if self.db_user:
55             dbargs['user'] = self.db_user
56         if self.db_pass:
57             dbargs['password'] = self.db_pass
58         conn = psycopg2.connect(connection_factory=_Connection, **dbargs)
59         return conn
60
61     def next_code_coverage_file(self):
62         """ Generate the next name for a coverage file.
63         """
64         fn = Path(self.code_coverage_path) / "{:06d}.cov".format(self.code_coverage_id)
65         self.code_coverage_id += 1
66
67         return fn.resolve()
68
69     def write_nominatim_config(self, dbname):
70         """ Set up a custom test configuration that connects to the given
71             database. This sets up the environment variables so that they can
72             be picked up by dotenv and creates a project directory with the
73             appropriate website scripts.
74         """
75         dsn = 'pgsql:dbname={}'.format(dbname)
76         if self.db_host:
77             dsn += ';host=' + self.db_host
78         if self.db_port:
79             dsn += ';port=' + self.db_port
80         if self.db_user:
81             dsn += ';user=' + self.db_user
82         if self.db_pass:
83             dsn += ';password=' + self.db_pass
84
85         if self.website_dir is not None \
86            and self.test_env is not None \
87            and dsn == self.test_env['NOMINATIM_DATABASE_DSN']:
88             return # environment already set uo
89
90         self.test_env = dict(self.default_config)
91         self.test_env['NOMINATIM_DATABASE_DSN'] = dsn
92         self.test_env['NOMINATIM_FLATNODE_FILE'] = ''
93         self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full'
94         self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes'
95         self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve())
96         self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve())
97         self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve())
98         self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve())
99         self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve())
100         self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve())
101         if self.tokenizer is not None:
102             self.test_env['NOMINATIM_TOKENIZER'] = self.tokenizer
103
104         if self.server_module_path:
105             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path
106         else:
107             # avoid module being copied into the temporary environment
108             self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve())
109
110         if self.website_dir is not None:
111             self.website_dir.cleanup()
112
113         self.website_dir = tempfile.TemporaryDirectory()
114
115         try:
116             conn = self.connect_database(dbname)
117         except:
118             conn = False
119         refresh.setup_website(Path(self.website_dir.name) / 'website',
120                               self.get_test_config(), conn)
121
122
123     def get_test_config(self):
124         cfg = Configuration(Path(self.website_dir.name), self.src_dir / 'settings',
125                             environ=self.test_env)
126         cfg.set_libdirs(module=self.build_dir / 'module',
127                         osm2pgsql=self.build_dir / 'osm2pgsql' / 'osm2pgsql',
128                         php=self.src_dir / 'lib-php',
129                         sql=self.src_dir / 'lib-sql',
130                         data=self.src_dir / 'data')
131         return cfg
132
133     def get_libpq_dsn(self):
134         dsn = self.test_env['NOMINATIM_DATABASE_DSN']
135
136         def quote_param(param):
137             key, val = param.split('=')
138             val = val.replace('\\', '\\\\').replace("'", "\\'")
139             if ' ' in val:
140                 val = "'" + val + "'"
141             return key + '=' + val
142
143         if dsn.startswith('pgsql:'):
144             # Old PHP DSN format. Convert before returning.
145             return ' '.join([quote_param(p) for p in dsn[6:].split(';')])
146
147         return dsn
148
149
150     def db_drop_database(self, name):
151         """ Drop the database with the given name.
152         """
153         conn = self.connect_database('postgres')
154         conn.set_isolation_level(0)
155         cur = conn.cursor()
156         cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
157         conn.close()
158
159     def setup_template_db(self):
160         """ Setup a template database that already contains common test data.
161             Having a template database speeds up tests considerably but at
162             the price that the tests sometimes run with stale data.
163         """
164         if self.template_db_done:
165             return
166
167         self.template_db_done = True
168
169         if self._reuse_or_drop_db(self.template_db):
170             return
171
172         self.write_nominatim_config(self.template_db)
173
174         try:
175             # execute nominatim import on an empty file to get the right tables
176             with tempfile.NamedTemporaryFile(dir='/tmp', suffix='.xml') as fd:
177                 fd.write(b'<osm version="0.6"></osm>')
178                 fd.flush()
179                 self.run_nominatim('import', '--osm-file', fd.name,
180                                              '--osm2pgsql-cache', '1',
181                                              '--ignore-errors')
182         except:
183             self.db_drop_database(self.template_db)
184             raise
185
186
187     def setup_api_db(self):
188         """ Setup a test against the API test database.
189         """
190         self.write_nominatim_config(self.api_test_db)
191
192         if not self.api_db_done:
193             self.api_db_done = True
194
195             if not self._reuse_or_drop_db(self.api_test_db):
196                 testdata = Path('__file__') / '..' / '..' / 'testdb'
197                 self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
198
199                 try:
200                     self.run_nominatim('import', '--osm-file', str(self.api_test_file))
201                     self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
202                     self.run_nominatim('freeze')
203
204                     if self.tokenizer != 'icu':
205                         phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
206                         run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
207                     else:
208                         csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
209                         self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
210                 except:
211                     self.db_drop_database(self.api_test_db)
212                     raise
213
214         tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
215
216
217     def setup_unknown_db(self):
218         """ Setup a test against a non-existing database.
219         """
220         # The tokenizer needs an existing database to function.
221         # So start with the usual database
222         class _Context:
223             db = None
224
225         context = _Context()
226         self.setup_db(context)
227         tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
228
229         # Then drop the DB again
230         self.teardown_db(context, force_drop=True)
231
232     def setup_db(self, context):
233         """ Setup a test against a fresh, empty test database.
234         """
235         self.setup_template_db()
236         conn = self.connect_database(self.template_db)
237         conn.set_isolation_level(0)
238         cur = conn.cursor()
239         cur.execute('DROP DATABASE IF EXISTS {}'.format(self.test_db))
240         cur.execute('CREATE DATABASE {} TEMPLATE = {}'.format(self.test_db, self.template_db))
241         conn.close()
242         self.write_nominatim_config(self.test_db)
243         context.db = self.connect_database(self.test_db)
244         context.db.autocommit = True
245         psycopg2.extras.register_hstore(context.db, globally=False)
246
247     def teardown_db(self, context, force_drop=False):
248         """ Remove the test database, if it exists.
249         """
250         if hasattr(context, 'db'):
251             context.db.close()
252
253         if force_drop or not self.keep_scenario_db:
254             self.db_drop_database(self.test_db)
255
256     def _reuse_or_drop_db(self, name):
257         """ Check for the existance of the given DB. If reuse is enabled,
258             then the function checks for existance and returns True if the
259             database is already there. Otherwise an existing database is
260             dropped and always false returned.
261         """
262         if self.reuse_template:
263             conn = self.connect_database('postgres')
264             with conn.cursor() as cur:
265                 cur.execute('select count(*) from pg_database where datname = %s',
266                             (name,))
267                 if cur.fetchone()[0] == 1:
268                     return True
269             conn.close()
270         else:
271             self.db_drop_database(name)
272
273         return False
274
275     def reindex_placex(self, db):
276         """ Run the indexing step until all data in the placex has
277             been processed. Indexing during updates can produce more data
278             to index under some circumstances. That is why indexing may have
279             to be run multiple times.
280         """
281         with db.cursor() as cur:
282             while True:
283                 self.run_nominatim('index')
284
285                 cur.execute("SELECT 'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
286                 if cur.rowcount == 0:
287                     return
288
289     def run_nominatim(self, *cmdline):
290         """ Run the nominatim command-line tool via the library.
291         """
292         if self.website_dir is not None:
293             cmdline = list(cmdline) + ['--project-dir', self.website_dir.name]
294
295         cli.nominatim(module_dir='',
296                       osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
297                       phplib_dir=str(self.src_dir / 'lib-php'),
298                       sqllib_dir=str(self.src_dir / 'lib-sql'),
299                       data_dir=str(self.src_dir / 'data'),
300                       config_dir=str(self.src_dir / 'settings'),
301                       cli_args=cmdline,
302                       phpcgi_path='',
303                       environ=self.test_env)
304
305
306     def copy_from_place(self, db):
307         """ Copy data from place to the placex and location_property_osmline
308             tables invoking the appropriate triggers.
309         """
310         self.run_nominatim('refresh', '--functions', '--no-diff-updates')
311
312         with db.cursor() as cur:
313             cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,
314                                                name, admin_level, address,
315                                                extratags, geometry)
316                              SELECT osm_type, osm_id, class, type,
317                                     name, admin_level, address,
318                                     extratags, geometry
319                                FROM place
320                                WHERE not (class='place' and type='houses' and osm_type='W')""")
321             cur.execute("""INSERT INTO location_property_osmline (osm_id, address, linegeo)
322                              SELECT osm_id, address, geometry
323                                FROM place
324                               WHERE class='place' and type='houses'
325                                     and osm_type='W'
326                                     and ST_GeometryType(geometry) = 'ST_LineString'""")