git.openstreetmap.org Git - nominatim.git/blob - test/python/test_tools_import_special_phrases.py
Tests added for the auto update of special phrases during import
[nominatim.git] / test / python / test_tools_import_special_phrases.py
1 """
2     Tests for import special phrases methods
3     of the class SpecialPhrasesImporter.
4 """
5 from mocks import MockParamCapture
6 from nominatim.errors import UsageError
7 from pathlib import Path
8 import tempfile
9 from shutil import copyfile
10 import pytest
11 from nominatim.tools.special_phrases import SpecialPhrasesImporter
12
# Two levels up from this module, kept unresolved; users of this constant
# call resolve() on the final path they build from it.
TEST_BASE_DIR = Path(__file__).joinpath('..', '..')
14
def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
                                            temp_db_conn):
    """
        Check the _fetch_existing_words_phrases() method.
        A special phrase row inserted into the word table must be
        collected into words_phrases_to_delete.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("""
            INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
            'class', 'type', null, 0, 'near');
        """)

    # Nothing may be collected before the fetch has run.
    assert not special_phrases_importer.words_phrases_to_delete

    special_phrases_importer._fetch_existing_words_phrases()

    expected_phrase = ('normalized_word', 'class', 'type', 'near')
    fetched_phrase = special_phrases_importer.words_phrases_to_delete.pop()
    assert fetched_phrase == expected_phrase
33
def test_fetch_existing_words_phrases_housenumber(special_phrases_importer, word_table,
                                                  temp_db_conn):
    """
        Check the _fetch_existing_words_phrases() method.
        A word row with class 'place' and type 'house' is a housenumber
        term, so nothing must be collected for it.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("""
            INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
            'place', 'house', null, 0, 'near');
        """)

    special_phrases_importer._fetch_existing_words_phrases()

    collected = special_phrases_importer.words_phrases_to_delete
    assert not collected
50
def test_fetch_existing_words_phrases_postcode(special_phrases_importer, word_table,
                                               temp_db_conn):
    """
        Check the _fetch_existing_words_phrases() method.
        A word row with class 'place' and type 'postcode' is a postcode
        term, so nothing must be collected for it.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("""
            INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
            'place', 'postcode', null, 0, 'near');
        """)

    special_phrases_importer._fetch_existing_words_phrases()

    collected = special_phrases_importer.words_phrases_to_delete
    assert not collected
67
def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_conn):
    """
        Check the _fetch_existing_place_classtype_tables() method.
        The place_classtype table created here must be collected
        into table_phrases_to_delete.
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute('CREATE TABLE place_classtype_testclasstypetable()')

    special_phrases_importer._fetch_existing_place_classtype_tables()

    fetched_table = special_phrases_importer.table_phrases_to_delete.pop()
    assert fetched_table == 'place_classtype_testclasstypetable'
80
def test_check_sanity_class(special_phrases_importer):
    """
        Check the _check_sanity() method.
        An empty class or an empty type must raise a UsageError,
        a non-empty class/type pair must pass silently.
    """
    invalid_pairs = (('', 'type'), ('class', ''))

    for bad_class, bad_type in invalid_pairs:
        with pytest.raises(UsageError):
            special_phrases_importer._check_sanity('en', bad_class, bad_type)

    # A valid pair must not raise.
    special_phrases_importer._check_sanity('en', 'class', 'type')
94
def test_load_white_and_black_lists(special_phrases_importer):
    """
        Test that _load_white_and_black_lists() returns a black list
        and a white list, both of dict type.
    """
    black_list, white_list = special_phrases_importer._load_white_and_black_lists()

    assert isinstance(black_list, dict)
    assert isinstance(white_list, dict)
103
def test_convert_php_settings(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() produces a json
        file next to the php settings file it is given.
    """
    source_php = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()

    with tempfile.TemporaryDirectory() as temp_dir:
        work_dir = Path(temp_dir)
        # Work on a copy so the generated json lands in the temporary directory.
        copied_php = (work_dir / 'phrase_settings.php').resolve()
        copyfile(source_php, copied_php)

        special_phrases_importer._convert_php_settings_if_needed(copied_php)

        assert (work_dir / 'phrase_settings.json').is_file()
117
def test_convert_settings_wrong_file(special_phrases_importer):
    """
        Test that _convert_php_settings_if_needed() raises a UsageError
        when the given path is not a valid file.
    """
    bogus_path = 'random_file'
    expected_message = 'random_file is not a valid file.'

    with pytest.raises(UsageError, match=expected_message):
        special_phrases_importer._convert_php_settings_if_needed(bogus_path)
125
def test_convert_settings_json_already_exist(special_phrases_importer):
    """
        Test that '_convert_php_settings_if_needed' returns the path of the
        corresponding json file when it is given a php file path and
        that json file already exists.
    """
    settings_dir = TEST_BASE_DIR / 'testfiles'
    php_file = (settings_dir / 'phrase_settings.php').resolve()
    expected_json = (settings_dir / 'phrase_settings.json').resolve()

    assert special_phrases_importer._convert_php_settings_if_needed(php_file) == expected_json
137
def test_convert_settings_giving_json(special_phrases_importer):
    """
        Test that '_convert_php_settings_if_needed' hands back the very
        same path when it is given a json file path.
    """
    json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()

    result_path = special_phrases_importer._convert_php_settings_if_needed(json_file)
    assert result_path == json_file
148
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
                                       temp_db_conn):
    """
        Test that _process_amenity() runs the
        getorcreate_amenityoperator() SQL function and that
        both operators, 'near' and 'in', are handled.
    """
    for operator in ('near', 'in'):
        special_phrases_importer._process_amenity('', '', '', '', operator)

    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
        recorded_rows = cursor.fetchall()

    # One recorded row per call is expected.
    assert len(recorded_rows) == 2
164
def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
                                          temp_db_conn):
    """
        Test that _process_amenity() runs the
        getorcreate_amenity() SQL function when no operator is given.
    """
    special_phrases_importer._process_amenity('', '', '', '', '')

    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
        recorded_row = cursor.fetchone()

    assert recorded_row
178
def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
    """
        Test that _create_place_classtype_indexes() creates the
        place_id index and the centroid index on the place_classtype
        table of the given class and type.
    """
    phrase_class = 'class'
    phrase_type = 'type'
    table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)

    with temp_db_conn.cursor() as temp_db_cursor:
        # The GEOMETRY column type below needs postgis. IF NOT EXISTS keeps
        # the setup from failing when the extension is already installed
        # in the test database.
        temp_db_cursor.execute("CREATE EXTENSION IF NOT EXISTS postgis;")
        temp_db_cursor.execute(
            'CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))

    special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)

    assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
195
def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
    """
        Test that _create_place_classtype_table() creates the
        place_classtype table for the given class and type.
    """
    class_and_type = ('class', 'type')

    special_phrases_importer._create_place_classtype_table('', *class_and_type)

    assert check_table_exist(temp_db_conn, *class_and_type)
206
def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
    """
        Test that _grant_access_to_webuser() gives the web user
        access to the place_classtype table.
    """
    phrase_class, phrase_type = 'class', 'type'
    create_table_sql = 'CREATE TABLE {}()'.format(
        'place_classtype_{}_{}'.format(phrase_class, phrase_type))

    with temp_db_conn.cursor() as cursor:
        cursor.execute(create_table_sql)

    special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type)

    web_user = def_config.DATABASE_WEBUSER
    assert check_grant_access(temp_db_conn, web_user, phrase_class, phrase_type)
222
def test_create_place_classtype_table_and_indexes(
        temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
        getorcreate_amenityoperator_funcs, special_phrases_importer):
    """
        Test that _create_place_classtype_table_and_indexes() creates,
        for every pair of the given set, the place_classtype table,
        its place_id and centroid indexes, and grants access to
        the web user.
    """
    pairs = {('class1', 'type1'), ('class2', 'type2')}

    special_phrases_importer._create_place_classtype_table_and_indexes(pairs)

    for phrase_class, phrase_type in pairs:
        assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
        assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
        assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER,
                                  phrase_class, phrase_type)
240
def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
    """
        Test that _process_xml_content() processes the given xml content
        correctly: it must execute the SQL functions for amenities and
        return a collection of (class, type) pairs that contains the
        expected pair.
    """
    class_test = 'aerialway'
    type_test = 'zip_line'

    pairs = special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en')

    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)
    # Check the pair itself. Turning the pairs into a dict would keep only
    # one type per class and would not prove that class and type belong
    # to the same phrase.
    assert (class_test, type_test) in pairs
257
def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
                                             temp_db_conn):
    """
        Check the _remove_non_existent_phrases_from_db() method.

        Entries of the word table listed in words_phrases_to_delete
        should be removed, except those also listed in
        words_phrases_still_exist.

        place_classtype tables listed in table_phrases_to_delete
        should be dropped.
    """
    obsolete_phrase = ('normalized_word', 'class', 'type', 'near')
    surviving_phrase = (
        'normalized_word_exists', 'class_exists', 'type_exists', 'near'
    )

    with temp_db_conn.cursor() as cursor:
        special_phrases_importer.words_phrases_to_delete = {
            obsolete_phrase,
            surviving_phrase
        }
        special_phrases_importer.words_phrases_still_exist = {surviving_phrase}
        special_phrases_importer.table_phrases_to_delete = {
            'place_classtype_testclasstypetable_to_delete'
        }

        special_phrases_importer._remove_non_existent_phrases_from_db()

        cursor.execute('SELECT word, class, type, operator FROM word;')
        remaining_words = cursor.fetchall()

        cursor.execute("""
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema='public'
            AND table_name like 'place_classtype_%';
        """)
        remaining_tables = cursor.fetchall()

        # Only the phrase that still exists must be left in the word table.
        assert len(remaining_words) == 1
        assert remaining_words[0] == [
            'normalized_word_exists', 'class_exists', 'type_exists', 'near'
        ]

        # Only the table that was not scheduled for deletion must be left.
        assert len(remaining_tables) == 1
        assert remaining_tables[0][0] == 'place_classtype_testclasstypetable_to_keep'
306
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
    """
        Check that the main import_from_wiki() method runs correctly.
        It should create the place_classtype table with its place_id and
        centroid indexes, grant access to the web user and execute the
        SQL functions for amenities. The fetch and remove steps are
        replaced by capturing mocks and must each be called once.
    """
    fetch_words_mock = MockParamCapture()
    fetch_tables_mock = MockParamCapture()
    remove_phrases_mock = MockParamCapture()

    replacements = (
        ('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_words_phrases',
         fetch_words_mock),
        ('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_place_classtype_tables',
         fetch_tables_mock),
        ('nominatim.tools.special_phrases.SpecialPhrasesImporter._remove_non_existent_phrases_from_db',
         remove_phrases_mock),
        ('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content',
         mock_get_wiki_content),
    )
    for target, replacement in replacements:
        monkeypatch.setattr(target, replacement)

    special_phrases_importer.import_from_wiki(['en'])

    class_test, type_test = 'aerialway', 'zip_line'

    assert check_table_exist(temp_db_conn, class_test, type_test)
    assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
    assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
    assert check_amenities_with_op(temp_db_conn)
    assert check_amenities_without_op(temp_db_conn)

    for captured in (fetch_words_mock, fetch_tables_mock, remove_phrases_mock):
        assert captured.called == 1
338
def mock_get_wiki_content(lang):
    """
        Stand-in for the _get_wiki_content() method: whatever language
        is requested, hand back the content of the static xml test file.
    """
    static_content = get_test_xml_wiki_content()
    return static_content
345
def get_test_xml_wiki_content():
    """
        Return the content of the static xml test file as a string.
    """
    xml_test_content_path = (TEST_BASE_DIR / 'testdata' / 'special_phrases_test_content.txt').resolve()
    # Read with an explicit encoding so the result does not depend on the
    # locale of the machine running the tests.
    # NOTE(review): the test file is assumed to be UTF-8 encoded - confirm.
    return xml_test_content_path.read_text(encoding='utf-8')
353
def check_table_exist(temp_db_conn, phrase_class, phrase_type):
    """
        Look up the place_classtype table of the given phrase_class and
        phrase_type in information_schema.tables and return the first
        matching row as delivered by the cursor's fetchone().
    """
    lookup_sql = """
            SELECT *
            FROM information_schema.tables
            WHERE table_type='BASE TABLE'
            AND table_name='{}'""".format(
                'place_classtype_{}_{}'.format(phrase_class, phrase_type))

    with temp_db_conn.cursor() as cursor:
        cursor.execute(lookup_sql)
        return cursor.fetchone()
368
def check_grant_access(temp_db_conn, user, phrase_class, phrase_type):
    """
        Look up, in information_schema.role_table_grants, a SELECT grant
        for the given user on the place_classtype table of the given
        phrase_class and phrase_type, and return the first matching row
        as delivered by the cursor's fetchone().
    """
    granted_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
    grant_sql = """
                SELECT * FROM information_schema.role_table_grants
                WHERE table_name='{}'
                AND grantee='{}'
                AND privilege_type='SELECT'""".format(granted_table, user)

    with temp_db_conn.cursor() as cursor:
        cursor.execute(grant_sql)
        return cursor.fetchone()
383
def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
    """
        Tell whether both the centroid index and the place_id index exist
        for the place_classtype table of the given phrase_class and
        phrase_type.
    """
    index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
    centroid_index = index_prefix + 'centroid'
    place_id_index = index_prefix + 'place_id'

    # The centroid index is looked up first; the place_id lookup only runs
    # when the centroid index is there.
    return temp_db_conn.index_exists(centroid_index) and temp_db_conn.index_exists(place_id_index)
396
def check_amenities_with_op(temp_db_conn):
    """
        Tell whether the temp_with_operator test table, filled by the
        getorcreate_amenityoperator() SQL function, holds more than one row
        (meaning the SQL function was called more than once).
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_with_operator")
        recorded_rows = cursor.fetchall()
        return len(recorded_rows) > 1
405
def check_amenities_without_op(temp_db_conn):
    """
        Tell whether the temp_without_operator test table, filled by the
        getorcreate_amenity() SQL function, holds more than one row
        (meaning the SQL function was called more than once).
    """
    with temp_db_conn.cursor() as cursor:
        cursor.execute("SELECT * FROM temp_without_operator")
        recorded_rows = cursor.fetchall()
        return len(recorded_rows) > 1
414
@pytest.fixture
def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
    """
        Build a SpecialPhrasesImporter from the default configuration,
        the temporary php directory and the temporary database connection.
    """
    importer = SpecialPhrasesImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn)
    return importer
421
@pytest.fixture
def temp_phplib_dir_with_migration():
    """
        Provide a temporary php directory holding a migration
        subdirectory with a copy of the PhraseSettingsToJson.php script.
        The directory is removed once the fixture is torn down.
    """
    script_name = 'PhraseSettingsToJson.php'
    source_script = (TEST_BASE_DIR / '..' / 'lib-php' / 'migration' / script_name).resolve()

    with tempfile.TemporaryDirectory() as phpdir:
        php_root = Path(phpdir)
        migration_dir = php_root / 'migration'
        migration_dir.mkdir()
        copyfile(source_script, (migration_dir / script_name).resolve())

        yield php_root
436
@pytest.fixture
def default_phrases(word_table, temp_db_cursor):
    """
        Insert two phrases into the word table and create two
        place_classtype tables, one named '..._to_delete' and one
        named '..._to_keep'.
    """
    setup_sql = """
        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
        'class', 'type', null, 0, 'near');

        INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
        'class_exists', 'type_exists', null, 0, 'near');

        CREATE TABLE place_classtype_testclasstypetable_to_delete();
        CREATE TABLE place_classtype_testclasstypetable_to_keep();"""
    temp_db_cursor.execute(setup_sql)
448
@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
    """
        Create a simplified make_standard_name() SQL function
        which only trims the given name.
    """
    function_sql = """
        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
        BEGIN
        RETURN trim(name); --Basically return only the trimed name for the tests
        END;
        $$ LANGUAGE plpgsql IMMUTABLE;"""
    temp_db_cursor.execute(function_sql)
457         
@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
    """
        Create the temp_without_operator table and a getorcreate_amenity()
        SQL function which records each of its calls as a 'no_operator'
        row in that table.
    """
    setup_sql = """
        CREATE TABLE temp_without_operator(op TEXT);
    
        CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
                                                    lookup_class text, lookup_type text)
        RETURNS void as $$
        BEGIN
            INSERT INTO temp_without_operator VALUES('no_operator');
        END;
        $$ LANGUAGE plpgsql"""
    temp_db_cursor.execute(setup_sql)
470
@pytest.fixture
def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
    """
        Create the temp_with_operator table and a
        getorcreate_amenityoperator() SQL function which records the
        operator of each of its calls as a row in that table.
    """
    setup_sql = """
        CREATE TABLE temp_with_operator(op TEXT);

        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
                                                    lookup_class text, lookup_type text, op text)
        RETURNS void as $$
        BEGIN 
            INSERT INTO temp_with_operator VALUES(op);
        END;
        $$ LANGUAGE plpgsql"""
    temp_db_cursor.execute(setup_sql)