]> git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/test_tools_import_special_phrases.py
Special phrases documentation updated
[nominatim.git] / test / python / test_tools_import_special_phrases.py
index b77ae10dc1233f0ede3a3535a2924a22b89251db..1b4ab19155fd20b6e84a54c883647e5de87a542b 100644 (file)
@@ -1,40 +1,52 @@
 """
     Tests for import special phrases methods
 """
     Tests for import special phrases methods
-    of the class SpecialPhrasesImporter.
+    of the class SPImporter.
 """
 from nominatim.errors import UsageError
 from pathlib import Path
 import tempfile
 from shutil import copyfile
 import pytest
 """
 from nominatim.errors import UsageError
 from pathlib import Path
 import tempfile
 from shutil import copyfile
 import pytest
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools.special_phrases.sp_importer import SPImporter
+from nominatim.tools.special_phrases.sp_wiki_loader import SPWikiLoader
+from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
+from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
 
 TEST_BASE_DIR = Path(__file__) / '..' / '..'
 
 
 TEST_BASE_DIR = Path(__file__) / '..' / '..'
 
-def test_check_sanity_class(special_phrases_importer):
+def test_fetch_existing_place_classtype_tables(sp_importer, temp_db_cursor):
+    """
+        Check for the fetch_existing_place_classtype_tables() method.
+        It should return the table just created.
+    """
+    temp_db_cursor.execute('CREATE TABLE place_classtype_testclasstypetable()')
+
+    sp_importer._fetch_existing_place_classtype_tables()
+    contained_table = sp_importer.table_phrases_to_delete.pop()
+    assert contained_table == 'place_classtype_testclasstypetable'
+
+def test_check_sanity_class(sp_importer):
     """
         Check for _check_sanity() method.
         If a wrong class or type is given, an UsageError should raise.
         If a good class and type are given, nothing special happens.
     """
     """
         Check for _check_sanity() method.
         If a wrong class or type is given, an UsageError should raise.
         If a good class and type are given, nothing special happens.
     """
-    with pytest.raises(UsageError):
-        special_phrases_importer._check_sanity('en', '', 'type')
-    
-    with pytest.raises(UsageError):
-        special_phrases_importer._check_sanity('en', 'class', '')
 
 
-    special_phrases_importer._check_sanity('en', 'class', 'type')
+    assert not sp_importer._check_sanity(SpecialPhrase('en', '', 'type', ''))
+    assert not sp_importer._check_sanity(SpecialPhrase('en', 'class', '', ''))
+
+    assert sp_importer._check_sanity(SpecialPhrase('en', 'class', 'type', ''))
 
 
-def test_load_white_and_black_lists(special_phrases_importer):
+def test_load_white_and_black_lists(sp_importer):
     """
         Test that _load_white_and_black_lists() well return
         black list and white list and that they are of dict type.
     """
     """
         Test that _load_white_and_black_lists() well return
         black list and white list and that they are of dict type.
     """
-    black_list, white_list = special_phrases_importer._load_white_and_black_lists()
+    black_list, white_list = sp_importer._load_white_and_black_lists()
 
     assert isinstance(black_list, dict) and isinstance(white_list, dict)
 
 
     assert isinstance(black_list, dict) and isinstance(white_list, dict)
 
-def test_convert_php_settings(special_phrases_importer):
+def test_convert_php_settings(sp_importer):
     """
         Test that _convert_php_settings_if_needed() convert the given
         php file to a json file.
     """
         Test that _convert_php_settings_if_needed() convert the given
         php file to a json file.
@@ -44,19 +56,19 @@ def test_convert_php_settings(special_phrases_importer):
     with tempfile.TemporaryDirectory() as temp_dir:
         temp_settings = (Path(temp_dir) / 'phrase_settings.php').resolve()
         copyfile(php_file, temp_settings)
     with tempfile.TemporaryDirectory() as temp_dir:
         temp_settings = (Path(temp_dir) / 'phrase_settings.php').resolve()
         copyfile(php_file, temp_settings)
-        special_phrases_importer._convert_php_settings_if_needed(temp_settings)
+        sp_importer._convert_php_settings_if_needed(temp_settings)
 
         assert (Path(temp_dir) / 'phrase_settings.json').is_file()
 
 
         assert (Path(temp_dir) / 'phrase_settings.json').is_file()
 
-def test_convert_settings_wrong_file(special_phrases_importer):
+def test_convert_settings_wrong_file(sp_importer):
     """
         Test that _convert_php_settings_if_needed() raise an exception
         if the given file is not a valid file.
     """
     with pytest.raises(UsageError, match='random_file is not a valid file.'):
     """
         Test that _convert_php_settings_if_needed() raise an exception
         if the given file is not a valid file.
     """
     with pytest.raises(UsageError, match='random_file is not a valid file.'):
-        special_phrases_importer._convert_php_settings_if_needed('random_file')
+        sp_importer._convert_php_settings_if_needed('random_file')
 
 
-def test_convert_settings_json_already_exist(special_phrases_importer):
+def test_convert_settings_json_already_exist(sp_importer):
     """
         Test that if we give to '_convert_php_settings_if_needed' a php file path
         and that a the corresponding json file already exists, it is returned.
     """
         Test that if we give to '_convert_php_settings_if_needed' a php file path
         and that a the corresponding json file already exists, it is returned.
@@ -64,52 +76,22 @@ def test_convert_settings_json_already_exist(special_phrases_importer):
     php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
     json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
 
     php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
     json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
 
-    returned = special_phrases_importer._convert_php_settings_if_needed(php_file)
+    returned = sp_importer._convert_php_settings_if_needed(php_file)
 
     assert returned == json_file
 
 
     assert returned == json_file
 
-def test_convert_settings_giving_json(special_phrases_importer):
+def test_convert_settings_giving_json(sp_importer):
     """
         Test that if we give to '_convert_php_settings_if_needed' a json file path
         the same path is directly returned
     """
     json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
     """
         Test that if we give to '_convert_php_settings_if_needed' a json file path
         the same path is directly returned
     """
     json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
-    
-    returned = special_phrases_importer._convert_php_settings_if_needed(json_file)
-
-    assert returned == json_file
-
-def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
-                                       word_table, temp_db_conn):
-    """
-        Test that _process_amenity() execute well the 
-        getorcreate_amenityoperator() SQL function and that
-        the 2 differents operators are well handled.
-    """
-    special_phrases_importer._process_amenity('', '', '', '', 'near')
-    special_phrases_importer._process_amenity('', '', '', '', 'in')
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
-        results = temp_db_cursor.fetchall()
 
 
-    assert len(results) == 2
+    returned = sp_importer._convert_php_settings_if_needed(json_file)
 
 
-def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
-                                          temp_db_conn):
-    """
-        Test that _process_amenity() execute well the
-        getorcreate_amenity() SQL function.
-    """
-    special_phrases_importer._process_amenity('', '', '', '', '')
-
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
-        result = temp_db_cursor.fetchone()
-
-    assert result
+    assert returned == json_file
 
 
-def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
+def test_create_place_classtype_indexes(temp_db_conn, sp_importer):
     """
         Test that _create_place_classtype_indexes() create the
         place_id index and centroid index on the right place_class_type table.
     """
         Test that _create_place_classtype_indexes() create the
         place_id index and centroid index on the right place_class_type table.
@@ -122,24 +104,24 @@ def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
         temp_db_cursor.execute("CREATE EXTENSION postgis;")
         temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
 
         temp_db_cursor.execute("CREATE EXTENSION postgis;")
         temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
 
-    special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)
+    sp_importer._create_place_classtype_indexes('', phrase_class, phrase_type)
 
     assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
 
 
     assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
 
-def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
+def test_create_place_classtype_table(temp_db_conn, placex_table, sp_importer):
     """
         Test that _create_place_classtype_table() create
         the right place_classtype table.
     """
     phrase_class = 'class'
     phrase_type = 'type'
     """
         Test that _create_place_classtype_table() create
         the right place_classtype table.
     """
     phrase_class = 'class'
     phrase_type = 'type'
-    special_phrases_importer._create_place_classtype_table('', phrase_class, phrase_type)
+    sp_importer._create_place_classtype_table('', phrase_class, phrase_type)
 
     assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
 
 
     assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
 
-def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
+def test_grant_access_to_web_user(temp_db_conn, def_config, sp_importer):
     """
     """
-        Test that _grant_access_to_webuser() give 
+        Test that _grant_access_to_webuser() give
         right access to the web user.
     """
     phrase_class = 'class'
         right access to the web user.
     """
     phrase_class = 'class'
@@ -149,13 +131,13 @@ def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_impo
     with temp_db_conn.cursor() as temp_db_cursor:
         temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
 
     with temp_db_conn.cursor() as temp_db_cursor:
         temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
 
-    special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type)
+    sp_importer._grant_access_to_webuser(phrase_class, phrase_type)
 
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
 
 def test_create_place_classtype_table_and_indexes(
 
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
 
 def test_create_place_classtype_table_and_indexes(
-        temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
-        getorcreate_amenityoperator_funcs, special_phrases_importer):
+        temp_db_conn, def_config, placex_table,
+        sp_importer):
     """
         Test that _create_place_classtype_table_and_indexes()
         create the right place_classtype tables and place_id indexes
     """
         Test that _create_place_classtype_table_and_indexes()
         create the right place_classtype tables and place_id indexes
@@ -164,39 +146,69 @@ def test_create_place_classtype_table_and_indexes(
     """
     pairs = set([('class1', 'type1'), ('class2', 'type2')])
 
     """
     pairs = set([('class1', 'type1'), ('class2', 'type2')])
 
-    special_phrases_importer._create_place_classtype_table_and_indexes(pairs)
+    sp_importer._create_place_classtype_table_and_indexes(pairs)
 
     for pair in pairs:
         assert check_table_exist(temp_db_conn, pair[0], pair[1])
         assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
         assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
 
 
     for pair in pairs:
         assert check_table_exist(temp_db_conn, pair[0], pair[1])
         assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
         assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
 
-def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, 
-                             getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
+                                             temp_db_conn):
     """
     """
-        Test that _process_xml_content() process the given xml content right
-        by executing the right SQL functions for amenities and 
-        by returning the right set of pairs.
+        Check for the _remove_non_existent_tables_from_db() method.
+
+        It should remove entries from the word table which are contained
+        in the words_phrases_to_delete set and not those also contained
+        in the words_phrases_still_exist set.
+
+        place_classtype tables contained in table_phrases_to_delete should
+        be deleted.
     """
     """
-    class_test = 'aerialway'
-    type_test = 'zip_line'
+    with temp_db_conn.cursor() as temp_db_cursor:
+        sp_importer.table_phrases_to_delete = {
+            'place_classtype_testclasstypetable_to_delete'
+        }
+
+        query_tables = """
+            SELECT table_name
+            FROM information_schema.tables
+            WHERE table_schema='public'
+            AND table_name like 'place_classtype_%';
+        """
 
 
-    #Converted output set to a dict for easy assert further.
-    results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))
+        sp_importer._remove_non_existent_tables_from_db()
 
 
-    assert check_amenities_with_op(temp_db_conn)
-    assert check_amenities_without_op(temp_db_conn)
-    assert results[class_test] and type_test in results.values()
+        temp_db_cursor.execute(query_tables)
+        tables_result = temp_db_cursor.fetchall()
+        assert (len(tables_result) == 1 and
+            tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
+        )
 
 
-def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table, 
-                          getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+@pytest.mark.parametrize("should_replace", [(True), (False)])
+def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
+                        placex_table, tokenizer_mock, should_replace):
     """
     """
-        Check that the main import_from_wiki() method is well executed.
+        Check that the main import_phrases() method is well executed.
         It should create the place_classtype table, the place_id and centroid indexes,
         grant access to the web user and executing the SQL functions for amenities.
         It should create the place_classtype table, the place_id and centroid indexes,
         grant access to the web user and executing the SQL functions for amenities.
+        It should also update the database well by deleting or preserving existing entries
+        of the database.
     """
     """
-    monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
-    special_phrases_importer.import_from_wiki(['en'])
+    #Add some data to the database before execution in order to test
+    #what is deleted and what is preserved.
+    with temp_db_conn.cursor() as temp_db_cursor:
+        temp_db_cursor.execute("""
+            CREATE TABLE place_classtype_amenity_animal_shelter();
+            CREATE TABLE place_classtype_wrongclass_wrongtype();""")
+    
+    monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
+                        mock_get_wiki_content)
+
+    tokenizer = tokenizer_mock()
+    sp_importer.import_phrases(tokenizer, should_replace)
+
+    assert len(tokenizer.analyser_cache['special_phrases']) == 18
 
     class_test = 'aerialway'
     type_test = 'zip_line'
 
     class_test = 'aerialway'
     type_test = 'zip_line'
@@ -204,10 +216,41 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
     assert check_table_exist(temp_db_conn, class_test, type_test)
     assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
     assert check_table_exist(temp_db_conn, class_test, type_test)
     assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
     assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
-    assert check_amenities_with_op(temp_db_conn)
-    assert check_amenities_without_op(temp_db_conn)
+    assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
+    if should_replace:
+        assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
+
+    #Format (query, should_return_something_bool) used to easily execute all asserts
+    queries_tests = set()
 
 
-def mock_get_wiki_content(lang):
+    #Used to check that a correct place_classtype table that was already in the database is still there.
+    query_existing_table = """
+        SELECT table_name
+        FROM information_schema.tables
+        WHERE table_schema='public'
+        AND table_name = 'place_classtype_amenity_animal_shelter';
+    """
+    queries_tests.add((query_existing_table, True))
+
+    #Used to check that wrong place_classtype table was deleted from the database.
+    query_wrong_table = """
+        SELECT table_name
+        FROM information_schema.tables
+        WHERE table_schema='public'
+        AND table_name = 'place_classtype_wrongclass_wrongtype';
+    """
+    if should_replace:
+        queries_tests.add((query_wrong_table, False))
+
+    with temp_db_conn.cursor() as temp_db_cursor:
+        for query in queries_tests:
+            temp_db_cursor.execute(query[0])
+            if (query[1] == True):
+                assert temp_db_cursor.fetchone()
+            else:
+                assert not temp_db_cursor.fetchone()
+
+def mock_get_wiki_content(self, lang):
     """
         Mock the _get_wiki_content() method to return
         static xml test file content.
     """
         Mock the _get_wiki_content() method to return
         static xml test file content.
@@ -265,30 +308,13 @@ def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
         temp_db_conn.index_exists(index_prefix + 'place_id')
     )
 
         temp_db_conn.index_exists(index_prefix + 'place_id')
     )
 
-def check_amenities_with_op(temp_db_conn):
-    """
-        Check that the test table for the SQL function getorcreate_amenityoperator()
-        contains more than one value (so that the SQL function was call more than one time).
-    """
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM temp_with_operator")
-        return len(temp_db_cursor.fetchall()) > 1
-
-def check_amenities_without_op(temp_db_conn):
-    """
-        Check that the test table for the SQL function getorcreate_amenity()
-        contains more than one value (so that the SQL function was call more than one time).
-    """
-    with temp_db_conn.cursor() as temp_db_cursor:
-        temp_db_cursor.execute("SELECT * FROM temp_without_operator")
-        return len(temp_db_cursor.fetchall()) > 1
-
 @pytest.fixture
 @pytest.fixture
-def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
+def sp_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
     """
     """
-        Return an instance of SpecialPhrasesImporter.
+        Return an instance of SPImporter.
     """
     """
-    return SpecialPhrasesImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn)
+    loader = SPWikiLoader(def_config, ['en'])
+    return SPImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn, loader)
 
 @pytest.fixture
 def temp_phplib_dir_with_migration():
 
 @pytest.fixture
 def temp_phplib_dir_with_migration():
@@ -306,36 +332,7 @@ def temp_phplib_dir_with_migration():
         yield Path(phpdir)
 
 @pytest.fixture
         yield Path(phpdir)
 
 @pytest.fixture
-def make_strandard_name_func(temp_db_cursor):
-    temp_db_cursor.execute("""
-        CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
-        BEGIN
-        RETURN trim(name); --Basically return only the trimed name for the tests
-        END;
-        $$ LANGUAGE plpgsql IMMUTABLE;""")
-        
-@pytest.fixture
-def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
-    temp_db_cursor.execute("""
-        CREATE TABLE temp_without_operator(op TEXT);
-    
-        CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
-                                                    lookup_class text, lookup_type text)
-        RETURNS void as $$
-        BEGIN
-            INSERT INTO temp_without_operator VALUES('no_operator');
-        END;
-        $$ LANGUAGE plpgsql""")
-
-@pytest.fixture
-def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
+def default_phrases(temp_db_cursor):
     temp_db_cursor.execute("""
     temp_db_cursor.execute("""
-        CREATE TABLE temp_with_operator(op TEXT);
-
-        CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
-                                                    lookup_class text, lookup_type text, op text)
-        RETURNS void as $$
-        BEGIN 
-            INSERT INTO temp_with_operator VALUES(op);
-        END;
-        $$ LANGUAGE plpgsql""")
\ No newline at end of file
+        CREATE TABLE place_classtype_testclasstypetable_to_delete();
+        CREATE TABLE place_classtype_testclasstypetable_to_keep();""")