From: AntoJvlt Date: Mon, 17 May 2021 10:40:50 +0000 (+0200) Subject: Added --no-replace command for special phrases importation and added corresponding... X-Git-Tag: v4.0.0~79^2~3 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/8b8dfc46ebee5f78a91685dc83dc9382d21aad0e?ds=sidebyside Added --no-replace command for special phrases importation and added corresponding tests --- diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index 66fb41ed..b20a4101 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -27,6 +27,8 @@ class ImportSpecialPhrases: help='Import special phrases from the OSM wiki to the database.') group.add_argument('--import-from-csv', metavar='FILE', help='Import special phrases from a CSV file.') + group.add_argument('--no-replace', action='store_true', + help='Keep the old phrases and only add the new ones.') @staticmethod def run(args): @@ -51,7 +53,8 @@ class ImportSpecialPhrases: from ..tokenizer import factory as tokenizer_factory tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config) + should_replace = not args.no_replace with connect(args.config.get_libpq_dsn()) as db_connection: SPImporter( args.config, args.phplib_dir, db_connection, loader - ).import_phrases(tokenizer) + ).import_phrases(tokenizer, should_replace) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index 065fdb03..e07602d9 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer: # WHERE word_id is null and type = 'postcode'""") - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): """ Replace the search index for special phrases with the new phrases. """ norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3]) @@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer: columns=['word', 'word_token', 'class', 'type', 'operator', 'search_name_count']) - if to_delete: + if to_delete and should_replace: psycopg2.extras.execute_values( cur, """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 438a5aff..5bd45c51 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -314,7 +314,7 @@ class LegacyNameAnalyzer: FROM location_postcode) x""") - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): """ Replace the search index for special phrases with the new phrases. """ norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3]) @@ -343,7 +343,7 @@ class LegacyNameAnalyzer: FROM (VALUES %s) as v(name, class, type, op))""", to_add) - if to_delete: + if to_delete and should_replace: psycopg2.extras.execute_values( cur, """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py index e82f721e..ef4e85bd 100644 --- a/nominatim/tools/special_phrases/sp_importer.py +++ b/nominatim/tools/special_phrases/sp_importer.py @@ -23,7 +23,7 @@ LOG = logging.getLogger() class SPImporter(): # pylint: disable-msg=too-many-instance-attributes """ - Class handling the process of special phrases importations into the database. + Class handling the process of special phrases importation into the database. Take a sp loader which load the phrases from an external source. """ @@ -42,10 +42,14 @@ class SPImporter(): #special phrases class/type on the wiki. self.table_phrases_to_delete = set() - def import_phrases(self, tokenizer): + def import_phrases(self, tokenizer, should_replace): """ - Iterate through all specified languages and - extract corresponding special phrases from the wiki. + Iterate through all SpecialPhrases extracted from the + loader and import them into the database. + + If should_replace is set to True only the loaded phrases + will be kept into the database. All other phrases already + in the database will be removed. """ LOG.warning('Special phrases importation starting') self._fetch_existing_place_classtype_tables() @@ -60,11 +64,12 @@ class SPImporter(): class_type_pairs.update(result) self._create_place_classtype_table_and_indexes(class_type_pairs) - self._remove_non_existent_tables_from_db() + if should_replace: + self._remove_non_existent_tables_from_db() self.db_connection.commit() with tokenizer.name_analyzer() as analyzer: - analyzer.update_special_phrases(self.word_phrases) + analyzer.update_special_phrases(self.word_phrases, should_replace) LOG.warning('Import done.') self.statistics_handler.notify_import_done() diff --git a/test/python/dummy_tokenizer.py b/test/python/dummy_tokenizer.py index 6352a644..2e61a245 100644 --- a/test/python/dummy_tokenizer.py +++ b/test/python/dummy_tokenizer.py @@ -54,7 +54,7 @@ class DummyNameAnalyzer: def add_postcodes_from_db(self): pass - def update_special_phrases(self, phrases): + def update_special_phrases(self, phrases, should_replace): self.analyser_cache['special_phrases'] = phrases def add_country_names(self, code, names): diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 9810aee7..1d89ec69 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock, assert bnd_mock.called == do_bnds assert rank_mock.called == do_ranks -def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock): +@pytest.mark.parametrize("no_replace", [(True), (False)]) +def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace): func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases') - call_nominatim('special-phrases', '--import-from-wiki') + if no_replace: + call_nominatim('special-phrases', '--import-from-wiki', '--no-replace') + else: + call_nominatim('special-phrases', '--import-from-wiki') assert func.called == 1 -def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock): +@pytest.mark.parametrize("no_replace", [(True), (False)]) +def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace): func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases') - testdata = Path('__file__') / '..' / '..' / 'testdb' + testdata = SRC_DIR / 'test' / 'testdb' csv_path = str((testdata / 'full_en_phrases_test.csv').resolve()) - call_nominatim('special-phrases', '--import-from-csv', csv_path) + + if no_replace: + call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace') + else: + call_nominatim('special-phrases', '--import-from-csv', csv_path) assert func.called == 1 diff --git a/test/python/test_tokenizer_legacy.py b/test/python/test_tokenizer_legacy.py index c567a4c1..80147172 100644 --- a/test/python/test_tokenizer_legacy.py +++ b/test/python/test_tokenizer_legacy.py @@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor, ("König bei", "amenity", "royal", "near"), ("Könige", "amenity", "royal", "-"), ("strasse", "highway", "primary", "in") - ]) + ], True) assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator FROM word WHERE class != 'place'""") \ @@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor, assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") - analyzer.update_special_phrases([]) + analyzer.update_special_phrases([], True) assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") +def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor, + make_standard_name): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' foo', 'foo', 'amenity', 'prison', 'in'), + (' bar', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + analyzer.update_special_phrases([], False) + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor, make_standard_name): temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) @@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor, ('prison', 'amenity', 'prison', 'in'), ('bar', 'highway', 'road', '-'), ('garden', 'leisure', 'garden', 'near') - ]) + ], True) assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator FROM word WHERE class != 'place'""") \ diff --git a/test/python/test_tokenizer_legacy_icu.py b/test/python/test_tokenizer_legacy_icu.py index 836f15b9..92a83249 100644 --- a/test/python/test_tokenizer_legacy_icu.py +++ b/test/python/test_tokenizer_legacy_icu.py @@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor) ("König bei", "amenity", "royal", "near"), ("Könige", "amenity", "royal", "-"), ("street", "highway", "primary", "in") - ]) + ], True) assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator FROM word WHERE class != 'place'""") \ @@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor): assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") with analyzer() as a: - a.update_special_phrases([]) + a.update_special_phrases([], True) assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") +def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,): + temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) + VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'), + (' BAR', 'bar', 'highway', 'road', null)""") + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + with analyzer() as a: + a.update_special_phrases([], False) + + assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""") + + def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor): temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator) VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'), @@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor): ('prison', 'amenity', 'prison', 'in'), ('bar', 'highway', 'road', '-'), ('garden', 'leisure', 'garden', 'near') - ]) + ], True) assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator FROM word WHERE class != 'place'""") \ diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index 6f451ade..1b4ab191 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases, tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep' ) +@pytest.mark.parametrize("should_replace", [(True), (False)]) def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, - placex_table, tokenizer_mock): + placex_table, tokenizer_mock, should_replace): """ Check that the main import_phrases() method is well executed. It should create the place_classtype table, the place_id and centroid indexes, @@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, CREATE TABLE place_classtype_wrongclass_wrongtype();""") monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content', - mock_get_wiki_content) + mock_get_wiki_content) tokenizer = tokenizer_mock() - sp_importer.import_phrases(tokenizer) + sp_importer.import_phrases(tokenizer, should_replace) assert len(tokenizer.analyser_cache['special_phrases']) == 18 @@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test) assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test) assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter') - assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type') + if should_replace: + assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type') #Format (query, should_return_something_bool) use to easily execute all asserts queries_tests = set() @@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, WHERE table_schema='public' AND table_name = 'place_classtype_wrongclass_wrongtype'; """ - queries_tests.add((query_wrong_table, False)) + if should_replace: + queries_tests.add((query_wrong_table, False)) with temp_db_conn.cursor() as temp_db_cursor: for query in queries_tests: @@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer, else: assert not temp_db_cursor.fetchone() -def mock_get_wiki_content(lang): +def mock_get_wiki_content(self, lang): """ Mock the _get_wiki_content() method to return static xml test file content. diff --git a/test/python/test_tools_sp_csv_loader.py b/test/python/test_tools_sp_csv_loader.py index 628dfd16..ccc9996d 100644 --- a/test/python/test_tools_sp_csv_loader.py +++ b/test/python/test_tools_sp_csv_loader.py @@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader): phrases = sp_csv_loader.parse_csv() assert check_phrases_content(phrases) - def test_next(sp_csv_loader): """ Test objects returned from the next() method. diff --git a/test/python/test_tools_sp_wiki_loader.py b/test/python/test_tools_sp_wiki_loader.py index ad8e22c3..b4f6d529 100644 --- a/test/python/test_tools_sp_wiki_loader.py +++ b/test/python/test_tools_sp_wiki_loader.py @@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config): mock_get_wiki_content) return loader -def mock_get_wiki_content(lang): +def mock_get_wiki_content(self, lang): """ Mock the _get_wiki_content() method to return static xml test file content.