From 78fcabade88c885f2506911201e52de912204b56 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 1 Sep 2021 22:08:39 +0200 Subject: [PATCH] move country name generation to country_info module --- nominatim/clicmd/setup.py | 4 +-- nominatim/tools/country_info.py | 37 +++++++++++++++++++++++ nominatim/tools/database_import.py | 36 ---------------------- test/python/test_cli.py | 8 ++--- test/python/test_tools_database_import.py | 26 ---------------- 5 files changed, 43 insertions(+), 68 deletions(-) diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index e925f665..3d04a57a 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -113,8 +113,8 @@ class SetupAll: database_import.create_search_indices(conn, args.config, drop=args.no_updates) LOG.warning('Create search index for default country names.') - database_import.create_country_names(conn, tokenizer, - args.config.LANGUAGES) + country_info.create_country_names(conn, tokenizer, + args.config.LANGUAGES) conn.commit() if args.no_updates: freeze.drop_update_tables(conn) diff --git a/nominatim/tools/country_info.py b/nominatim/tools/country_info.py index 49cf7286..1b61ae68 100644 --- a/nominatim/tools/country_info.py +++ b/nominatim/tools/country_info.py @@ -1,6 +1,8 @@ """ Functions for importing and managing static country information. """ +import psycopg2.extras + from nominatim.db import utils as db_utils from nominatim.db.connection import connect @@ -16,3 +18,38 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False): with conn.cursor() as cur: cur.execute('UPDATE country_name SET partition = 0') conn.commit() + + +def create_country_names(conn, tokenizer, languages=None): + """ Add default country names to search index. `languages` is a comma- + separated list of language codes as used in OSM. If `languages` is not + empty then only name translations for the given languages are added + to the index. + """ + if languages: + languages = languages.split(',') + + def _include_key(key): + return key == 'name' or \ + (key.startswith('name:') and (not languages or key[5:] in languages)) + + with conn.cursor() as cur: + psycopg2.extras.register_hstore(cur) + cur.execute("""SELECT country_code, name FROM country_name + WHERE country_code is not null""") + + with tokenizer.name_analyzer() as analyzer: + for code, name in cur: + names = {'countrycode': code} + if code == 'gb': + names['short_name'] = 'UK' + if code == 'us': + names['short_name'] = 'United States' + + # country names (only in languages as provided) + if name: + names.update(((k, v) for k, v in name.items() if _include_key(k))) + + analyzer.add_country_names(code, names) + + conn.commit() diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 8a83ad0c..38635c34 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -8,7 +8,6 @@ import subprocess from pathlib import Path import psutil -import psycopg2.extras from psycopg2 import sql as pysql from nominatim.db.connection import connect, get_pg_env @@ -235,38 +234,3 @@ def create_search_indices(conn, config, drop=False): sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'indices.sql', drop=drop) - - -def create_country_names(conn, tokenizer, languages=None): - """ Add default country names to search index. `languages` is a comma- - separated list of language codes as used in OSM. If `languages` is not - empty then only name translations for the given languages are added - to the index. - """ - if languages: - languages = languages.split(',') - - def _include_key(key): - return key == 'name' or \ - (key.startswith('name:') and (not languages or key[5:] in languages)) - - with conn.cursor() as cur: - psycopg2.extras.register_hstore(cur) - cur.execute("""SELECT country_code, name FROM country_name - WHERE country_code is not null""") - - with tokenizer.name_analyzer() as analyzer: - for code, name in cur: - names = {'countrycode': code} - if code == 'gb': - names['short_name'] = 'UK' - if code == 'us': - names['short_name'] = 'United States' - - # country names (only in languages as provided) - if name: - names.update(((k, v) for k, v in name.items() if _include_key(k))) - - analyzer.add_country_names(code, names) - - conn.commit() diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 7c56fb1c..c83ee3dd 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -180,7 +180,7 @@ class TestCliWithDb: mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'), mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), @@ -204,7 +204,7 @@ class TestCliWithDb: mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), mock_func_factory(nominatim.tools.database_import, 'load_data'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), @@ -222,7 +222,7 @@ class TestCliWithDb: temp_db_conn): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.country_info, 'create_country_names'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') @@ -243,7 +243,7 @@ class TestCliWithDb: def test_import_continue_postprocess(self, mock_func_factory): mocks = [ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), - mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.country_info, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') ] diff --git a/test/python/test_tools_database_import.py b/test/python/test_tools_database_import.py index 9c38c162..eda88903 100644 --- a/test/python/test_tools_database_import.py +++ b/test/python/test_tools_database_import.py @@ -156,29 +156,3 @@ def test_load_data(dsn, place_row, placex_table, osmline_table, assert temp_db_cursor.table_rows('placex') == 30 assert temp_db_cursor.table_rows('location_property_osmline') == 1 - - -@pytest.mark.parametrize("languages", (None, ' fr,en')) -def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor, - table_factory, tokenizer_mock, languages): - - table_factory('country_name', 'country_code varchar(2), name hstore', - content=(('us', '"name"=>"us1","name:af"=>"us2"'), - ('fr', '"name"=>"Fra", "name:en"=>"Fren"'))) - - assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2 - - tokenizer = tokenizer_mock() - - database_import.create_country_names(temp_db_conn, tokenizer, languages) - - assert len(tokenizer.analyser_cache['countries']) == 2 - - result_set = {k: set(v.values()) for k, v in tokenizer.analyser_cache['countries']} - - if languages: - assert result_set == {'us' : set(('us', 'us1', 'United States')), - 'fr' : set(('fr', 'Fra', 'Fren'))} - else: - assert result_set == {'us' : set(('us', 'us1', 'us2', 'United States')), - 'fr' : set(('fr', 'Fra', 'Fren'))} -- 2.39.5