]> git.openstreetmap.org Git - nominatim.git/blob - test/python/data/test_country_info.py
recreate word table when refreshing counts
[nominatim.git] / test / python / data / test_country_info.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for functions that handle country properties.
9 """
10 from textwrap import dedent
11 import pytest
12
13 from nominatim.data import country_info
14
@pytest.fixture
def loaded_country(def_config):
    """Set up the country configuration from `def_config` for the test."""
    country_info.setup_country_config(def_config)
18
19
@pytest.fixture
def env_with_country_config(project_env):
    """Return a factory that installs a custom `country_settings.yaml`.

    The factory takes the YAML text, dedents it, writes it into the
    project directory of `project_env` and returns `project_env` itself.
    """
    def _write_settings(yaml_text):
        settings_file = project_env.project_dir / 'country_settings.yaml'
        settings_file.write_text(dedent(yaml_text))
        return project_env

    return _write_settings
29
30
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cursor,
                              loaded_country, no_partitions):
    """Check that the country tables are created and populated."""
    country_info.setup_country_tables(dsn, src_dir / 'data', no_partitions)

    # country_name must hold exactly one row per country code.
    assert temp_db_cursor.table_exists('country_name')
    row_count = temp_db_cursor.table_rows('country_name')
    distinct_codes = temp_db_cursor.scalar(
        'SELECT count(DISTINCT country_code) FROM country_name')
    assert row_count == distinct_codes

    # Without partitioning every country ends up in partition 0.
    found_partitions = temp_db_cursor.row_set(
        "SELECT DISTINCT partition FROM country_name")
    if not no_partitions:
        assert len(found_partitions) > 10
    else:
        assert found_partitions == {(0, )}

    assert temp_db_cursor.table_exists('country_osm_grid')
    grid_rows = temp_db_cursor.table_rows('country_osm_grid')
    assert grid_rows > 100
50
51
@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
                              table_factory, tokenizer_mock, languages, loaded_country):
    """Check the country names handed to the tokenizer, with and without a language list."""
    rows = (('us', '"name"=>"us1","name:af"=>"us2"'),
            ('fr', '"name"=>"Fra", "name:en"=>"Fren"'))
    table_factory('country_name', 'country_code varchar(2), name hstore',
                  content=rows)

    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

    tokenizer = tokenizer_mock()

    country_info.create_country_names(temp_db_conn, tokenizer, languages)

    countries = tokenizer.analyser_cache['countries']
    assert len(countries) == 2

    names_by_country = {}
    for code, names in countries:
        names_by_country[code] = set(names.values())

    # 'us2' is only registered under name:af, so it is expected only
    # when no language list is given.
    expected_us = {'us', 'us1'} if languages else {'us', 'us1', 'us2'}
    assert names_by_country == {'us': expected_us,
                                'fr': {'fr', 'Fra', 'Fren'}}
77
78
def test_setup_country_names_prefixes(env_with_country_config):
    """Check that nested name settings are flattened into `key` / `key:<lang>` entries."""
    config = env_with_country_config("""\
                                     es:
                                       names:
                                         name:
                                           en: Spain
                                           de: Spanien
                                           default: Espagñe
                                     us:
                                       names:
                                         short_name:
                                           default: USA
                                         name:
                                           default: United States
                                           en: United States
                                     """)
    countries = country_info._CountryInfo()
    countries.load(config)

    expected_es = {"name": "Espagñe",
                   "name:en": "Spain",
                   "name:de": "Spanien"}
    assert countries.get('es')['names'] == expected_es

    expected_us = {"name": "United States",
                   "name:en": "United States",
                   "short_name": "USA"}
    assert countries.get('us')['names'] == expected_us

    # A country that is not configured must not report any names.
    assert 'names' not in countries.get('xx')
105
106
def test_setup_country_config_languages_not_loaded(env_with_country_config):
    """Check that a country without a `languages` entry gets an empty language list."""
    config = env_with_country_config("""\
                                     de:
                                         partition: 3
                                         names:
                                             name:
                                                 default: Deutschland
                                     """)
    countries = country_info._CountryInfo()
    countries.load(config)

    expected = {
        'de': {
            'partition': 3,
            'languages': [],
            'names': {'name': 'Deutschland'},
        },
    }
    assert dict(countries.items()) == expected
120
121
def test_setup_country_config_name_not_loaded(env_with_country_config):
    """Check that an empty `names` entry is loaded as an empty dictionary."""
    config = env_with_country_config("""\
                                     de:
                                         partition: 3
                                         languages: de
                                         names:
                                     """)

    countries = country_info._CountryInfo()
    countries.load(config)

    expected = {
        'de': {
            'partition': 3,
            'languages': ['de'],
            'names': {},
        },
    }
    assert dict(countries.items()) == expected
137
138
def test_setup_country_config_names_not_loaded(env_with_country_config):
    """Check that a country without any `names` key gets an empty names dictionary."""
    config = env_with_country_config("""
                                     de:
                                         partition: 3
                                         languages: de
                                     """)

    countries = country_info._CountryInfo()
    countries.load(config)

    expected = {
        'de': {
            'partition': 3,
            'languages': ['de'],
            'names': {},
        },
    }
    assert dict(countries.items()) == expected
153
154
def test_setup_country_config_special_character(env_with_country_config):
    """Check that an escape sequence in a quoted YAML name is loaded as the escaped character."""
    # The settings file receives the two characters backslash + N inside a
    # double-quoted YAML scalar; the test expects them to load as U+0085.
    config = env_with_country_config("""
                                     bq:
                                         partition: 250
                                         languages: nl
                                         names: 
                                             name: 
                                                 default: "\\N"
                                     """)

    countries = country_info._CountryInfo()
    countries.load(config)

    expected = {
        'bq': {
            'partition': 250,
            'languages': ['nl'],
            'names': {'name': '\x85'},
        },
    }
    assert dict(countries.items()) == expected