git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/country_info.py
Merge pull request #2709 from lonvia/less-strict-country-assignment
[nominatim.git] / nominatim / tools / country_info.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Functions for importing and managing static country information.
9 """
10 import psycopg2.extras
11
12 from nominatim.db import utils as db_utils
13 from nominatim.db.connection import connect
14 from nominatim.errors import UsageError
15
def _flatten_name_list(names):
    """ Convert the nested name definition of a country into a flat
        dictionary of name tags.

        `names` maps a tag prefix (for example 'name') either directly to
        a string or to a dictionary of suffix/name pairs. The suffix
        'default' produces the bare prefix as key, any other suffix
        produces the key '<prefix>:<suffix>'. When `names` is None,
        an empty dictionary is returned.

        Raises a UsageError when `names` or one of its entries does not
        have the expected structure.
    """
    if names is None:
        return {}

    if not isinstance(names, dict):
        # The country settings are read from a YAML file, so that is the
        # file the user has to fix.
        raise UsageError("Expected key-value list for names in country_settings.yaml")

    flat = {}
    for prefix, remain in names.items():
        if isinstance(remain, str):
            # Short form: the prefix is directly assigned a single name.
            flat[prefix] = remain
        elif not isinstance(remain, dict):
            raise UsageError("Entries in names must be key-value lists.")
        else:
            for suffix, name in remain.items():
                if suffix == 'default':
                    flat[prefix] = name
                else:
                    flat[f'{prefix}:{suffix}'] = name

    return flat
37
38
39
class _CountryInfo:
    """ Caches country-specific properties from the configuration file.
    """

    def __init__(self):
        self._info = {}


    def load(self, config):
        """ Load the country properties from the configuration files,
            if they are not loaded yet.

            The 'languages' property of each country is converted into
            a list of language codes and the 'names' property into a flat
            dictionary of name tags.
        """
        if self._info:
            return

        self._info = config.load_sub_configuration('country_settings.yaml')
        for prop in self._info.values():
            prop['languages'] = self._parse_languages(prop.get('languages'))
            prop['names'] = _flatten_name_list(prop.get('names'))


    @staticmethod
    def _parse_languages(languages):
        """ Convert the 'languages' property into a list for simpler handling.

            A list is returned unchanged. A string is split at commas,
            whitespace around the entries is removed and empty entries
            are dropped. A missing or empty value (including an explicit
            null in the configuration) results in an empty list.
        """
        if not languages:
            return []

        if isinstance(languages, list):
            return languages

        stripped = (lang.strip() for lang in languages.split(','))
        return [lang for lang in stripped if lang]


    def items(self):
        """ Return tuples of (country_code, property dict) as iterable.
        """
        return self._info.items()

    def get(self, country_code):
        """ Get country information for the country with the given country code.
            Returns an empty dictionary when the country code is unknown.
        """
        return self._info.get(country_code, {})
73
74
75
# Module-wide instance holding the country properties. It stays empty
# until setup_country_config() has loaded the configuration into it.
_COUNTRY_INFO = _CountryInfo()
77
78
def setup_country_config(config):
    """ Read the country properties from the given configuration into
        the module-wide country cache. This has to happen before any
        other function of this file is used.
    """
    country_cache = _COUNTRY_INFO
    country_cache.load(config)
85
86
def iterate():
    """ Return an iterable over pairs of country code and the
        property dictionary of that country.
    """
    country_items = _COUNTRY_INFO.items()
    return country_items
91
92
def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
    """ Set up the static country tables that form the background for
        geocoding. Executes the SQL file 'country_osm_grid.sql.gz' from
        `sql_dir`, then creates the table `country_name` and fills it
        with the names, default language and partition of each loaded
        country. With `ignore_partitions` set, every country is put
        into partition 0. The data is assumed to not exist yet.
    """
    db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')

    rows = []
    for country_code, info in _COUNTRY_INFO.items():
        if country_code is None or info is None:
            continue

        partition = 0 if ignore_partitions else info.get('partition')

        # A default language is only set when the country has exactly one.
        languages = info['languages']
        default_lang = languages[0] if len(languages) == 1 else None

        rows.append((country_code, info['names'], default_lang, partition))

    with connect(dsn) as conn:
        with conn.cursor() as cur:
            # The names are handed over as dictionaries and saved as hstore.
            psycopg2.extras.register_hstore(cur)
            cur.execute(
                """ CREATE TABLE public.country_name (
                        country_code character varying(2),
                        name public.hstore,
                        derived_name public.hstore,
                        country_default_language_code text,
                        partition integer
                    ); """)
            cur.execute_values(
                """ INSERT INTO public.country_name
                    (country_code, name, country_default_language_code, partition) VALUES %s
                """, rows)
        conn.commit()
126
127
def create_country_names(conn, tokenizer, languages=None):
    """ Add default country names to search index. `languages` is a comma-
        separated list of language codes as used in OSM. If `languages` is not
        empty then only name translations for the given languages are added
        to the index. Names without a language suffix are always added.

        Whitespace around the language codes is ignored, so 'en, de' is
        the same as 'en,de'. An already split sequence of language codes
        is accepted as well.
    """
    wanted = set()
    if languages:
        if isinstance(languages, str):
            languages = languages.split(',')
        # Strip the codes the same way as the languages in the country
        # settings are stripped, otherwise 'en, de' would never match 'de'.
        wanted = {lang.strip() for lang in languages}
        wanted.discard('')

    def _include_key(key):
        # Without a language restriction, every name is taken.
        if not wanted or ':' not in key:
            return True
        # The language code is everything after the first colon.
        return key[key.index(':') + 1:] in wanted

    with conn.cursor() as cur:
        # Makes sure the hstore column `name` is returned as a dictionary.
        psycopg2.extras.register_hstore(cur)
        cur.execute("""SELECT country_code, name FROM country_name
                       WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for code, name in cur:
                names = {'countrycode': code}

                # country names (only in languages as provided)
                if name:
                    names.update({k: v for k, v in name.items() if _include_key(k)})

                analyzer.add_country_names(code, names)

    conn.commit()