1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Functions for importing and managing static country information.
"""
10 from typing import Dict, Any, Iterable, Tuple, Optional, Container, overload
11 from pathlib import Path
13 from ..db import utils as db_utils
14 from ..db.connection import connect, Connection, register_hstore
15 from ..errors import UsageError
16 from ..config import Configuration
17 from ..tokenizer.base import AbstractTokenizer
def _flatten_name_list(names: Any) -> Dict[str, str]:
    """ Flatten the nested `names` section of a country entry into a
        single-level dictionary.

        String values are copied under their own key. For dictionary
        values the entry `default` is stored under the outer key and every
        other entry under `<outer key>:<inner key>`.

        Returns an empty dictionary when `names` is None. Raises a
        UsageError when `names` or one of its entries is neither of the
        expected types.
    """
    if names is None:
        # The loader hands over `prop.get('names')`, so a country without
        # a names section must not be treated as a configuration error.
        return {}

    if not isinstance(names, dict):
        raise UsageError("Expected key-value list for names in country_settings.py")

    flat: Dict[str, str] = {}
    for prefix, remain in names.items():
        if isinstance(remain, str):
            flat[prefix] = remain
        elif not isinstance(remain, dict):
            raise UsageError("Entries in names must be key-value lists.")
        else:
            for suffix, name in remain.items():
                if suffix == 'default':
                    # The default variant carries no suffix.
                    flat[prefix] = name
                else:
                    flat[f'{prefix}:{suffix}'] = name

    return flat
class _CountryInfo:
    """ Caches country-specific properties from the configuration file.
    """

    def __init__(self) -> None:
        # Maps country code to the property dictionary of that country.
        self._info: Dict[str, Dict[str, Any]] = {}

    def load(self, config: Configuration) -> None:
        """ Load the country properties from the configuration files,
            if they are not loaded yet.
        """
        if self._info:
            # Already loaded, keep the cached (and normalised) data.
            return

        self._info = config.load_sub_configuration('country_settings.yaml')
        for prop in self._info.values():
            # Convert languages into a list for simpler handling.
            if 'languages' not in prop:
                prop['languages'] = []
            elif not isinstance(prop['languages'], list):
                prop['languages'] = [x.strip()
                                     for x in prop['languages'].split(',')]
            prop['names'] = _flatten_name_list(prop.get('names'))

    def items(self) -> Iterable[Tuple[str, Dict[str, Any]]]:
        """ Return tuples of (country_code, property dict) as iterable.
        """
        return self._info.items()

    def get(self, country_code: str) -> Dict[str, Any]:
        """ Get country information for the country with the given country code.
            Returns an empty dictionary for unknown country codes.
        """
        return self._info.get(country_code, {})
# Module-wide cache of the country properties used by the functions below.
_COUNTRY_INFO = _CountryInfo()
def setup_country_config(config: Configuration) -> None:
    """ Load country properties from the configuration file.
        Needs to be called before using any other functions in this
        file.
    """
    _COUNTRY_INFO.load(config)
@overload
def iterate() -> Iterable[Tuple[str, Dict[str, Any]]]:
    ...


@overload
def iterate(prop: str) -> Iterable[Tuple[str, Any]]:
    ...


def iterate(prop: Optional[str] = None) -> Iterable[Tuple[str, Dict[str, Any]]]:
    """ Iterate over country code and properties.

        When `prop` is None, all countries are returned with their complete
        set of properties.

        If `prop` is given, then only countries are returned where the
        given property is set. The second item of the tuple contains only
        the content of the given property.
    """
    if prop is None:
        return _COUNTRY_INFO.items()

    return ((c, p[prop]) for c, p in _COUNTRY_INFO.items() if prop in p)
def setup_country_tables(dsn: str, sql_dir: Path, ignore_partitions: bool = False) -> None:
    """ Create and populate the tables with basic static data that provides
        the background for geocoding. Data is assumed to not yet exist.

        When `ignore_partitions` is set, every country is assigned to
        partition 0 instead of the partition from its configuration.
    """
    db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')

    params = []
    for ccode, props in _COUNTRY_INFO.items():
        if ccode is not None and props is not None:
            if ignore_partitions:
                partition = 0
            else:
                partition = props.get('partition', 0)
            # A default language is only set when it is unambiguous.
            lang = props['languages'][0] if len(
                props['languages']) == 1 else None

            params.append((ccode, props['names'], lang, partition))

    with connect(dsn) as conn:
        # Required so that the name dictionaries are adapted to hstore.
        register_hstore(conn)
        with conn.cursor() as cur:
            cur.execute(
                """ CREATE TABLE public.country_name (
                        country_code character varying(2),
                        name public.hstore,
                        derived_name public.hstore,
                        country_default_language_code text,
                        partition integer
                    ); """)
            cur.executemany(
                """ INSERT INTO public.country_name
                    (country_code, name, country_default_language_code, partition)
                    VALUES (%s, %s, %s, %s)
                """, params)
        # NOTE(review): assumes the connection context does not commit on
        # exit - confirm against the `connect` helper.
        conn.commit()
def create_country_names(conn: Connection, tokenizer: AbstractTokenizer,
                         languages: Optional[Container[str]] = None) -> None:
    """ Add default country names to search index. `languages` is a
        container of language codes as used in OSM. If `languages` is not
        empty then only name translations for the given languages are added
        to the index. Names without a language suffix are always added.
    """
    def _include_key(key: str) -> bool:
        # Keys have the form `<tag>` or `<tag>:<language>`; only the latter
        # are subject to the language filter.
        return ':' not in key or not languages or \
               key[key.index(':') + 1:] in languages

    register_hstore(conn)
    with conn.cursor() as cur:
        cur.execute("""SELECT country_code, name FROM country_name
                       WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for code, name in cur:
                names = {'countrycode': code}

                # country names (only in languages as provided)
                if name:
                    names.update({k: v for k, v in name.items() if _include_key(k)})

                analyzer.add_country_names(code, names)

    # NOTE(review): assumes the caller expects the changes to be committed
    # here - confirm against the callers of this function.
    conn.commit()