# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
import csv
import gzip
import logging
from textwrap import dedent
from pathlib import Path
from psycopg2 import sql as pysql

from ..config import Configuration
from ..db.connection import Connection, connect, postgis_version_tuple,\
                            drop_tables, table_exists
from ..db.utils import execute_file, CopyBuffer
from ..db.sql_preprocessor import SQLPreprocessor
from ..version import NOMINATIM_VERSION

LOG = logging.getLogger()

OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}

def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
                                       entry: Mapping[str, Any]) -> None:
    """ Converts a single entry from the JSON format for address rank
        descriptions into a flat format suitable for inserting into a
        PostgreSQL table and adds these lines to `rows`.
    """
    countries = entry.get('countries') or (None, )
    for key, values in entry['tags'].items():
        for value, ranks in values.items():
            if isinstance(ranks, list):
                rank_search, rank_address = ranks
            else:
                rank_search = rank_address = ranks

            for country in countries:
                rows.append((country, key, value, rank_search, rank_address))
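
# Illustrative example (values made up, not taken from the shipped
# address-levels configuration): an entry of the form
#
#     {"countries": ["de"],
#      "tags": {"place": {"city": 16, "town": [18, 16]}}}
#
# is flattened into
#
#     ('de', 'place', 'city', 16, 16)
#     ('de', 'place', 'town', 18, 16)
#
# A scalar rank is used for both rank_search and rank_address; a two-element
# list is read as (rank_search, rank_address).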


def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
    """ Replace the `address_levels` table with the contents of `levels`.

        A new table is created; any previously existing table is dropped.
        The table has the following columns:
            country, class, type, rank_search, rank_address
    """
    rows: List[Tuple[Any, ...]] = []
    for entry in levels:
        _add_address_level_rows_from_entry(rows, entry)

    drop_tables(conn, table)

    with conn.cursor() as cur:
        cur.execute(pysql.SQL("""CREATE TABLE {} (
                                        country_code varchar(2),
                                        class TEXT,
                                        type TEXT,
                                        rank_search SMALLINT,
                                        rank_address SMALLINT)
                              """).format(pysql.Identifier(table)))

        cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                           .format(pysql.Identifier(table)), rows)

        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
                    .format(pysql.Identifier(table)))

    conn.commit()


def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
    """ Replace the `address_levels` table with the content as
        defined in the given configuration. Uses the parameter
        NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
        configuration file.
    """
    cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
    load_address_levels(conn, 'address_levels', cfg)
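
# A minimal usage sketch (assuming a Configuration object and a libpq DSN from
# the surrounding tooling are available):
#
#     with connect(config.get_libpq_dsn()) as conn:
#         load_address_levels_from_config(conn, config)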


def create_functions(conn: Connection, config: Configuration,
                     enable_diff_updates: bool = True,
                     enable_debug: bool = False) -> None:
    """ (Re)create the PL/pgSQL functions.
    """
    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'functions.sql',
                     disable_diff_updates=not enable_diff_updates,
                     debug=enable_debug)


WEBSITE_SCRIPTS = ('deletable.php', 'details.php', 'lookup.php', 'polygons.php',
                   'reverse.php', 'search.php', 'status.php')

# constants needed by PHP scripts: PHP name, config name, type
PHP_CONST_DEFS = (
    ('Database_DSN', 'DATABASE_DSN', str),
    ('Default_Language', 'DEFAULT_LANGUAGE', str),
    ('Log_DB', 'LOG_DB', bool),
    ('Log_File', 'LOG_FILE', Path),
    ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
    ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
    ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
    ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
    ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
    ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
    ('MapIcon_URL', 'MAPICON_URL', str),
    ('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool),
)


def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the wikipedia importance tables with new data.
        The import is run in a single transaction so that the new data
        is replaced seamlessly.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Throws an exception if there was an error reading the file.
    """
    if import_importance_csv(dsn, data_path / 'wikimedia-importance.csv.gz') == 0 \
       or import_importance_sql(dsn, data_path / 'wikimedia-importance.sql.gz',
                                ignore_errors) == 0:
        return 0

    return 1
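
# The gzipped CSV is tried first; the gzipped SQL dump is only used as a
# fallback when no CSV file is present (this follows from the order of the
# two checks above).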


def import_importance_csv(dsn: str, data_file: Path) -> int:
    """ Replace wikipedia importance table with data from a
        single CSV file.

        The file must be a gzipped CSV and have the following columns:
        language, title, importance, wikidata_id

        Other columns may be present but will be ignored.
    """
    if not data_file.exists():
        return 1

    # Only import the first occurrence of a wikidata ID.
    # This keeps indexes and table small.
    wd_done = set()

    with connect(dsn) as conn:
        drop_tables(conn, 'wikipedia_article', 'wikipedia_redirect', 'wikimedia_importance')
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE wikimedia_importance (
                             language TEXT NOT NULL,
                             title TEXT NOT NULL,
                             importance double precision NOT NULL,
                             wikidata TEXT
                           ) """)

        with gzip.open(str(data_file), 'rt') as fd, CopyBuffer() as buf:
            for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
                wd_id = int(row['wikidata_id'][1:])
                buf.add(row['language'], row['title'], row['importance'],
                        None if wd_id in wd_done else row['wikidata_id'])
                wd_done.add(wd_id)

                if buf.size() > 10000000:
                    with conn.cursor() as cur:
                        buf.copy_out(cur, 'wikimedia_importance',
                                     columns=['language', 'title', 'importance',
                                              'wikidata'])

            with conn.cursor() as cur:
                buf.copy_out(cur, 'wikimedia_importance',
                             columns=['language', 'title', 'importance', 'wikidata'])

        with conn.cursor() as cur:
            cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_title
                           ON wikimedia_importance (title)""")
            cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_wikidata
                           ON wikimedia_importance (wikidata)
                           WHERE wikidata is not null""")

        conn.commit()

    return 0


def import_importance_sql(dsn: str, data_file: Path, ignore_errors: bool) -> int:
    """ Replace wikipedia importance table with data from an SQL file.
    """
    if not data_file.exists():
        return 1

    pre_code = """BEGIN;
                  DROP TABLE IF EXISTS "wikipedia_article";
                  DROP TABLE IF EXISTS "wikipedia_redirect";
                  DROP TABLE IF EXISTS "wikipedia_importance";
               """
    post_code = "COMMIT"
    execute_file(dsn, data_file, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0


def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the secondary importance raster data table with new data.

        Returns 0 if all was well and 1 if the raster SQL file could not
        be found. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'secondary_importance.sql.gz'
    if not datafile.exists():
        return 1

    with connect(dsn) as conn:
        postgis_version = postgis_version_tuple(conn)
        if postgis_version[0] < 3:
            LOG.error('PostGIS version is too old for using OSM raster data.')
            return 2

    execute_file(dsn, datafile, ignore_errors=ignore_errors)

    return 0


def recompute_importance(conn: Connection) -> None:
    """ Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operation that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, rank_search, centroid))
            """)
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance);
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
    conn.commit()
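
# Usage sketch (illustrative; in practice this is normally driven through the
# refresh CLI command rather than called directly):
#
#     with connect(dsn) as conn:
#         recompute_importance(conn)
#
# Triggers on placex stay disabled for the duration of the update, so the bulk
# UPDATE does not queue every row for reindexing.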


def _quote_php_variable(var_type: Type[Any], config: Configuration,
                        conf_name: str) -> str:
    if var_type == bool:
        return 'true' if config.get_bool(conf_name) else 'false'

    if var_type == int:
        return cast(str, getattr(config, conf_name))

    if not getattr(config, conf_name):
        return 'false'

    if var_type == Path:
        value = str(config.get_path(conf_name) or '')
    else:
        value = getattr(config, conf_name)

    quoted = value.replace("'", "\\'")
    return f"'{quoted}'"


def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
    """ Create the website script stubs.
    """
    if config.lib_dir.php is None:
        LOG.info("Python frontend does not require website setup. Skipping.")
        return

    if not basedir.exists():
        LOG.info('Creating website directory.')
        basedir.mkdir()

    assert config.project_dir is not None
    basedata = dedent(f"""\
                      <?php

                      @define('CONST_Debug', $_GET['debug'] ?? false);
                      @define('CONST_LibDir', '{config.lib_dir.php}');
                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
                      @define('CONST_NominatimVersion', '{NOMINATIM_VERSION!s}');

                      """)

    for php_name, conf_name, var_type in PHP_CONST_DEFS:
        varout = _quote_php_variable(var_type, config, conf_name)

        basedata += f"@define('CONST_{php_name}', {varout});\n"

    template = "\nrequire_once(CONST_LibDir.'/website/{}');\n"

    search_name_table_exists = bool(conn and table_exists(conn, 'search_name'))

    for script in WEBSITE_SCRIPTS:
        if not search_name_table_exists and script == 'search.php':
            out = template.format('reverse-only-search.php')
        else:
            out = template.format(script)

        (basedir / script).write_text(basedata + out, 'utf-8')
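
# Illustrative sketch (not part of the module): with the defines above, a
# generated stub such as search.php ends up looking roughly like
#
#     <?php
#
#     @define('CONST_Debug', $_GET['debug'] ?? false);
#     @define('CONST_LibDir', '/usr/local/lib/nominatim/lib-php');
#     ...
#     @define('CONST_Database_DSN', 'pgsql:dbname=nominatim');
#
#     require_once(CONST_LibDir.'/website/search.php');
#
# where the path and the DSN are placeholder values, not real configuration.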


def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
                          recursive: bool = True) -> None:
    """ Mark the given OSM object for reindexing. When 'recursive' is set
        to True (the default), then all dependent objects are marked for
        reindexing as well.

        'osm_type' must be one of 'N' (node), 'W' (way) or 'R' (relation).
        If the given object does not exist, then nothing happens.
    """
    assert osm_type in ('N', 'R', 'W')

    LOG.warning("Invalidating OSM %s %s%s.",
                OSM_TYPE[osm_type], osm_id,
                ' and its dependent places' if recursive else '')

    with conn.cursor() as cur:
        if recursive:
            sql = """SELECT place_force_update(place_id)
                     FROM placex WHERE osm_type = %s and osm_id = %s"""
        else:
            sql = """UPDATE placex SET indexed_status = 2
                     WHERE osm_type = %s and osm_id = %s"""

        cur.execute(sql, (osm_type, osm_id))
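
# Usage sketch (illustrative): mark a single node and its dependent places for
# reindexing. The rows are only flagged here; the actual reindexing happens in
# a separate indexing run, and the caller is responsible for committing.
#
#     with connect(dsn) as conn:
#         invalidate_osm_object('N', 1234, conn)
#         conn.commit()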