# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
import csv
import gzip
import logging
from pathlib import Path

from psycopg import sql as pysql

from ..config import Configuration
from ..db.connection import Connection, connect, postgis_version_tuple, \
                            drop_tables
from ..db.utils import execute_file
from ..db.sql_preprocessor import SQLPreprocessor

LOG = logging.getLogger()

OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}


def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
                                       entry: Mapping[str, Any]) -> None:
    """ Converts a single entry from the JSON format for address rank
        descriptions into a flat format suitable for inserting into a
        PostgreSQL table and adds these lines to `rows`.
    """
    countries = entry.get('countries') or (None, )
    for key, values in entry['tags'].items():
        for value, ranks in values.items():
            if isinstance(ranks, list):
                rank_search, rank_address = ranks
            else:
                rank_search = rank_address = ranks
            for country in countries:
                rows.append((country, key, value, rank_search, rank_address))


def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
    """ Replace the `address_levels` table with the contents of `levels`.

        A new table is created; any previously existing table is dropped.
        The table has the following columns:
            country, class, type, rank_search, rank_address
    """
    rows: List[Tuple[Any, ...]] = []
    for entry in levels:
        _add_address_level_rows_from_entry(rows, entry)

    drop_tables(conn, table)

    with conn.cursor() as cur:
        cur.execute(pysql.SQL("""CREATE TABLE {} (
                                     country_code varchar(2),
                                     class TEXT,
                                     type TEXT,
                                     rank_search SMALLINT,
                                     rank_address SMALLINT)
                              """).format(pysql.Identifier(table)))

        cur.executemany(pysql.SQL("INSERT INTO {} VALUES (%s, %s, %s, %s, %s)")
                        .format(pysql.Identifier(table)), rows)

        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
                    .format(pysql.Identifier(table)))

    conn.commit()
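
# The `levels` argument mirrors the address-level JSON configuration: each entry
# has a 'tags' mapping (class -> type -> rank or [rank_search, rank_address])
# and an optional 'countries' list. A minimal illustrative call (the values are
# made up, not the shipped defaults):
#
#   load_address_levels(conn, 'address_levels',
#                       [{'tags': {'place': {'city': [16, 16],
#                                            'town': 17}},
#                         'countries': ['de', 'at']}])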


def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
    """ Replace the `address_levels` table with the content as
        defined in the given configuration. Uses the parameter
        NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
        configuration file.
    """
    cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
    load_address_levels(conn, 'address_levels', cfg)


def create_functions(conn: Connection, config: Configuration,
                     enable_diff_updates: bool = True,
                     enable_debug: bool = False) -> None:
    """ (Re)create the PL/pgSQL functions.
    """
    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'functions.sql',
                     disable_diff_updates=not enable_diff_updates,
                     debug=enable_debug)
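
# Usage sketch (illustrative; assumes `dsn` and a loaded `config` already exist):
#
#   with connect(dsn) as conn:
#       create_functions(conn, config, enable_diff_updates=False)
#       conn.commit()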


def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the wikipedia importance tables with new data.
        The import is run in a single transaction so that the new data
        is replaced seamlessly.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Throws an exception if there was an error reading the file.
    """
    if import_importance_csv(dsn, data_path / 'wikimedia-importance.csv.gz') == 0 \
       or import_importance_sql(dsn, data_path / 'wikimedia-importance.sql.gz',
                                ignore_errors) == 0:
        return 0

    return 1
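
# Return-code handling sketch (illustrative; `dsn` and the data directory path
# are placeholders):
#
#   if import_wikipedia_articles(dsn, Path('/srv/nominatim-data')) > 0:
#       LOG.error('Wikipedia importance dump not found.')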


def import_importance_csv(dsn: str, data_file: Path) -> int:
    """ Replace wikipedia importance table with data from a
        single CSV file.

        The file must be a gzipped CSV and have the following columns:
        language, title, importance, wikidata_id

        Other columns may be present but will be ignored.
    """
    if not data_file.exists():
        return 1

    # Only import the first occurrence of a wikidata ID.
    # This keeps indexes and table small.
    wd_done = set()

    with connect(dsn) as conn:
        drop_tables(conn, 'wikipedia_article', 'wikipedia_redirect', 'wikimedia_importance')
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE wikimedia_importance (
                             language TEXT NOT NULL,
                             title TEXT NOT NULL,
                             importance double precision NOT NULL,
                             wikidata TEXT
                           )""")

            copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
                          FROM STDIN"""
            with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy:
                for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
                    wd_id = int(row['wikidata_id'][1:])
                    copy.write_row((row['language'],
                                    row['title'],
                                    row['importance'],
                                    None if wd_id in wd_done else row['wikidata_id']))
                    wd_done.add(wd_id)

            cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_title
                           ON wikimedia_importance (title)""")
            cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_wikidata
                           ON wikimedia_importance (wikidata)
                           WHERE wikidata is not null""")

        conn.commit()

    return 0
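
# The expected input is a gzipped, tab-separated file with a header row, for
# example (illustrative values; columns are separated by tabs):
#
#   language  title   importance  wikidata_id
#   en        Berlin  0.9876      Q64
#   de        Berlin  0.9876      Q64
#
# Only the first row for a given wikidata ID keeps the ID; later duplicates are
# stored with a NULL wikidata column.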


def import_importance_sql(dsn: str, data_file: Path, ignore_errors: bool) -> int:
    """ Replace wikipedia importance table with data from an SQL file.
    """
    if not data_file.exists():
        return 1

    pre_code = """BEGIN;
                  DROP TABLE IF EXISTS "wikipedia_article";
                  DROP TABLE IF EXISTS "wikipedia_redirect";
                  DROP TABLE IF EXISTS "wikipedia_importance";
               """
    post_code = "COMMIT"
    execute_file(dsn, data_file, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0


def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the secondary importance raster data table with new data.

        Returns 0 if all was well and 1 if the raster SQL file could not
        be found. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'secondary_importance.sql.gz'
    if not datafile.exists():
        return 1

    with connect(dsn) as conn:
        postgis_version = postgis_version_tuple(conn)
        if postgis_version[0] < 3:
            LOG.error('PostGIS version is too old for using OSM raster data.')
            return 2

    execute_file(dsn, datafile, ignore_errors=ignore_errors)

    return 0


def recompute_importance(conn: Connection) -> None:
    """ Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operation that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, rank_search, centroid))
            """)
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance);
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
    conn.commit()
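
# Usage sketch (illustrative; `dsn` is a placeholder). Triggers on placex are
# disabled while the bulk update runs, so no updates may run in parallel:
#
#   with connect(dsn) as conn:
#       recompute_importance(conn)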


def _quote_php_variable(var_type: Type[Any], config: Configuration,
                        conf_name: str) -> str:
    """ Return the given configuration value formatted as a PHP literal.
    """
    if var_type == bool:
        return 'true' if config.get_bool(conf_name) else 'false'

    if var_type == int:
        return cast(str, getattr(config, conf_name))

    if not getattr(config, conf_name):
        return 'false'

    if var_type == Path:
        value = str(config.get_path(conf_name) or '')
    else:
        value = getattr(config, conf_name)

    quoted = value.replace("'", "\\'")
    return f"'{quoted}'"


def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
                          recursive: bool = True) -> None:
    """ Mark the given OSM object for reindexing. When 'recursive' is set
        to True (the default), then all dependent objects are marked for
        reindexing as well.

        'osm_type' must be one of 'N' (node), 'W' (way) or 'R' (relation).
        If the given object does not exist, then nothing happens.
    """
    assert osm_type in ('N', 'R', 'W')

    LOG.warning("Invalidating OSM %s %s%s.",
                OSM_TYPE[osm_type], osm_id,
                ' and its dependent places' if recursive else '')

    with conn.cursor() as cur:
        if recursive:
            sql = """SELECT place_force_update(place_id)
                     FROM placex WHERE osm_type = %s and osm_id = %s"""
        else:
            sql = """UPDATE placex SET indexed_status = 2
                     WHERE osm_type = %s and osm_id = %s"""

        cur.execute(sql, (osm_type, osm_id))
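
# Usage sketch (illustrative; `dsn` and the relation id are placeholders).
# Invalidation only marks the rows; a subsequent indexing run has to process
# them:
#
#   with connect(dsn) as conn:
#       invalidate_osm_object('R', 62422, conn)
#       conn.commit()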