1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Functions for bringing auxiliary data in the database up-to-date.
10 from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
14 from textwrap import dedent
15 from pathlib import Path
17 from psycopg2 import sql as pysql
19 from nominatim_core.config import Configuration
20 from nominatim_core.db.connection import Connection, connect
21 from nominatim_core.db.utils import execute_file, CopyBuffer
22 from nominatim_core.db.sql_preprocessor import SQLPreprocessor
23 from ..version import NOMINATIM_VERSION
25 LOG = logging.getLogger()
27 OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
29 def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
30 entry: Mapping[str, Any]) -> None:
31 """ Converts a single entry from the JSON format for address rank
32 descriptions into a flat format suitable for inserting into a
33 PostgreSQL table and adds these lines to `rows`.
35 countries = entry.get('countries') or (None, )
36 for key, values in entry['tags'].items():
37 for value, ranks in values.items():
38 if isinstance(ranks, list):
39 rank_search, rank_address = ranks
41 rank_search = rank_address = ranks
44 for country in countries:
45 rows.append((country, key, value, rank_search, rank_address))
48 def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
49 """ Replace the `address_levels` table with the contents of `levels'.
51 A new table is created any previously existing table is dropped.
52 The table has the following columns:
53 country, class, type, rank_search, rank_address
55 rows: List[Tuple[Any, ...]] = []
57 _add_address_level_rows_from_entry(rows, entry)
59 with conn.cursor() as cur:
62 cur.execute(pysql.SQL("""CREATE TABLE {} (
63 country_code varchar(2),
67 rank_address SMALLINT)
68 """).format(pysql.Identifier(table)))
70 cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
71 .format(pysql.Identifier(table)), rows)
73 cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
74 .format(pysql.Identifier(table)))
79 def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
80 """ Replace the `address_levels` table with the content as
81 defined in the given configuration. Uses the parameter
82 NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
85 cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
86 load_address_levels(conn, 'address_levels', cfg)
89 def create_functions(conn: Connection, config: Configuration,
90 enable_diff_updates: bool = True,
91 enable_debug: bool = False) -> None:
92 """ (Re)create the PL/pgSQL functions.
94 sql = SQLPreprocessor(conn, config)
96 sql.run_sql_file(conn, 'functions.sql',
97 disable_diff_updates=not enable_diff_updates,
112 # constants needed by PHP scripts: PHP name, config name, type
114 ('Database_DSN', 'DATABASE_DSN', str),
115 ('Default_Language', 'DEFAULT_LANGUAGE', str),
116 ('Log_DB', 'LOG_DB', bool),
117 ('Log_File', 'LOG_FILE', Path),
118 ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
119 ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
120 ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
121 ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
122 ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
123 ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
124 ('MapIcon_URL', 'MAPICON_URL', str),
125 ('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool),
129 def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
130 """ Replaces the wikipedia importance tables with new data.
131 The import is run in a single transaction so that the new data
132 is replace seamlessly.
134 Returns 0 if all was well and 1 if the importance file could not
135 be found. Throws an exception if there was an error reading the file.
137 if import_importance_csv(dsn, data_path / 'wikimedia-importance.csv.gz') == 0 \
138 or import_importance_sql(dsn, data_path / 'wikimedia-importance.sql.gz',
145 def import_importance_csv(dsn: str, data_file: Path) -> int:
146 """ Replace wikipedia importance table with data from a
149 The file must be a gzipped CSV and have the following columns:
150 language, title, importance, wikidata_id
152 Other columns may be present but will be ignored.
154 if not data_file.exists():
157 # Only import the first occurance of a wikidata ID.
158 # This keeps indexes and table small.
161 with connect(dsn) as conn:
162 with conn.cursor() as cur:
163 cur.drop_table('wikipedia_article')
164 cur.drop_table('wikipedia_redirect')
165 cur.drop_table('wikimedia_importance')
166 cur.execute("""CREATE TABLE wikimedia_importance (
167 language TEXT NOT NULL,
169 importance double precision NOT NULL,
173 with gzip.open(str(data_file), 'rt') as fd, CopyBuffer() as buf:
174 for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
175 wd_id = int(row['wikidata_id'][1:])
176 buf.add(row['language'], row['title'], row['importance'],
177 None if wd_id in wd_done else row['wikidata_id'])
180 if buf.size() > 10000000:
181 with conn.cursor() as cur:
182 buf.copy_out(cur, 'wikimedia_importance',
183 columns=['language', 'title', 'importance',
186 with conn.cursor() as cur:
187 buf.copy_out(cur, 'wikimedia_importance',
188 columns=['language', 'title', 'importance', 'wikidata'])
190 with conn.cursor() as cur:
191 cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_title
192 ON wikimedia_importance (title)""")
193 cur.execute("""CREATE INDEX IF NOT EXISTS idx_wikimedia_importance_wikidata
194 ON wikimedia_importance (wikidata)
195 WHERE wikidata is not null""")
202 def import_importance_sql(dsn: str, data_file: Path, ignore_errors: bool) -> int:
203 """ Replace wikipedia importance table with data from an SQL file.
205 if not data_file.exists():
209 DROP TABLE IF EXISTS "wikipedia_article";
210 DROP TABLE IF EXISTS "wikipedia_redirect";
211 DROP TABLE IF EXISTS "wikipedia_importance";
214 execute_file(dsn, data_file, ignore_errors=ignore_errors,
215 pre_code=pre_code, post_code=post_code)
220 def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
221 """ Replaces the secondary importance raster data table with new data.
223 Returns 0 if all was well and 1 if the raster SQL file could not
224 be found. Throws an exception if there was an error reading the file.
226 datafile = data_path / 'secondary_importance.sql.gz'
227 if not datafile.exists():
230 with connect(dsn) as conn:
231 postgis_version = conn.postgis_version_tuple()
232 if postgis_version[0] < 3:
233 LOG.error('PostGIS version is too old for using OSM raster data.')
236 execute_file(dsn, datafile, ignore_errors=ignore_errors)
240 def recompute_importance(conn: Connection) -> None:
241 """ Recompute wikipedia links and importance for all entries in placex.
242 This is a long-running operations that must not be executed in
243 parallel with updates.
245 with conn.cursor() as cur:
246 cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
248 UPDATE placex SET (wikipedia, importance) =
249 (SELECT wikipedia, importance
250 FROM compute_importance(extratags, country_code, rank_search, centroid))
253 UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
255 WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
256 and (s.wikipedia is null or s.importance < d.importance);
259 cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
263 def _quote_php_variable(var_type: Type[Any], config: Configuration,
264 conf_name: str) -> str:
266 return 'true' if config.get_bool(conf_name) else 'false'
269 return cast(str, getattr(config, conf_name))
271 if not getattr(config, conf_name):
275 value = str(config.get_path(conf_name) or '')
277 value = getattr(config, conf_name)
279 quoted = value.replace("'", "\\'")
283 def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
284 """ Create the website script stubs.
286 if config.lib_dir.php is None:
287 LOG.info("Python frontend does not require website setup. Skipping.")
290 if not basedir.exists():
291 LOG.info('Creating website directory.')
294 assert config.project_dir is not None
295 basedata = dedent(f"""\
298 @define('CONST_Debug', $_GET['debug'] ?? false);
299 @define('CONST_LibDir', '{config.lib_dir.php}');
300 @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
301 @define('CONST_NominatimVersion', '{NOMINATIM_VERSION!s}');
305 for php_name, conf_name, var_type in PHP_CONST_DEFS:
306 varout = _quote_php_variable(var_type, config, conf_name)
308 basedata += f"@define('CONST_{php_name}', {varout});\n"
310 template = "\nrequire_once(CONST_LibDir.'/website/{}');\n"
312 search_name_table_exists = bool(conn and conn.table_exists('search_name'))
314 for script in WEBSITE_SCRIPTS:
315 if not search_name_table_exists and script == 'search.php':
316 out = template.format('reverse-only-search.php')
318 out = template.format(script)
320 (basedir / script).write_text(basedata + out, 'utf-8')
323 def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
324 recursive: bool = True) -> None:
325 """ Mark the given OSM object for reindexing. When 'recursive' is set
326 to True (the default), then all dependent objects are marked for
329 'osm_type' must be on of 'N' (node), 'W' (way) or 'R' (relation).
330 If the given object does not exist, then nothing happens.
332 assert osm_type in ('N', 'R', 'W')
334 LOG.warning("Invalidating OSM %s %s%s.",
335 OSM_TYPE[osm_type], osm_id,
336 ' and its dependent places' if recursive else '')
338 with conn.cursor() as cur:
340 sql = """SELECT place_force_update(place_id)
341 FROM placex WHERE osm_type = %s and osm_id = %s"""
343 sql = """UPDATE placex SET indexed_status = 2
344 WHERE osm_type = %s and osm_id = %s"""
346 cur.execute(sql, (osm_type, osm_id))