# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
import logging
import subprocess
from textwrap import dedent
from pathlib import Path

from psycopg2 import sql as pysql

from nominatim.config import Configuration
from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import version_str

LOG = logging.getLogger()

OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}


def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
                                       entry: Mapping[str, Any]) -> None:
    """ Converts a single entry from the JSON format for address rank
        descriptions into a flat format suitable for inserting into a
        PostgreSQL table and adds these lines to `rows`.
    """
    countries = entry.get('countries') or (None, )
    for key, values in entry['tags'].items():
        for value, ranks in values.items():
            if isinstance(ranks, list):
                rank_search, rank_address = ranks
            else:
                rank_search = rank_address = ranks
            # Store empty tag values as NULL.
            if not value:
                value = None
            for country in countries:
                rows.append((country, key, value, rank_search, rank_address))
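
# Illustrative example (not part of the original source): an entry of the form
#     {'tags': {'place': {'city': 16, 'town': [16, 17]}}}
# is flattened by the helper above into the rows
#     (None, 'place', 'city', 16, 16)
#     (None, 'place', 'town', 16, 17)
# i.e. a bare rank is used for both rank_search and rank_address, while a
# two-element list is split into (rank_search, rank_address).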


def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
    """ Replace the `address_levels` table with the contents of `levels`.

        A new table is created; any previously existing table is dropped.
        The table has the following columns:
            country, class, type, rank_search, rank_address
    """
    rows: List[Tuple[Any, ...]] = []
    for entry in levels:
        _add_address_level_rows_from_entry(rows, entry)

    with conn.cursor() as cur:
        cur.drop_table(table)

        cur.execute(pysql.SQL("""CREATE TABLE {} (
                                        country_code varchar(2),
                                        class TEXT,
                                        type TEXT,
                                        rank_search SMALLINT,
                                        rank_address SMALLINT)
                              """).format(pysql.Identifier(table)))

        cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                           .format(pysql.Identifier(table)), rows)

        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
                    .format(pysql.Identifier(table)))

    conn.commit()
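
# Usage sketch (illustrative only; `dsn` is a placeholder for a libpq
# connection string):
#
#     with connect(dsn) as conn:
#         load_address_levels(conn, 'address_levels',
#                             [{'tags': {'place': {'city': 16}}}])
#
# The table is replaced wholesale; there is no support for partial updates.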


def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
    """ Replace the `address_levels` table with the content as
        defined in the given configuration. Uses the parameter
        NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
        configuration file.
    """
    cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
    load_address_levels(conn, 'address_levels', cfg)


def create_functions(conn: Connection, config: Configuration,
                     enable_diff_updates: bool = True,
                     enable_debug: bool = False) -> None:
    """ (Re)create the PL/pgSQL functions.
    """
    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'functions.sql',
                     disable_diff_updates=not enable_diff_updates,
                     debug=enable_debug)


WEBSITE_SCRIPTS = ('deletable.php', 'details.php', 'lookup.php', 'polygons.php',
                   'reverse.php', 'search.php', 'status.php')

# constants needed by PHP scripts: PHP name, config name, type
PHP_CONST_DEFS = (
    ('Database_DSN', 'DATABASE_DSN', str),
    ('Default_Language', 'DEFAULT_LANGUAGE', str),
    ('Log_DB', 'LOG_DB', bool),
    ('Log_File', 'LOG_FILE', Path),
    ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
    ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
    ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
    ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
    ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
    ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
    ('MapIcon_URL', 'MAPICON_URL', str),
)


def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the wikipedia importance tables with new data.
        The import is run in a single transaction so that the new data
        is replaced seamlessly.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'wikimedia-importance.sql.gz'

    if not datafile.exists():
        return 1

    pre_code = """BEGIN;
                  DROP TABLE IF EXISTS "wikipedia_article";
                  DROP TABLE IF EXISTS "wikipedia_redirect"
               """
    post_code = "COMMIT"
    execute_file(dsn, datafile, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0
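
# Usage sketch (illustrative only; path and error handling are made up):
#
#     if import_wikipedia_articles(dsn, Path('/srv/nominatim/data')) > 0:
#         LOG.fatal('Wikipedia importance file not found.')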


def import_osm_views_geotiff(dsn: str, data_path: Path) -> int:
    """ Replaces the OSM views table with new data.

        Returns 0 if all was well, 1 if the OSM views GeoTIFF file could not
        be found and 2 if the PostGIS version does not support raster import.
        Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'osmviews.tiff'
    if not datafile.exists():
        return 1
    with connect(dsn) as conn:
        postgis_version = conn.postgis_version_tuple()
        if postgis_version[0] < 3:
            return 2

        with conn.cursor() as cur:
            cur.drop_table("osm_views")
            cur.drop_table("osm_views_stat")

            # -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
            reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \
                -co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff"
            subprocess.run(["/bin/bash", "-c", reproject_geotiff], check=True)

            tile_size = 256
            import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \
                public.osm_views | psql {dsn} > /dev/null"
            subprocess.run(["/bin/bash", "-c", import_geotiff], check=True)

            cleanup = "rm raster2import.tiff"
            subprocess.run(["/bin/bash", "-c", cleanup], check=True)

            # To normalize osm views data, the max view value is needed
            cur.execute(f"""
            CREATE TABLE osm_views_stat AS (
                SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
                FROM osm_views CROSS JOIN
                generate_series(1, {tile_size}) As x
                CROSS JOIN generate_series(1, {tile_size}) As y
                WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
            """)
            conn.commit()

    return 0
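
# Sketch of the intended use of osm_views_stat (illustrative only, assuming
# the tables created above): raw view counts can later be scaled into [0, 1]
# with a query along the lines of
#
#     SELECT ST_Value(v.rast, 1, x, y) / s.max_views_count
#       FROM osm_views v, osm_views_stat s ...
#
# which is why only the maximum needs to be precomputed here.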


def recompute_importance(conn: Connection) -> None:
    """ Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operation that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
            """)
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance)
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')
    conn.commit()
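
# Usage sketch (illustrative only): triggers on placex are disabled while the
# update runs, so this must not overlap with replication updates:
#
#     with connect(dsn) as conn:
#         recompute_importance(conn)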


def _quote_php_variable(var_type: Type[Any], config: Configuration,
                        conf_name: str) -> str:
    if var_type == bool:
        return 'true' if config.get_bool(conf_name) else 'false'
    if var_type == int:
        return cast(str, getattr(config, conf_name))
    if not getattr(config, conf_name):
        return 'false'
    if var_type == Path:
        value = str(config.get_path(conf_name) or '')
    else:
        value = getattr(config, conf_name)
    quoted = value.replace("'", "\\'")
    return f"'{quoted}'"
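
# Quoting rules in effect above, by example (values are made up):
#     bool -> the unquoted PHP literals true / false
#     int  -> the numeric value, unquoted
#     Path -> the configured path as a quoted string, 'false' when unset
#     str  -> a quoted string with single quotes escaped,
#             e.g. it's  ->  'it\'s'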


def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
    """ Create the website script stubs.
    """
    if not basedir.exists():
        LOG.info('Creating website directory.')
        basedir.mkdir()

    template = dedent(f"""\
                      <?php

                      @define('CONST_Debug', $_GET['debug'] ?? false);
                      @define('CONST_LibDir', '{config.lib_dir.php}');
                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
                      @define('CONST_NominatimVersion', '{version_str()}');

                      """)

    for php_name, conf_name, var_type in PHP_CONST_DEFS:
        varout = _quote_php_variable(var_type, config, conf_name)

        template += f"@define('CONST_{php_name}', {varout});\n"

    template += f"\nrequire_once('{config.lib_dir.php}/website/{{}}');\n"

    search_name_table_exists = bool(conn and conn.table_exists('search_name'))

    for script in WEBSITE_SCRIPTS:
        if not search_name_table_exists and script == 'search.php':
            (basedir / script).write_text(template.format('reverse-only-search.php'), 'utf-8')
        else:
            (basedir / script).write_text(template.format(script), 'utf-8')
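
# A generated stub then looks roughly like this (constant values are made-up
# examples; the actual values come from the configuration):
#
#     <?php
#     @define('CONST_Debug', $_GET['debug'] ?? false);
#     @define('CONST_LibDir', '/usr/local/lib/nominatim/php');
#     ...
#     require_once('/usr/local/lib/nominatim/php/website/search.php');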


def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
                          recursive: bool = True) -> None:
    """ Mark the given OSM object for reindexing. When 'recursive' is set
        to True (the default), then all dependent objects are marked for
        reindexing as well.

        'osm_type' must be one of 'N' (node), 'W' (way) or 'R' (relation).
        If the given object does not exist, then nothing happens.
    """
    assert osm_type in ('N', 'R', 'W')

    LOG.warning("Invalidating OSM %s %s%s.",
                OSM_TYPE[osm_type], osm_id,
                ' and its dependent places' if recursive else '')

    with conn.cursor() as cur:
        if recursive:
            sql = """SELECT place_force_update(place_id)
                     FROM placex WHERE osm_type = %s and osm_id = %s"""
        else:
            sql = """UPDATE placex SET indexed_status = 2
                     WHERE osm_type = %s and osm_id = %s"""

        cur.execute(sql, (osm_type, osm_id))
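
# Usage sketch (illustrative only): mark way 42 and its dependent places for
# reindexing on the next indexing run; committing is left to the caller:
#
#     with connect(dsn) as conn:
#         invalidate_osm_object('W', 42, conn)
#         conn.commit()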