1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
import logging
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
from textwrap import dedent
from pathlib import Path

from psycopg2 import sql as pysql

from nominatim.config import Configuration
from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import NOMINATIM_VERSION
LOG = logging.getLogger()

# Maps the one-letter OSM object kind to its full name (used for log output).
OSM_TYPE = {'N': 'node', 'W': 'way', 'R': 'relation'}
27 def _add_address_level_rows_from_entry(rows: MutableSequence[Tuple[Any, ...]],
28 entry: Mapping[str, Any]) -> None:
29 """ Converts a single entry from the JSON format for address rank
30 descriptions into a flat format suitable for inserting into a
31 PostgreSQL table and adds these lines to `rows`.
33 countries = entry.get('countries') or (None, )
34 for key, values in entry['tags'].items():
35 for value, ranks in values.items():
36 if isinstance(ranks, list):
37 rank_search, rank_address = ranks
39 rank_search = rank_address = ranks
42 for country in countries:
43 rows.append((country, key, value, rank_search, rank_address))
def load_address_levels(conn: Connection, table: str, levels: Sequence[Mapping[str, Any]]) -> None:
    """ Replace the `address_levels` table with the contents of `levels'.

        A new table is created any previously existing table is dropped.
        The table has the following columns:
            country, class, type, rank_search, rank_address
    """
    # Flatten the JSON-style rank description into plain rows first.
    rows: List[Tuple[Any, ...]] = []
    for entry in levels:
        _add_address_level_rows_from_entry(rows, entry)

    with conn.cursor() as cur:
        cur.drop_table(table)

        cur.execute(pysql.SQL("""CREATE TABLE {} (
                                        country_code varchar(2),
                                        class TEXT,
                                        type TEXT,
                                        rank_search SMALLINT,
                                        rank_address SMALLINT)
                              """).format(pysql.Identifier(table)))

        cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
                           .format(pysql.Identifier(table)), rows)

        # Each (country, class, type) combination must be unique.
        cur.execute(pysql.SQL('CREATE UNIQUE INDEX ON {} (country_code, class, type)')
                    .format(pysql.Identifier(table)))

    conn.commit()
def load_address_levels_from_config(conn: Connection, config: Configuration) -> None:
    """ Replace the `address_levels` table with the content as
        defined in the given configuration. Uses the parameter
        NOMINATIM_ADDRESS_LEVEL_CONFIG to determine the location of the
        configuration file.
    """
    cfg = config.load_sub_configuration('', config='ADDRESS_LEVEL_CONFIG')
    load_address_levels(conn, 'address_levels', cfg)
def create_functions(conn: Connection, config: Configuration,
                     enable_diff_updates: bool = True,
                     enable_debug: bool = False) -> None:
    """ (Re)create the PL/pgSQL functions.

        When `enable_diff_updates` is False, trigger code needed only for
        incremental updates is left out. `enable_debug` adds debugging
        output to the functions.
    """
    sql = SQLPreprocessor(conn, config)

    sql.run_sql_file(conn, 'functions.sql',
                     disable_diff_updates=not enable_diff_updates,
                     debug=enable_debug)
# Website scripts for which stubs are generated by setup_website().
WEBSITE_SCRIPTS = (
    'deletable.php',
    'details.php',
    'lookup.php',
    'polygons.php',
    'reverse.php',
    'search.php',
    'status.php'
)

# constants needed by PHP scripts: PHP name, config name, type
PHP_CONST_DEFS = (
    ('Database_DSN', 'DATABASE_DSN', str),
    ('Default_Language', 'DEFAULT_LANGUAGE', str),
    ('Log_DB', 'LOG_DB', bool),
    ('Log_File', 'LOG_FILE', Path),
    ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool),
    ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int),
    ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int),
    ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool),
    ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
    ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
    ('MapIcon_URL', 'MAPICON_URL', str),
)
def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the wikipedia importance tables with new data.
        The import is run in a single transaction so that the new data
        is replace seamlessly.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'wikimedia-importance.sql.gz'

    if not datafile.exists():
        return 1

    # Drop the old tables first; the datafile recreates them. Wrapping
    # everything between BEGIN and COMMIT keeps the swap atomic for readers.
    pre_code = """BEGIN;
                  DROP TABLE IF EXISTS "wikipedia_article";
                  DROP TABLE IF EXISTS "wikipedia_redirect"
               """
    post_code = "COMMIT"
    execute_file(dsn, datafile, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0
def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
    """ Replaces the secondary importance raster data table with new data.

        Returns 0 if all was well, 1 if the raster SQL file could not
        be found and 2 if the PostGIS version is too old for raster
        support. Throws an exception if there was an error reading the file.
    """
    datafile = data_path / 'secondary_importance.sql.gz'
    if not datafile.exists():
        return 1

    with connect(dsn) as conn:
        # Raster support in the datafile requires PostGIS 3.x.
        postgis_version = conn.postgis_version_tuple()
        if postgis_version[0] < 3:
            LOG.error('PostGIS version is too old for using OSM raster data.')
            return 2

    execute_file(dsn, datafile, ignore_errors=ignore_errors)

    return 0
def recompute_importance(conn: Connection) -> None:
    """ Recompute wikipedia links and importance for all entries in placex.
        This is a long-running operations that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        # Triggers would mark every updated row for reindexing;
        # disable them for the duration of the bulk update.
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, rank_search, centroid))
            """)

        # Propagate wikipedia/importance from linked places when the
        # linked place has better data.
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance);
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')

    conn.commit()
def _quote_php_variable(var_type: Type[Any], config: Configuration,
                        conf_name: str) -> str:
    """ Return the value of config variable `conf_name` formatted as
        a PHP literal of the given type.
    """
    if var_type == bool:
        return 'true' if config.get_bool(conf_name) else 'false'

    if var_type == int:
        return cast(str, getattr(config, conf_name))

    # Any other unset value is rendered as PHP 'false'.
    if not getattr(config, conf_name):
        return 'false'

    if var_type == Path:
        value = str(config.get_path(conf_name) or '')
    else:
        value = getattr(config, conf_name)

    # Escape single quotes so the value can be embedded in a
    # single-quoted PHP string literal.
    quoted = value.replace("'", "\\'")
    return f"'{quoted}'"
def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
    """ Create the website script stubs.

        One PHP stub per script in WEBSITE_SCRIPTS is written into
        `basedir`, each defining the configured constants and then
        including the real implementation from the library directory.
    """
    if not basedir.exists():
        LOG.info('Creating website directory.')
        basedir.mkdir()

    assert config.project_dir is not None
    basedata = dedent(f"""\
                      <?php

                      @define('CONST_Debug', $_GET['debug'] ?? false);
                      @define('CONST_LibDir', '{config.lib_dir.php}');
                      @define('CONST_TokenizerDir', '{config.project_dir / 'tokenizer'}');
                      @define('CONST_NominatimVersion', '{NOMINATIM_VERSION!s}');

                      """)

    # Append one @define() per configured PHP constant.
    for php_name, conf_name, var_type in PHP_CONST_DEFS:
        varout = _quote_php_variable(var_type, config, conf_name)

        basedata += f"@define('CONST_{php_name}', {varout});\n"

    template = "\nrequire_once(CONST_LibDir.'/website/{}');\n"

    search_name_table_exists = bool(conn and conn.table_exists('search_name'))

    for script in WEBSITE_SCRIPTS:
        if not search_name_table_exists and script == 'search.php':
            # Without a search_name table only reverse-only search works.
            out = template.format('reverse-only-search.php')
        else:
            out = template.format(script)

        (basedir / script).write_text(basedata + out, 'utf-8')
def invalidate_osm_object(osm_type: str, osm_id: int, conn: Connection,
                          recursive: bool = True) -> None:
    """ Mark the given OSM object for reindexing. When 'recursive' is set
        to True (the default), then all dependent objects are marked for
        reindexing as well.

        'osm_type' must be on of 'N' (node), 'W' (way) or 'R' (relation).
        If the given object does not exist, then nothing happens.
    """
    assert osm_type in ('N', 'R', 'W')

    LOG.warning("Invalidating OSM %s %s%s.",
                OSM_TYPE[osm_type], osm_id,
                ' and its dependent places' if recursive else '')

    with conn.cursor() as cur:
        if recursive:
            # place_force_update() also marks all dependent places.
            sql = """SELECT place_force_update(place_id)
                     FROM placex WHERE osm_type = %s and osm_id = %s"""
        else:
            # Only mark the object itself for reindexing.
            sql = """UPDATE placex SET indexed_status = 2
                     WHERE osm_type = %s and osm_id = %s"""

        cur.execute(sql, (osm_type, osm_id))