1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Collection of functions that check if the database is complete and functional.
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
12 from textwrap import dedent
14 from nominatim.config import Configuration
15 from nominatim.db.connection import connect, Connection
16 from nominatim.errors import UsageError
17 from nominatim.tokenizer import factory as tokenizer_factory
# Outcome of a single check. The members referenced elsewhere in this module
# are OK, WARN, NOT_APPLICABLE, FAIL and FATAL.
21 class CheckState(Enum):
22 """ Possible states of a check. FATAL stops check execution entirely.
# Result of a check function: either a bare state, or a (state, mapping) pair.
# NOTE(review): the mapping presumably carries the values substituted into the
# check's hint text - confirm against the decorator.
30 CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature shared by all check functions: (connection, configuration) -> result.
31 CheckFunc = Callable[[Connection, Configuration], CheckResult]
# Decorator factory for check functions.
#   hint: optional message template; where it is shown it is formatted with
#         `params`, dedented and printed after the WARNING / Failed message.
# The wrapper `run_check` prints the first line of the check's docstring as a
# progress title (so that line is user-visible output), calls the check,
# prints the outcome (OK / WARNING / not applicable / Failed) and is appended
# to CHECKLIST.
# NOTE(review): `params` presumably comes from the tuple form of the check
# result (see the isinstance test) - confirm.
33 def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
34 """ Decorator for checks. It adds the function to the list of
35 checks to execute and adds the code for printing progress messages.
37 def decorator(func: CheckFunc) -> CheckFunc:
38 title = (func.__doc__ or '').split('\n', 1)[0].strip()
40 def run_check(conn: Connection, config: Configuration) -> CheckState:
41 print(title, end=' ... ')
42 ret = func(conn, config)
43 if isinstance(ret, tuple):
47 if ret == CheckState.OK:
48 print('\033[92mOK\033[0m')
49 elif ret == CheckState.WARN:
50 print('\033[93mWARNING\033[0m')
53 print(dedent(hint.format(**params)))
54 elif ret == CheckState.NOT_APPLICABLE:
55 print('not applicable')
57 print('\x1B[31mFailed\033[0m')
59 print(dedent(hint.format(**params)))
62 CHECKLIST.append(run_check)
# NOTE(review): these two methods appear to belong to the `_BadConnection`
# stand-in that check_database builds from the error text when connecting
# fails - confirm. The constructor takes that message (check_connection later
# reads it as `conn.msg`); `close` takes no arguments and returns None.
69 def __init__(self, msg: str) -> None:
72 def close(self) -> None:
73 """ Dummy function to provide the implementation.
# Entry point: connects using the libpq DSN from the configuration and calls
# every function in CHECKLIST with the connection and the configuration.
# If connecting raises UsageError, a _BadConnection carrying the error text is
# used in place of a real connection. Each result is tested for FATAL and for
# membership in (FATAL, FAIL). Returns an int status.
76 def check_database(config: Configuration) -> int:
77 """ Run a number of checks on the database and return the status.
80 conn = connect(config.get_libpq_dsn()).connection
81 except UsageError as err:
82 conn = _BadConnection(str(err)) # type: ignore[assignment]
85 for check in CHECKLIST:
86 ret = check(conn, config)
87 if ret == CheckState.FATAL:
90 if ret in (CheckState.FATAL, CheckState.FAIL):
# Builds the list of index names the index check looks for. A base set is
# extended depending on which tables exist and on the server version.
97 def _get_indexes(conn: Connection) -> List[str]:
98 indexes = ['idx_place_addressline_address_place_id',
99 'idx_placex_rank_search',
100 'idx_placex_rank_address',
101 'idx_placex_parent_place_id',
102 'idx_placex_geometry_reverse_lookuppolygon',
103 'idx_placex_geometry_placenode',
104 'idx_osmline_parent_place_id',
105 'idx_osmline_parent_osm_id',
107 'idx_postcode_postcode'
# Added only if the search_name table exists.
109 if conn.table_exists('search_name'):
110 indexes.extend(('idx_search_name_nameaddress_vector',
111 'idx_search_name_name_vector',
112 'idx_search_name_centroid'))
# Added only if the server version tuple is at least (11, 0, 0).
113 if conn.server_version_tuple() >= (11, 0, 0):
114 indexes.extend(('idx_placex_housenumber',
115 'idx_osmline_parent_osm_id_with_hnr'))
# Added only if the place table exists.
116 if conn.table_exists('place'):
117 indexes.extend(('idx_placex_pendingsector',
118 'idx_location_area_country_place_id',
119 'idx_place_osm_unique'))
126 # Functions are executed in the order they appear here.
132 * Is the database server started?
133 * Check the NOMINATIM_DATABASE_DSN variable in your local .env
134 * Try connecting to the database with the same settings
136 Project directory: {config.project_dir}
137 Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
# `conn` is typed Any because it may be the _BadConnection stand-in rather
# than a real Connection. In that case the check reports FATAL and passes the
# stored error message and the configuration as hint parameters `error` and
# `config`.
139 def check_connection(conn: Any, config: Configuration) -> CheckResult:
140 """ Checking database connection
142 if isinstance(conn, _BadConnection):
143 return CheckState.FATAL, dict(error=conn.msg, config=config)
148 placex table not found
151 * Are you connecting to the right database?
152 * Did the import process finish without errors?
154 Project directory: {config.project_dir}
155 Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
# Tests whether the placex table exists. The FATAL result passes the
# configuration along as hint parameter `config`.
157 def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
158 """ Checking for placex table
160 if conn.table_exists('placex'):
163 return CheckState.FATAL, dict(config=config)
# Reports OK when placex contains at least one row and FATAL otherwise.
# The count runs over a subquery limited to 100 rows, so at most 100 rows
# are ever counted. The configuration argument is unused.
166 @_check(hint="""placex table has no data. Did the import finish successfully?""")
167 def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
168 """ Checking for placex content
170 with conn.cursor() as cur:
171 cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')
173 return CheckState.OK if cnt > 0 else CheckState.FATAL
# Obtains the tokenizer for the database from the tokenizer factory and calls
# its check_database() with the configuration. The hint template consists only
# of the `msg` placeholder, so each FAIL result supplies the full text to
# display as hint parameter `msg`. The connection argument is unused.
176 @_check(hint="""{msg}""")
177 def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
178 """ Checking that tokenizer works
181 tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
183 return CheckState.FAIL, dict(msg="""\
184 Cannot load tokenizer. Did the import finish successfully?""")
186 result = tokenizer.check_database(config)
191 return CheckState.FAIL, dict(msg=result)
195 Wikipedia/Wikidata importance tables missing.
196 Quality of search results may be degraded. Reverse geocoding is unaffected.
197 See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
# Returns NOT_APPLICABLE when there is no search_name table. Otherwise counts
# the rows of wikipedia_article: WARN when the table is empty, OK otherwise.
# The configuration argument is unused.
199 def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
200 """ Checking for wikipedia/wikidata data
202 if not conn.table_exists('search_name'):
203 return CheckState.NOT_APPLICABLE
205 with conn.cursor() as cur:
206 cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')
208 return CheckState.WARN if cnt == 0 else CheckState.OK
212 The indexing didn't finish. {count} entries are not yet indexed.
214 To index the remaining entries, run: {index_cmd}
# Counts the placex rows with indexed_status > 0. The FAIL result carries that
# count as hint parameter `count` and a suggested command as `index_cmd`:
# 'nominatim index' when idx_placex_rank_search exists, and
# 'nominatim import --continue indexing' for the interrupted-import case.
# The configuration argument is unused.
216 def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
217 """ Checking indexing status
219 with conn.cursor() as cur:
220 cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')
225 if conn.index_exists('idx_placex_rank_search'):
226 # Likely just an interrupted update.
227 index_cmd = 'nominatim index'
229 # Looks like the import process got interrupted.
230 index_cmd = 'nominatim import --continue indexing'
232 return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd)
236 The following indexes are missing:
239 Rerun the index creation with: nominatim import --continue db-postprocess
# Collects every name from _get_indexes() for which conn.index_exists() is
# false. The FAIL result passes the collected names, one per line, as hint
# parameter `indexes`. The configuration argument is unused.
241 def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
242 """ Checking that database indexes are complete
245 for index in _get_indexes(conn):
246 if not conn.index_exists(index):
247 missing.append(index)
250 return CheckState.FAIL, dict(indexes='\n '.join(missing))
256 At least one index is invalid. That can happen, e.g. when index creation was
257 disrupted and later restarted. You should delete the affected indices
# Queries pg_class joined with pg_index for relations whose index entry has
# indisvalid = false and gathers their names. The FAIL result passes the
# names, one per line, as hint parameter `indexes`.
# The configuration argument is unused.
263 def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
264 """ Checking that all database indexes are valid
266 with conn.cursor() as cur:
267 cur.execute(""" SELECT relname FROM pg_class, pg_index
268 WHERE pg_index.indisvalid = false
269 AND pg_index.indexrelid = pg_class.oid""")
271 broken = [c[0] for c in cur]
274 return CheckState.FAIL, dict(indexes='\n '.join(broken))
281 Run TIGER import again: nominatim add-data --tiger-data <DIR>
283 def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
284 """ Checking TIGER external data table.
286 if not config.get_bool('USE_US_TIGER_DATA'):
287 return CheckState.NOT_APPLICABLE
289 if not conn.table_exists('location_property_tiger'):
290 return CheckState.FAIL, dict(error='TIGER data table not found.')
292 with conn.cursor() as cur:
293 if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0:
294 return CheckState.FAIL, dict(error='TIGER data table is empty.')