]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
ensure consistent country assignments
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection, server_version_tuple,\
16                             index_exists, table_exists, execute_scalar
17 from ..db import properties
18 from ..errors import UsageError
19 from ..tokenizer import factory as tokenizer_factory
20 from . import freeze
21 from ..version import NOMINATIM_VERSION, parse_version
22
# Registry of all known checks. It is filled by the `_check` decorator and
# processed in order by `check_database()`.
CHECKLIST = []
24
class CheckState(Enum):
    """ Possible states of a check. FATAL stops check execution entirely.
    """
    # The check passed.
    OK = 0
    # The check failed; remaining checks still run but the overall
    # result of check_database() becomes 1.
    FAIL = 1
    # The check failed and no further checks are executed.
    FATAL = 2
    # The check was skipped because it does not apply to this database.
    NOT_APPLICABLE = 3
    # A problem was found that is reported together with the hint but
    # does not change the overall result of check_database().
    WARN = 4
33
# A check returns either a bare state or a state together with the
# parameters that are filled into the placeholders of the check's hint text.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Common signature of all check functions.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
36
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        `hint` is an optional message template. When the check ends with
        a warning or a failure, the template is filled via `str.format()`
        with the parameters returned by the check, dedented and printed.
    """
    # Imported locally so that the decorator is self-contained.
    from functools import wraps

    def decorator(func: CheckFunc) -> CheckFunc:
        # The first line of the check's docstring is used as progress title.
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # Keep name and docstring of the check on the registered wrapper.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            # Checks may return a bare state or a (state, parameters) tuple.
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
70
class _BadConnection:
    """ Placeholder used in place of a database connection when
        connecting failed. It carries the error message of the
        failed attempt.
    """

    def __init__(self, msg: str) -> None:
        # Text of the error that prevented the connection.
        self.msg = msg

    def close(self) -> None:
        """ Do nothing. The function only exists so that the placeholder
            can be closed like a real connection.
        """
        return None
79
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All checks registered in CHECKLIST are executed in order. The
        function returns 0 when no check failed and 1 when at least one
        check ended with FAIL or FATAL. A FATAL result aborts the run
        immediately.
    """
    try:
        conn = connect(config.get_libpq_dsn())
    except UsageError as err:
        # Hand a placeholder to the checks so that the first check can
        # report the connection problem together with its hint.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Release the connection on every exit path, including the case
        # that one of the checks raises an exception.
        conn.close()

    return overall_result
99
100
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected to exist.
        The list depends on which tables are present in the database
        and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookupplacenode',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    # Search indexes are absent after an import with --reverse-only.
    if table_exists(conn, 'search_name'):
        expected += ['idx_search_name_nameaddress_vector',
                     'idx_search_name_name_vector',
                     'idx_search_name_centroid']
        if server_version_tuple(conn) >= (11, 0, 0):
            expected += ['idx_placex_housenumber',
                         'idx_osmline_parent_osm_id_with_hnr']

    # Update indexes are absent after an import with --no-updates.
    if table_exists(conn, 'place'):
        expected += ['idx_location_area_country_place_id',
                     'idx_place_osm_unique',
                     'idx_placex_rank_address_sector',
                     'idx_placex_rank_boundaries_sector']

    return expected
132
133
134 # CHECK FUNCTIONS
135 #
136 # Functions are executed in the order they appear here.
137
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        Reports a fatal error when `conn` is the placeholder for a
        failed connection attempt.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    # Pass on the original error message for display in the hint.
    return CheckState.FATAL, {'error': conn.msg, 'config': config}
156
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?

             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        The database version is read from the 'database_version'
        property. Any mismatch, including a missing version, is fatal.
    """
    db_version_str: Optional[str] = None
    if table_exists(conn, 'nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

    # Without a version in the database no specific advice can be given.
    instruction = ''
    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        if db_version < NOMINATIM_VERSION:
            instruction = 'Run migrations: nominatim admin --migrate'
        else:
            instruction = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
197
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        A missing placex table is a fatal error.
    """
    if not table_exists(conn, 'placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
215
216
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        An empty placex table is a fatal error.
    """
    # Counting a sample of at most 100 rows is enough to detect
    # an empty table.
    sample_size = execute_scalar(conn, 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
224
225
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        Loads the tokenizer for the database and runs the tokenizer's
        own database check. Any message returned by that check is
        reported as failure.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {
            'msg': '            Cannot load tokenizer. Did the import finish successfully?'}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
242
243
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when the 'search_name' or the 'place' table
        does not exist. Otherwise a warning is issued when the
        importance table is empty or when no importance table exists.
    """
    if not table_exists(conn, 'search_name') or not table_exists(conn, 'place'):
        return CheckState.NOT_APPLICABLE

    if table_exists(conn, 'wikimedia_importance'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikimedia_importance')
    elif table_exists(conn, 'wikipedia_article'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikipedia_article')
    else:
        # Neither table is present. Report the missing data as a warning
        # instead of failing with an SQL error on a non-existing table.
        return CheckState.WARN

    return CheckState.WARN if cnt == 0 else CheckState.OK
261
262
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Counts the rows in placex with an indexed_status above 0.
        Left-over rows are a failure, or only a warning when the
        database is frozen.
    """
    pending = execute_scalar(conn, 'SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        frozen_note = ('            Database is marked frozen, it cannot be updated.\n'
                       '            Low counts of unindexed places are fine.')
        return CheckState.WARN, {'count': pending, 'index_cmd': frozen_note}

    # When the rank search index exists, this is likely just an
    # interrupted update. Otherwise the import itself got interrupted.
    remedy = ('nominatim index'
              if index_exists(conn, 'idx_placex_rank_search')
              else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': remedy}
290
291
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        Fails with the list of all expected indexes that could not
        be found in the database.
    """
    absent = [name for name in _get_indexes(conn) if not index_exists(conn, name)]

    if not absent:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(absent)}
310
311
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Fails with the names of all indexes that the system catalog
        marks as invalid.
    """
    query = """ SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid"""

    with conn.cursor() as cur:
        cur.execute(query)
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
334
335
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check only applies when the USE_US_TIGER_DATA setting is
        enabled. It fails when the TIGER table is missing or empty.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    error = None
    if not table_exists(conn, 'location_property_tiger'):
        error = 'TIGER data table not found.'
    elif execute_scalar(conn, 'SELECT count(*) FROM location_property_tiger') == 0:
        error = 'TIGER data table is empty.'

    if error is None:
        return CheckState.OK

    return CheckState.FAIL, {'error': error}