]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection, \
16                             index_exists, table_exists, execute_scalar
17 from ..db import properties
18 from ..errors import UsageError
19 from ..tokenizer import factory as tokenizer_factory
20 from . import freeze
21 from ..version import NOMINATIM_VERSION, parse_version
22
# Registry of all check functions in the order in which they are executed.
# It is filled by the `_check` decorator and consumed by `check_database()`.
CHECKLIST = []
24
25
class CheckState(Enum):
    """ Outcome of a single database check.

        A FATAL result aborts the remaining checks, all other results
        let the check run continue.
    """
    # The check passed.
    OK = 0
    # The check failed, further checks are still executed.
    FAIL = 1
    # The check failed and no further checks are executed.
    FATAL = 2
    # The check does not apply to this database.
    NOT_APPLICABLE = 3
    # The check found a problem that is reported as a warning only.
    WARN = 4
34
35
# Return value of a check function: either the bare state or the state
# together with the parameters that are filled into the hint text.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature shared by all check functions.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
38
39
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. `hint` is an optional format
        string that is printed when the check ends with a warning or a
        failure. It is formatted with the parameter mapping that the check
        returns alongside its state and dedented afterwards.

        The wrapped function returns the bare CheckState of the check.
    """
    # Imported locally so that the block does not depend on additional
    # module-level imports.
    from functools import wraps

    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # Keep name and docstring of the check on the wrapper so that the
        # entries of CHECKLIST remain identifiable.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            # Checks may return just the state or (state, hint parameters).
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
73
74
75 class _BadConnection:
76
77     def __init__(self, msg: str) -> None:
78         self.msg = msg
79
80     def close(self) -> None:
81         """ Dummy function to provide the implementation.
82         """
83
84
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions registered in CHECKLIST are executed in order. The
        run stops at the first check that returns CheckState.FATAL.

        Returns 1 when at least one check ended with FAIL or FATAL
        and 0 otherwise.
    """
    try:
        conn = connect(config.get_libpq_dsn())
    except UsageError as err:
        # Hand a placeholder to the checks, so that the connection check
        # can report the error.
        conn = _BadConnection(str(err))  # type: ignore[assignment]

    overall_result = 0
    # Make sure the connection is also closed when a check raises.
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        conn.close()

    return overall_result
104
105
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected to exist.

        The list depends on which of the tables 'search_name' and 'place'
        are present in the database.
    """
    expected = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
                'idx_placex_parent_place_id',
                'idx_placex_geometry_reverse_lookupplacenode',
                'idx_placex_geometry_reverse_lookuppolygon',
                'idx_placex_geometry_placenode',
                'idx_osmline_parent_place_id',
                'idx_osmline_parent_osm_id',
                'idx_postcode_id',
                'idx_postcode_postcode'
                ]

    # Not created when the import was run with --reverse-only.
    search_indexes = ['idx_search_name_nameaddress_vector',
                      'idx_search_name_name_vector',
                      'idx_search_name_centroid',
                      'idx_placex_housenumber',
                      'idx_osmline_parent_osm_id_with_hnr']
    if table_exists(conn, 'search_name'):
        expected += search_indexes

    # Not created when the import was run with --no-updates.
    update_indexes = ['idx_location_area_country_place_id',
                      'idx_place_osm_unique',
                      'idx_placex_rank_address_sector',
                      'idx_placex_rank_boundaries_sector']
    if table_exists(conn, 'place'):
        expected += update_indexes

    return expected
136
137
138 # CHECK FUNCTIONS
139 #
140 # Functions are executed in the order they appear here.
141
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when a _BadConnection placeholder was handed
        in instead of a connection. Its message is reported as the error.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
160
161
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?

             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        The version is read from the 'database_version' property. The
        check is fatal when the property cannot be found or when it
        differs from NOMINATIM_VERSION.
    """
    stored_version = None
    if table_exists(conn, 'nominatim_properties'):
        stored_version = properties.get_property(conn, 'database_version')

    # Without a stored version no specific advice can be given.
    advice = ''
    if stored_version is not None:
        parsed = parse_version(stored_version)

        if parsed == NOMINATIM_VERSION:
            return CheckState.OK

        if parsed < NOMINATIM_VERSION:
            advice = 'Run migrations: nominatim admin --migrate'
        else:
            advice = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': stored_version,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': advice,
                              'config': config}
202
203
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        The check is fatal when the table 'placex' does not exist.
    """
    if not table_exists(conn, 'placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
221
222
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        The check is fatal when the table 'placex' contains no rows.
    """
    # The inner LIMIT caps the number of rows that are counted at 100.
    sample_size = execute_scalar(
        conn, 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
230
231
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        The check fails when the tokenizer for the database cannot be
        loaded or when the tokenizer's own database check returns
        a message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    # The tokenizer returns None when it found no problem and
    # the message to show to the user otherwise.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
248
249
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is not applicable when one of the tables 'search_name'
        or 'place' does not exist. Otherwise a warning is issued when the
        importance table is empty or when no importance table exists
        at all.
    """
    if not table_exists(conn, 'search_name') or not table_exists(conn, 'place'):
        return CheckState.NOT_APPLICABLE

    if table_exists(conn, 'wikimedia_importance'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikimedia_importance')
    elif table_exists(conn, 'wikipedia_article'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikipedia_article')
    else:
        # Neither table exists. Treat that like an empty table instead of
        # running a query against a missing table.
        cnt = 0

    return CheckState.WARN if cnt == 0 else CheckState.OK
267
268
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Counts the rows in placex with an indexed_status above 0. Such
        rows lead to a warning on a frozen database and to a failure
        otherwise.
    """
    pending = execute_scalar(conn, 'SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        frozen_note = """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""
        return CheckState.WARN, {'count': pending, 'index_cmd': frozen_note}

    # When the rank search index is there, this is likely just an
    # interrupted update. Otherwise it looks like the import process
    # got interrupted.
    index_cmd = ('nominatim index'
                 if index_exists(conn, 'idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
296
297
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        The check fails when one of the indexes returned by
        _get_indexes() does not exist. The missing indexes are listed
        in the hint.
    """
    absent = [name for name in _get_indexes(conn) if not index_exists(conn, name)]

    if not absent:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(absent)}
316
317
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        The check fails when pg_index contains entries with indisvalid
        set to false. The names of these indexes are listed in the hint.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Each row consists of the index name only.
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
340
341
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is not applicable unless the setting USE_US_TIGER_DATA
        is enabled. It fails when the table 'location_property_tiger'
        does not exist or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    problem = None
    if not table_exists(conn, 'location_property_tiger'):
        problem = 'TIGER data table not found.'
    elif execute_scalar(conn, 'SELECT count(*) FROM location_property_tiger') == 0:
        problem = 'TIGER data table is empty.'

    if problem is None:
        return CheckState.OK

    return CheckState.FAIL, {'error': problem}