]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
Merge pull request #3586 from lonvia/reduce-lookup-calls
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection, server_version_tuple, \
16                             index_exists, table_exists, execute_scalar
17 from ..db import properties
18 from ..errors import UsageError
19 from ..tokenizer import factory as tokenizer_factory
20 from . import freeze
21 from ..version import NOMINATIM_VERSION, parse_version
22
# Registry of all check functions in the order in which they are executed.
# It is filled by the _check() decorator and consumed by check_database().
CHECKLIST = []
24
25
class CheckState(Enum):
    """ Possible states of a check. FATAL stops check execution entirely.

        FAIL and FATAL make check_database() report an error. The other
        states leave the overall result untouched.
    """
    # The check passed.
    OK = 0
    # The check found a problem, the remaining checks are still run.
    FAIL = 1
    # The check found a problem that makes running further checks pointless.
    FATAL = 2
    # The check does not make sense for this database and was skipped.
    NOT_APPLICABLE = 3
    # The check found something noteworthy that does not count as an error.
    WARN = 4
34
35
# A check returns either the bare state or the state together with the
# values that are filled into the placeholders of the hint text of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature of a check function as it is handed to the _check() decorator.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
38
39
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. 'hint' is an optional format
        string which is printed when the check ends with a warning or
        a failure. Its placeholders are filled from the mapping that the
        check returns together with its state.
    """
    # Imported locally so that the function does not depend on changes
    # to the import section of the module.
    from functools import wraps

    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # Keep name and docstring of the check on the wrapper, so that
        # the registered functions remain identifiable.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            # The title has no trailing newline. Flush explicitly, otherwise
            # it may stay in the output buffer while the check is running.
            print(title, end=' ... ', flush=True)
            ret = func(conn, config)
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # Covers FAIL as well as FATAL.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
73
74
class _BadConnection:
    """ Placeholder that is handed to the checks in place of a database
        connection when connecting has failed. It carries the error
        message of the failed connection attempt.
    """

    def __init__(self, msg: str) -> None:
        # Error message to be reported by the connection check.
        self.msg = msg

    def close(self) -> None:
        """ Dummy function to provide the implementation.
            It does nothing but allows the placeholder to be closed
            like a real connection.
        """
83
84
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        The checks from CHECKLIST are run in order. The function returns
        0 when none of them failed and 1 otherwise. A check ending in
        the state FATAL aborts the run, the remaining checks are skipped.
    """
    try:
        conn = connect(config.get_libpq_dsn())
    except UsageError as err:
        # Continue with a placeholder, so that the connection check can
        # report the error in the same way as any other failed check.
        conn = _BadConnection(str(err))  # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Release the connection on every exit path, including the case
        # that one of the checks raises an exception.
        conn.close()

    return overall_result
104
105
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected to exist in
        the database behind the given connection.

        The list depends on which of the optional tables are present
        and on the version of the database server.
    """
    indexes = ['idx_place_addressline_address_place_id',
               'idx_placex_rank_search',
               'idx_placex_rank_address',
               'idx_placex_parent_place_id',
               'idx_placex_geometry_reverse_lookupplacenode',
               'idx_placex_geometry_reverse_lookuppolygon',
               'idx_placex_geometry_placenode',
               'idx_osmline_parent_place_id',
               'idx_osmline_parent_osm_id',
               'idx_postcode_id',
               'idx_postcode_postcode'
               ]

    # These won't exist if --reverse-only import was used
    if table_exists(conn, 'search_name'):
        indexes.extend(('idx_search_name_nameaddress_vector',
                        'idx_search_name_name_vector',
                        'idx_search_name_centroid'))
        # Compare against major and minor version only. A reference tuple
        # with more elements than the reported version sorts after it when
        # the leading elements are equal, which would wrongly exclude
        # version 11.0. The two-element form gives the right answer for
        # version tuples with two as well as with three elements.
        if server_version_tuple(conn) >= (11, 0):
            indexes.extend(('idx_placex_housenumber',
                            'idx_osmline_parent_osm_id_with_hnr'))

    # These won't exist if --no-updates import was used
    if table_exists(conn, 'place'):
        indexes.extend(('idx_location_area_country_place_id',
                        'idx_place_osm_unique',
                        'idx_placex_rank_address_sector',
                        'idx_placex_rank_boundaries_sector'))

    return indexes
137
138
139 # CHECK FUNCTIONS
140 #
141 # Functions are executed in the order they appear here.
142
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when a placeholder for a failed connection
        was handed in instead of a usable connection.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    # Hand on the original error message for display in the hint.
    return CheckState.FATAL, {'error': conn.msg, 'config': config}
161
162
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?

             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        The check is fatal when the version saved in the database is
        missing or differs from the version of the software.
    """
    # Without the properties table there is no version to read.
    db_version_str = None
    if table_exists(conn, 'nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

    # No specific advice can be given when the version is unknown.
    instruction = ''
    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        if db_version < NOMINATIM_VERSION:
            instruction = 'Run migrations: nominatim admin --migrate'
        else:
            instruction = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
203
204
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        The check is fatal when the table does not exist.
    """
    if not table_exists(conn, 'placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
222
223
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        The check is fatal when the table has no rows.
    """
    # The inner LIMIT caps the number of rows that are counted at 100.
    rows = execute_scalar(conn, 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if rows > 0:
        return CheckState.OK

    return CheckState.FATAL
231
232
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when
        the tokenizer's own database check returns a message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    # The tokenizer returns None when it has nothing to report.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
249
250
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when the 'search_name' or the 'place' table
        is not present. It warns when the importance table is empty or
        when none of the importance tables exists.
    """
    if not table_exists(conn, 'search_name') or not table_exists(conn, 'place'):
        return CheckState.NOT_APPLICABLE

    if table_exists(conn, 'wikimedia_importance'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikimedia_importance')
    elif table_exists(conn, 'wikipedia_article'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikipedia_article')
    else:
        # Neither table exists. Report that as missing data instead of
        # letting the count query raise on a non-existing table.
        cnt = 0

    return CheckState.WARN if cnt == 0 else CheckState.OK
268
269
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        The check passes when no row of placex has an indexed_status
        above 0. Remaining rows yield a warning for a frozen database
        and a failure otherwise.
    """
    pending = execute_scalar(conn, 'SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        return CheckState.WARN, {'count': pending, 'index_cmd': """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""}

    # With the rank index in place, this is likely just an interrupted
    # update. Without it, it looks like the import process got interrupted.
    if index_exists(conn, 'idx_placex_rank_search'):
        index_cmd = 'nominatim index'
    else:
        index_cmd = 'nominatim import --continue indexing'

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
297
298
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        The check fails when one of the expected indexes does not exist
        and reports the names of the missing ones.
    """
    missing = [name for name in _get_indexes(conn) if not index_exists(conn, name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
317
318
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        The check fails when the database catalog lists an index as
        invalid and reports the names of the affected ones.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # The name is the only column of each result row.
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
341
342
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is skipped unless USE_US_TIGER_DATA is enabled in the
        configuration. It fails when the table is missing or empty.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    # The table is only queried once it is known to exist.
    if not table_exists(conn, 'location_property_tiger'):
        problem = 'TIGER data table not found.'
    elif execute_scalar(conn, 'SELECT count(*) FROM location_property_tiger') == 0:
        problem = 'TIGER data table is empty.'
    else:
        return CheckState.OK

    return CheckState.FAIL, {'error': problem}
357         return CheckState.FAIL, dict(error='TIGER data table is empty.')
358
359     return CheckState.OK