]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
disable ZIP5+ test for Python frontend
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection, server_version_tuple,\
16                             index_exists, table_exists, execute_scalar
17 from ..db import properties
18 from ..errors import UsageError
19 from ..tokenizer import factory as tokenizer_factory
20 from . import freeze
21 from ..version import NOMINATIM_VERSION, parse_version
22
# Registry of all check functions. It is filled by the `_check` decorator
# at import time and walked in order by `check_database()`.
CHECKLIST = []
24
class CheckState(Enum):
    """ Outcome of a single database check.

        A check that reports FATAL ends the whole check run immediately.
    """
    # The check passed.
    OK = 0
    # The check failed; the remaining checks are still executed.
    FAIL = 1
    # The check failed and no further checks are executed.
    FATAL = 2
    # The check makes no sense for this database and was skipped.
    NOT_APPLICABLE = 3
    # A problem was found that does not count as a failure.
    WARN = 4
33
# A check returns either its bare state or the state together with a
# mapping of parameters that is used to fill in the hint text.
CheckResult = Union[
    CheckState,
    Tuple[CheckState, Mapping[str, Any]],
]
# Signature every check function must implement.
CheckFunc = Callable[
    [Connection, Configuration],
    CheckResult,
]
36
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. `hint` is an optional format
        template that is printed when the check ends with a warning or a
        failure. Its placeholders are filled from the parameter mapping
        that the check may return together with its state.

        The registered wrapper always returns the bare CheckState.
    """
    # Imported locally so that this block does not depend on a change to
    # the module-level imports.
    from functools import wraps

    def decorator(func: CheckFunc) -> CheckFunc:
        # The first docstring line doubles as the progress title.
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # Keep __name__ and __doc__ of the check on the wrapper, so that
        # entries of CHECKLIST remain identifiable.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
70
71 class _BadConnection:
72
73     def __init__(self, msg: str) -> None:
74         self.msg = msg
75
76     def close(self) -> None:
77         """ Dummy function to provide the implementation.
78         """
79
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions registered in CHECKLIST are executed in order.
        Returns 0 when no check failed and 1 when at least one check
        reported FAIL or FATAL. A FATAL result stops the run immediately.
    """
    try:
        conn = connect(config.get_libpq_dsn())
    except UsageError as err:
        # Continue with a placeholder, so that the connection check can
        # report the error in the usual format.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Release the connection on every exit path, including a check
        # that raises an exception.
        conn.close()

    return overall_result
99
100
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that the database is expected
        to have. The list depends on which tables exist and on the
        version of the database server.
    """
    # Indexes that are always expected.
    expected = ['idx_place_addressline_address_place_id',
                'idx_placex_rank_search',
                'idx_placex_rank_address',
                'idx_placex_parent_place_id',
                'idx_placex_geometry_reverse_lookuppolygon',
                'idx_placex_geometry_placenode',
                'idx_osmline_parent_place_id',
                'idx_osmline_parent_osm_id',
                'idx_postcode_id',
                'idx_postcode_postcode']

    if table_exists(conn, 'search_name'):
        expected += ['idx_search_name_nameaddress_vector',
                     'idx_search_name_name_vector',
                     'idx_search_name_centroid']
        # NOTE(review): the comparison is against a three-element tuple.
        # If the helper returns fewer components, an 11.0 server compares
        # as smaller. Confirm this matches the index creation code.
        if server_version_tuple(conn) >= (11, 0, 0):
            expected += ['idx_placex_housenumber',
                         'idx_osmline_parent_osm_id_with_hnr']

    if table_exists(conn, 'place'):
        expected += ['idx_location_area_country_place_id',
                     'idx_place_osm_unique',
                     'idx_placex_rank_address_sector',
                     'idx_placex_rank_boundaries_sector']

    return expected
127
128
129 # CHECK FUNCTIONS
130 #
131 # Functions are executed in the order they appear here.
132
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        Reports FATAL together with the connection error when `conn` is
        the placeholder for a failed connection, OK otherwise.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
151
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?

             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        Reports OK when the version saved in the database equals the
        version of the software. Any other outcome is FATAL, including
        a database that has no version saved at all.
    """
    db_version_str = None
    if table_exists(conn, 'nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

    # Without a saved version there is no specific advice to give.
    instruction = ''
    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        if db_version < NOMINATIM_VERSION:
            instruction = 'Run migrations: nominatim admin --migrate'
        else:
            instruction = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
192
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        Reports FATAL when the table `placex` does not exist.
    """
    if not table_exists(conn, 'placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
210
211
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        Reports FATAL when the table `placex` contains no rows.
    """
    # The inner LIMIT keeps the query cheap: only the presence of rows
    # matters, not their exact number.
    sample_size = execute_scalar(conn, 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
219
220
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        Loads the tokenizer configured for the database and runs the
        tokenizer's own database check. Reports FAIL when the tokenizer
        cannot be loaded or when its check returns a message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    # The tokenizer returns None when everything is fine and an error
    # description otherwise.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
237
238
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is not applicable when one of the tables `search_name`
        or `place` does not exist. Otherwise it reports WARN when neither
        importance table exists or when the available one is empty.
    """
    if not table_exists(conn, 'search_name') or not table_exists(conn, 'place'):
        return CheckState.NOT_APPLICABLE

    if table_exists(conn, 'wikimedia_importance'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikimedia_importance')
    elif table_exists(conn, 'wikipedia_article'):
        cnt = execute_scalar(conn, 'SELECT count(*) FROM wikipedia_article')
    else:
        # Neither table exists. Report the warning instead of failing
        # with an SQL error on the missing table.
        return CheckState.WARN

    return CheckState.WARN if cnt == 0 else CheckState.OK
256
257
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Reports OK when no row of `placex` has a positive indexed_status.
        Leftover rows are only a warning for a frozen database and a
        failure otherwise.
    """
    pending = execute_scalar(conn, 'SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        frozen_note = """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""
        return CheckState.WARN, {'count': pending, 'index_cmd': frozen_note}

    # With the rank index in place this is likely just an interrupted
    # update. Without it the import process itself got interrupted.
    index_cmd = ('nominatim index'
                 if index_exists(conn, 'idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
285
286
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        Reports FAIL together with the names of all expected indexes
        that do not exist in the database.
    """
    missing = [name for name in _get_indexes(conn)
               if not index_exists(conn, name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
305
306
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Reports FAIL together with the names of all indexes that the
        database has marked as invalid.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Each result row holds the index name in its only column.
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
329
330
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is not applicable when the setting USE_US_TIGER_DATA
        is disabled. Otherwise it reports FAIL when the table
        `location_property_tiger` is missing or empty.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not table_exists(conn, 'location_property_tiger'):
        problem = 'TIGER data table not found.'
    elif execute_scalar(conn, 'SELECT count(*) FROM location_property_tiger') == 0:
        problem = 'TIGER data table is empty.'
    else:
        return CheckState.OK

    return CheckState.FAIL, {'error': problem}
346
347     return CheckState.OK