git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tools/check_database.py
reduce from 3 to 2 packages
[nominatim.git] / src / nominatim_db / tools / check_database.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from ..config import Configuration
15 from ..db.connection import connect, Connection
16 from ..db import properties
17 from ..errors import UsageError
18 from ..tokenizer import factory as tokenizer_factory
19 from . import freeze
20 from ..version import NOMINATIM_VERSION, parse_version
21
# Registry of all wrapped check functions. The _check() decorator appends
# to it when a check is defined, check_database() runs the entries in
# list order.
CHECKLIST = []
23
class CheckState(Enum):
    """ Outcome of a single database check.

        OK             - the check passed.
        FAIL           - the check failed, the remaining checks still run.
        FATAL          - the check failed and check execution stops entirely.
        NOT_APPLICABLE - the check was skipped for this database.
        WARN           - a problem was found that does not count as failure.
    """
    OK = 0
    FAIL = 1
    FATAL = 2
    NOT_APPLICABLE = 3
    WARN = 4
32
# A check returns either a bare state or a state together with a mapping
# of parameters that are substituted into the hint text of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature of a check function as expected by the _check() decorator.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
35
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. `hint` is an optional format
        template which is printed when the check ends with a warning or
        a failure. Its placeholders are filled from the parameter mapping
        that the check may return together with its state.
    """
    def _render_hint(params: Mapping[str, Any]) -> str:
        # Remove the source code indentation from the template _before_
        # substituting the parameters. Multi-line parameters (for example
        # a list of index names) would otherwise change the common margin
        # and the complete hint would be printed misaligned.
        # The outer dedent cleans up hints that consist only of a
        # parameter which comes with its own indentation.
        return dedent(dedent(hint or '').format(**params))

    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            # Checks may return the bare state or a (state, params) tuple.
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(_render_hint(params))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(_render_hint(params))
            return ret

        # Register the wrapper, so that the progress output is produced
        # when check_database() walks through the list.
        CHECKLIST.append(run_check)
        return run_check

    return decorator
69
class _BadConnection:
    """ Placeholder that is handed to the checks when the connection to
        the database could not be established. It only carries the
        message of the error that occurred.
    """

    def __init__(self, msg: str) -> None:
        self.msg = msg

    def close(self) -> None:
        """ Do nothing. The function only exists so that the placeholder
            can be closed in the same way as a connection.
        """
78
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions registered in CHECKLIST are executed in order until
        the list is exhausted or a check reports a fatal error.

        Returns 0 when no check has failed and 1 when at least one check
        ended with the state FAIL or FATAL. Warnings and checks that are
        not applicable have no influence on the result.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        # Keep going with a placeholder. The connection check is
        # responsible for reporting the error to the user.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                # No point in running the remaining checks.
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Release the connection on every way out of the loop, including
        # exceptions raised by one of the checks.
        conn.close()

    return overall_result
98
99
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected in the database.

        The list always contains a fixed set of indexes. Further names
        are added depending on the existence of the tables 'search_name'
        and 'place' and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
        ]
        # These two are only expected for server version 11 or later.
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += [
                'idx_placex_housenumber',
                'idx_osmline_parent_osm_id_with_hnr',
            ]

    if conn.table_exists('place'):
        expected += [
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
            'idx_placex_rank_address_sector',
            'idx_placex_rank_boundaries_sector',
        ]

    return expected
126
127
128 # CHECK FUNCTIONS
129 #
130 # Functions are executed in the order they appear here.
131
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when a _BadConnection placeholder is handed
        in instead of a connection. The error message it carries is
        made available to the hint.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
150
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?
             
             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        The version is read from the property 'database_version'. The
        check is fatal when the property table or the property is missing
        or when the version differs from the version of the software.
    """
    db_version_str = None
    if conn.table_exists('nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

    # Without a known database version there is no advice to give.
    instruction = ''
    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        if db_version < NOMINATIM_VERSION:
            instruction = 'Run migrations: nominatim admin --migrate'
        else:
            instruction = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
191
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        A missing table is a fatal error.
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
209
210
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        An empty table is a fatal error.
    """
    with conn.cursor() as cur:
        # The inner LIMIT keeps the count from going through more than
        # 100 rows. All we need to know is if there are rows at all.
        row_count = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if row_count > 0:
        return CheckState.OK

    return CheckState.FATAL
219
220
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        Fails when the tokenizer cannot be loaded or when the tokenizer's
        own database check returns a message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    # The tokenizer returns None when it has no problem to report and
    # the text to show to the user otherwise.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
237
238
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when one of the tables 'search_name' or
        'place' does not exist. Otherwise a warning is issued when the
        importance table is empty or when there is no importance table
        at all.
    """
    if not conn.table_exists('search_name') or not conn.table_exists('place'):
        return CheckState.NOT_APPLICABLE

    # Use 'wikimedia_importance' when available and fall back to
    # 'wikipedia_article' otherwise.
    if conn.table_exists('wikimedia_importance'):
        query = 'SELECT count(*) FROM wikimedia_importance'
    elif conn.table_exists('wikipedia_article'):
        query = 'SELECT count(*) FROM wikipedia_article'
    else:
        # Neither of the tables exists. Report that as the warning the
        # hint describes instead of running into an SQL error.
        return CheckState.WARN

    with conn.cursor() as cur:
        cnt = cur.scalar(query)

    return CheckState.WARN if cnt == 0 else CheckState.OK
257
258
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Counts the rows in placex with an indexed_status greater than 0.
        Leftover rows cause a warning when the database is frozen and a
        failure otherwise.
    """
    with conn.cursor() as cur:
        unindexed = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if unindexed == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        # There is no command to offer here, explain the situation instead.
        frozen_note = """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""
        return CheckState.WARN, {'count': unindexed, 'index_cmd': frozen_note}

    # When the rank index already exists, this is likely just an
    # interrupted update. Otherwise it looks like the import process
    # got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': unindexed, 'index_cmd': index_cmd}
287
288
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        Fails with the list of names when one of the expected indexes
        does not exist.
    """
    missing = [name for name in _get_indexes(conn) if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    # One index per line, indented like the placeholder in the hint.
    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
307
308
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Fails with the list of names when the system catalog contains
        indexes that are marked as invalid.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    # One index per line, indented like the placeholder in the hint.
    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
331
332
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is skipped unless the setting USE_US_TIGER_DATA is
        enabled. It fails when the table is missing or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        row_count = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if row_count == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK