]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
Merge pull request #3367 from lonvia/address-word-counts
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from nominatim.config import Configuration
15 from nominatim.db.connection import connect, Connection
16 from nominatim.db import properties
17 from nominatim.errors import UsageError
18 from nominatim.tokenizer import factory as tokenizer_factory
19 from nominatim.tools import freeze
20 from nominatim.version import NOMINATIM_VERSION, parse_version
21
# Registry of all check functions in the order in which they are run.
# It is filled by the `_check` decorator and iterated by `check_database()`.
CHECKLIST = []
23
class CheckState(Enum):
    """ Possible states of a check. FATAL stops check execution entirely.
    """
    # The check passed.
    OK = 0
    # The check failed. The remaining checks are still executed but the
    # overall result of check_database() becomes 1.
    FAIL = 1
    # The check failed and check_database() returns 1 immediately.
    FATAL = 2
    # The check was skipped. It is reported as 'not applicable'.
    NOT_APPLICABLE = 3
    # The check found an issue that is reported together with the hint
    # but does not change the overall result.
    WARN = 4
32
# A check returns either a bare state or a tuple of the state and a mapping
# with the parameters for the placeholders in the hint of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature of an undecorated check function.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
35
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The title printed for a check is the first line of the docstring
        of the decorated function. 'hint' is an optional str.format()
        template which is printed when the check ends with a warning or
        a failure. Its placeholders are filled from the parameter mapping
        that the check may return together with its state.

        The decorated function returns the bare CheckState only.
    """
    # Local import, so that the decorator does not rely on the import
    # list at the top of the module.
    from functools import wraps # pylint: disable=import-outside-toplevel

    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # wraps() keeps name and docstring of the check on the wrapper,
        # so that the registered functions remain identifiable.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            # Split the result into state and hint parameters.
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
69
70 class _BadConnection:
71
72     def __init__(self, msg: str) -> None:
73         self.msg = msg
74
75     def close(self) -> None:
76         """ Dummy function to provide the implementation.
77         """
78
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions in CHECKLIST are executed in order. Execution stops
        at the first check that returns FATAL. The function returns 1 when
        at least one check ended with FAIL or FATAL and 0 otherwise.

        When the connection to the database cannot be established, the
        checks are handed a _BadConnection object with the error message
        instead.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    # Make sure the connection is closed on every way out of the loop,
    # including when one of the checks raises an exception.
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        conn.close()

    return overall_result
98
99
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of all indexes that are expected to exist in
        the database behind the given connection.

        Some indexes are only expected when the tables 'search_name' or
        'place' exist. Two of the 'search_name'-related indexes are in
        addition only expected for a server version of at least 11.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
        ]
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += [
                'idx_placex_housenumber',
                'idx_osmline_parent_osm_id_with_hnr',
            ]

    if conn.table_exists('place'):
        expected += [
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
            'idx_placex_rank_address_sector',
            'idx_placex_rank_boundaries_sector',
        ]

    return expected
126
127
128 # CHECK FUNCTIONS
129 #
130 # Functions are executed in the order they appear here.
131
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when a _BadConnection placeholder was handed
        in instead of a connection. The error message it carries is then
        reported in the hint.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
150
@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?
             
             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version

        The database version is read from the property 'database_version'.
        It counts as unknown when the table 'nominatim_properties' does
        not exist or the property is not set. Any result other than an
        exact match with NOMINATIM_VERSION is fatal.
    """
    db_version_str = None
    if conn.table_exists('nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')

    # No specific instruction can be given when the version is unknown.
    instruction = ''

    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        if db_version < NOMINATIM_VERSION:
            instruction = 'Run migrations: nominatim admin --migrate'
        else:
            instruction = 'You need to upgrade the Nominatim software.'

    return CheckState.FATAL, {'db_version': db_version_str,
                              'nom_version': NOMINATIM_VERSION,
                              'instruction': instruction,
                              'config': config}
191
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        A missing table is fatal.
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
209
210
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        An empty table is fatal.
    """
    with conn.cursor() as cur:
        # The inner LIMIT caps the number of rows that are counted at 100.
        rows = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if rows > 0:
        return CheckState.OK

    return CheckState.FATAL
219
220
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when the
        tokenizer's own check_database() returns something other than
        None. The returned value is then printed as the hint.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
237
238
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when one of the tables 'search_name' or
        'place' does not exist. An empty table 'wikipedia_article' only
        yields a warning.
    """
    for required_table in ('search_name', 'place'):
        if not conn.table_exists(required_table):
            return CheckState.NOT_APPLICABLE

    with conn.cursor() as cur:
        articles = cur.scalar('SELECT count(*) FROM wikipedia_article')

    if articles == 0:
        return CheckState.WARN

    return CheckState.OK
254
255
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Counts the rows in placex with an indexed_status greater than 0.
        Such rows only lead to a warning when the database is frozen and
        to a failure otherwise.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        return CheckState.WARN, {'count': pending, 'index_cmd': """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""}

    # When the rank search index exists, this is likely just an interrupted
    # update. Otherwise it looks like the import process got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
284
285
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        The check fails when any of the indexes named by _get_indexes()
        does not exist. The missing indexes are listed in the hint.
    """
    missing = [name for name in _get_indexes(conn) if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
304
305
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        The check fails when pg_index has entries with indisvalid set
        to false. The names of these indexes are listed in the hint.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Each result row consists of the index name only.
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
328
329
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is skipped unless the setting USE_US_TIGER_DATA is
        enabled. It fails when the table 'location_property_tiger' is
        missing or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        tiger_rows = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if tiger_rows == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK
346
347     return CheckState.OK