]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
fix handling of unused extra tags
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
from enum import Enum
from functools import wraps
from textwrap import dedent
13
14 from nominatim.config import Configuration
15 from nominatim.db.connection import connect, Connection
16 from nominatim.errors import UsageError
17 from nominatim.tokenizer import factory as tokenizer_factory
18
# Registry of all check functions in execution order. The list is filled
# by the _check() decorator and iterated over by check_database().
CHECKLIST = []
20
class CheckState(Enum):
    """ Result states a check may report. A FATAL result aborts the
        execution of all remaining checks.
    """
    # The check passed.
    OK = 0
    # The check failed, remaining checks are still executed.
    FAIL = 1
    # The check failed and no further checks are executed.
    FATAL = 2
    # The check has no meaning for this database.
    NOT_APPLICABLE = 3
    # The check found an issue that does not count as a failure.
    WARN = 4
29
# A check returns either a bare state or a state together with a mapping
# of parameters that are filled into the hint message of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature of a check: it receives the database connection and
# the configuration and reports a CheckResult.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
32
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        `hint` is an optional message template. It is printed when the
        check reports a warning or a failure, after being formatted with
        the parameter mapping that the check returned next to its state.

        The wrapped function returns only the state of the check.
    """
    def decorator(func: CheckFunc) -> CheckFunc:
        # The first line of the docstring is used as the progress title.
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        # Keep name and docstring of the check so that the entries
        # in CHECKLIST remain identifiable.
        @wraps(func)
        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            # Checks may return a plain state or a (state, parameters) tuple.
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # Everything else (FAIL and FATAL) is reported as failure.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
66
67 class _BadConnection:
68
69     def __init__(self, msg: str) -> None:
70         self.msg = msg
71
72     def close(self) -> None:
73         """ Dummy function to provide the implementation.
74         """
75
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions registered in CHECKLIST are executed in order.
        The function returns 0 when no check reported a failure and 1
        when at least one check returned FAIL or FATAL. A FATAL result
        stops the execution of the remaining checks.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        # Continue with a placeholder, so that the checks can report
        # the connection error.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    # Make sure the connection is closed, even when a check raises.
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        conn.close()

    return overall_result
95
96
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are expected to exist.
        The list depends on which tables exist in the database
        and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += ['idx_search_name_nameaddress_vector',
                     'idx_search_name_name_vector',
                     'idx_search_name_centroid']
        # Only checked together with the search_name table and
        # from server version 11 on.
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += ['idx_placex_housenumber',
                         'idx_osmline_parent_osm_id_with_hnr']

    if conn.table_exists('place'):
        expected += ['idx_location_area_country_place_id',
                     'idx_place_osm_unique',
                     'idx_placex_rank_address_sector',
                     'idx_placex_rank_boundaries_sector']

    return expected
123
124
125 # CHECK FUNCTIONS
126 #
# Functions are executed in the order they appear here.
128
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        Reports FATAL together with the error message when `conn` is
        the placeholder for a failed connection, otherwise OK.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
147
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        Reports FATAL when the table 'placex' does not exist.
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
165
166
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        Reports FATAL when the placex table contains no rows.
    """
    # The count is computed over at most 100 rows of the table.
    with conn.cursor() as cur:
        row_count = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if row_count > 0:
        return CheckState.OK

    return CheckState.FATAL
175
176
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        Reports FAIL when the tokenizer cannot be loaded or when the
        tokenizer's own database check returns a message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    # The tokenizer check returns None when there is nothing to report.
    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
193
194
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        Not applicable unless the tables 'search_name' and 'place' both
        exist. Reports WARN when the table 'wikipedia_article' is empty.
    """
    has_required_tables = conn.table_exists('search_name') and conn.table_exists('place')
    if not has_required_tables:
        return CheckState.NOT_APPLICABLE

    with conn.cursor() as cur:
        article_count = cur.scalar('SELECT count(*) FROM wikipedia_article')

    if article_count == 0:
        return CheckState.WARN

    return CheckState.OK
210
211
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Reports FAIL when placex has rows with an indexed_status
        greater than 0 and suggests the command to index them.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    # With the rank index in place it is likely just an interrupted
    # update. Without it, the import process looks interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
234
235
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        Reports FAIL with the list of names when one of the expected
        indexes does not exist.
    """
    missing = [name for name in _get_indexes(conn) if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
254
255
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Reports FAIL with the list of names when the database catalog
        has indexes that are marked as not valid.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Each row holds the name of the index in its only column.
        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
278
279
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        Not applicable unless the setting USE_US_TIGER_DATA is enabled.
        Reports FAIL when the table 'location_property_tiger' is
        missing or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        tiger_rows = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if tiger_rows == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK
296
297     return CheckState.OK