]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
restrict geometry size for SQLite
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from nominatim.config import Configuration
15 from nominatim.db.connection import connect, Connection
16 from nominatim.errors import UsageError
17 from nominatim.tokenizer import factory as tokenizer_factory
18 from nominatim.tools import freeze
19
# Registry of the check functions in the order in which they are executed.
# The list is filled by the _check() decorator.
CHECKLIST = []
21
class CheckState(Enum):
    """ Outcome of a single database check.

        A check that reports FATAL ends the check run, none of the
        remaining checks is executed.
    """
    # The check passed.
    OK = 0
    # The check found a problem. Remaining checks are still run.
    FAIL = 1
    # The check found a problem that stops the check run.
    FATAL = 2
    # The check has no meaning for this database.
    NOT_APPLICABLE = 3
    # The check found something the user should be told about.
    WARN = 4
30
# A check returns either the bare state or a tuple of the state and
# the parameters that are filled into the hint text of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature that every check function has to implement.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
33
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. `hint` is an optional text
        that is printed when the check ends with a warning or a failure.
        It may contain format placeholders, which are filled from the
        parameters that the check returns together with its state.

        The wrapped function always returns the bare CheckState.
    """
    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        def run_check(conn: Connection, config: Configuration) -> CheckState:
            # The title has no trailing newline. Flush explicitly, so that
            # it becomes visible before a long-running check starts and not
            # only together with the result.
            print(title, end=' ... ', flush=True)
            ret = func(conn, config)
            # Checks may return the state alone or together with the
            # parameters for the hint text.
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
67
68 class _BadConnection:
69
70     def __init__(self, msg: str) -> None:
71         self.msg = msg
72
73     def close(self) -> None:
74         """ Dummy function to provide the implementation.
75         """
76
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        The checks are executed in the order in which they are registered
        in CHECKLIST. The run stops at the first check that ends with
        the state FATAL.

        Returns 1 when at least one check has ended with FAIL or FATAL
        and 0 otherwise.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        # Continue with a placeholder, so that the failed connection
        # is reported by the checks themselves.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Also release the connection when a check raises an exception.
        conn.close()

    return overall_result
96
97
def _get_indexes(conn: Connection) -> List[str]:
    """ Return the names of the indexes that are looked up by the
        index check. The list depends on which tables exist in
        the database and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
        ]
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += [
                'idx_placex_housenumber',
                'idx_osmline_parent_osm_id_with_hnr',
            ]

    if conn.table_exists('place'):
        expected += [
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
            'idx_placex_rank_address_sector',
            'idx_placex_rank_boundaries_sector',
        ]

    return expected
124
125
126 # CHECK FUNCTIONS
127 #
# Functions are executed in the order they appear here.
129
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when `conn` is the placeholder for
        a failed connection. The hint then receives the error message
        and the configuration.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
148
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        The check is fatal when the table does not exist.
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
166
167
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        The check is fatal when the table has no rows.
    """
    with conn.cursor() as cur:
        # The inner LIMIT caps the number of rows that are counted.
        sample_size = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
176
177
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when
        the tokenizer's own database check returns a message.
        That message is then printed as the hint.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
194
195
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when one of the tables 'search_name' or
        'place' does not exist. It warns when the table 'wikipedia_article'
        is missing or empty.
    """
    if not conn.table_exists('search_name') or not conn.table_exists('place'):
        return CheckState.NOT_APPLICABLE

    # A missing table is what the hint describes. Report it as a warning
    # instead of letting the query below raise an exception.
    if not conn.table_exists('wikipedia_article'):
        return CheckState.WARN

    with conn.cursor() as cur:
        cnt = cur.scalar('SELECT count(*) FROM wikipedia_article')

    return CheckState.WARN if cnt == 0 else CheckState.OK
211
212
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        Counts the rows of placex with an indexed_status greater than 0.
        Such rows cause a warning when the database is frozen and
        a failure otherwise.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    if freeze.is_frozen(conn):
        index_cmd = """\
            Database is marked frozen, it cannot be updated.
            Low counts of unindexed places are fine."""
        return CheckState.WARN, {'count': pending, 'index_cmd': index_cmd}

    # When the index exists, then this is likely just an interrupted update.
    # Otherwise it looks like the import process got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
241
242
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        The check fails when one of the indexes returned by
        _get_indexes() does not exist. The hint lists the missing ones.
    """
    missing = [name for name in _get_indexes(conn)
               if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
261
262
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        The check fails when the database catalog has entries with
        indisvalid set to false. The hint lists their names.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        invalid = [row[0] for row in cur]

    if not invalid:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(invalid)}
285
286
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is skipped when the setting USE_US_TIGER_DATA is not
        enabled. It fails when the table 'location_property_tiger'
        is missing or empty.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        row_count = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if row_count == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK
302             return CheckState.FAIL, dict(error='TIGER data table is empty.')
303
304     return CheckState.OK