]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
Merge pull request #2799 from lonvia/fix-inclusions-with-extratags
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from typing import Callable, Optional, Any, Union, Tuple, Mapping, List
11 from enum import Enum
12 from textwrap import dedent
13
14 from nominatim.config import Configuration
15 from nominatim.db.connection import connect, Connection
16 from nominatim.errors import UsageError
17 from nominatim.tokenizer import factory as tokenizer_factory
18
# List of all check functions in the order in which they are executed.
# It is filled by the _check() decorator.
CHECKLIST = []
20
class CheckState(Enum):
    """ Outcome of a single database check.

        A FATAL result aborts the execution of all remaining checks.
    """
    # The check has passed.
    OK = 0
    # The check has failed, remaining checks are still executed.
    FAIL = 1
    # The check has failed and no further checks are executed.
    FATAL = 2
    # The check does not apply to the database and was skipped.
    NOT_APPLICABLE = 3
    # The check found a problem that is not counted as a failure.
    WARN = 4
29
# Result of a check function: either the bare state or the state together
# with the parameters that are filled into the hint message of the check.
CheckResult = Union[CheckState, Tuple[CheckState, Mapping[str, Any]]]
# Signature that every check function has to implement.
CheckFunc = Callable[[Connection, Configuration], CheckResult]
32
def _check(hint: Optional[str] = None) -> Callable[[CheckFunc], CheckFunc]:
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The title printed for a check is the first line of the docstring
        of the decorated function. `hint` is an optional message template
        which is printed when the check ends with a warning or a failure.
        It is filled in with str.format() using the parameters that the
        check has returned together with its state.

        The decorated function returns the bare state of the check.
    """
    # Remove the source code indentation from the template before the
    # parameters are filled in. Parameters may span multiple lines
    # (e.g. a list of index names joined with a newline). Dedenting only
    # after formatting would let those lines determine the common margin
    # and leave the rest of the hint indented.
    template = dedent(hint) if hint else ''

    def format_hint(params: Mapping[str, Any]) -> str:
        # The second dedent keeps the output unchanged for hints that
        # consist of an indented parameter value only.
        return dedent(template.format(**params))

    def decorator(func: CheckFunc) -> CheckFunc:
        title = (func.__doc__ or '').split('\n', 1)[0].strip()

        def run_check(conn: Connection, config: Configuration) -> CheckState:
            print(title, end=' ... ')
            ret = func(conn, config)
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(format_hint(params))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(format_hint(params))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
66
67 class _BadConnection:
68
69     def __init__(self, msg: str) -> None:
70         self.msg = msg
71
72     def close(self) -> None:
73         """ Dummy function to provide the implementation.
74         """
75
def check_database(config: Configuration) -> int:
    """ Run a number of checks on the database and return the status.

        All functions registered in CHECKLIST are executed in order.
        Execution stops at the first check that returns a fatal result.

        Returns 0 when no check has failed and 1 otherwise.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        # Continue with a placeholder, so that the failure is reported
        # by the checks themselves.
        conn = _BadConnection(str(err)) # type: ignore[assignment]

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Close the connection even when a check raises an exception.
        conn.close()

    return overall_result
95
96
def _get_indexes(conn: Connection) -> List[str]:
    """ Compile the list of names of the indexes to check for.

        Some of the names are only included when the tables 'search_name'
        or 'place' exist and when the server version is at least 11.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
        ]
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += [
                'idx_placex_housenumber',
                'idx_osmline_parent_osm_id_with_hnr',
            ]

    if conn.table_exists('place'):
        expected += [
            'idx_placex_pendingsector',
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
        ]

    return expected
122
123
124 # CHECK FUNCTIONS
125 #
# Functions are executed in the order they appear here.
127
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn: Any, config: Configuration) -> CheckResult:
    """ Checking database connection

        The check is fatal when a _BadConnection placeholder was handed in
        instead of a connection. The error message of the placeholder is
        then forwarded to the hint.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
146
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking for placex table

        A missing table is reported as a fatal error.
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
164
165
@_check(hint="""placex table has no data. Did the import finish successfully?""")
def check_placex_size(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for placex content

        An empty table is reported as a fatal error.
    """
    # The inner LIMIT caps the number of rows that are counted at 100.
    with conn.cursor() as cur:
        sample_size = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
174
175
@_check(hint="""{msg}""")
def check_tokenizer(_: Connection, config: Configuration) -> CheckResult:
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when the
        tokenizer's own database check returns anything other than None.
        The returned value is then used as the hint message.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish successfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
192
193
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking for wikipedia/wikidata data

        The check is skipped when there is no 'search_name' table. An
        empty 'wikipedia_article' table only results in a warning.
    """
    if not conn.table_exists('search_name'):
        return CheckState.NOT_APPLICABLE

    with conn.cursor() as cur:
        article_count = cur.scalar('SELECT count(*) FROM wikipedia_article')

    if article_count == 0:
        return CheckState.WARN

    return CheckState.OK
209
210
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking indexing status

        The check fails when placex contains rows with an indexed_status
        greater than 0. The hint then names the command to continue with.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    # When the rank_search index exists, this is likely just an interrupted
    # update. Otherwise it looks like the import process got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
233
234
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that database indexes are complete

        The names of all expected indexes that do not exist are
        reported in the hint, one per line.
    """
    missing = [name for name in _get_indexes(conn) if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
253
254
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn: Connection, _: Configuration) -> CheckResult:
    """ Checking that all database indexes are valid

        Looks up all indexes that are marked as invalid in pg_index.
        The check fails when there is at least one. The names of the
        invalid indexes are reported in the hint, one per line.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Iterating over the cursor yields one row per index. Only the
        # name in the first (and only) column is needed, so that the
        # names can be joined into a string below.
        broken = [row[0] for row in cur]

    if broken:
        return CheckState.FAIL, dict(indexes='\n  '.join(broken))

    return CheckState.OK
277
278
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking TIGER external data table.

        The check is skipped unless the USE_US_TIGER_DATA setting is
        enabled. It fails when the 'location_property_tiger' table is
        missing or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        row_count = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if row_count == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK