]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
Merge pull request #2684 from lonvia/translit-keep-spacing-marks
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from enum import Enum
11 from textwrap import dedent
12
13 from nominatim.db.connection import connect
14 from nominatim.errors import UsageError
15 from nominatim.tokenizer import factory as tokenizer_factory
16
# Registry of all check functions in the order in which they are executed.
# It is filled by the _check decorator at import time.
CHECKLIST = []
18
class CheckState(Enum):
    """ Result states that a single check may report.

        A check that reports FATAL ends the execution of all checks;
        none of the remaining checks are run.
    """
    # The check passed.
    OK = 0
    # The check did not pass; the remaining checks are still executed.
    FAIL = 1
    # The check did not pass and no further checks are executed.
    FATAL = 2
    # The check does not apply to this installation.
    NOT_APPLICABLE = 3
26
def _check(hint=None):
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        The first line of the docstring of the decorated function is
        printed as the title of the check. When the function has no
        docstring, the name of the function is used instead.

        The decorated function must return either a CheckState or a tuple
        of a CheckState and a dictionary of parameters. When the state is
        neither OK nor NOT_APPLICABLE and a `hint` was given, the hint is
        formatted with the parameters and printed below the result.

        The wrapper that is registered returns the CheckState only.
    """
    def decorator(func):
        # The docstring may be missing (for example, when Python is run
        # with -OO). Fall back to the function name instead of crashing
        # while the module is being imported.
        title = (func.__doc__ or func.__name__).split('\n', 1)[0].strip()

        def run_check(conn, config):
            # Flush explicitly, so that the title is already visible while
            # the check is still running.
            print(title, end=' ... ', flush=True)
            ret = func(conn, config)
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        CHECKLIST.append(run_check)
        return run_check

    return decorator
55
56 class _BadConnection:
57
58     def __init__(self, msg):
59         self.msg = msg
60
61     def close(self):
62         """ Dummy function to provide the implementation.
63         """
64
def check_database(config):
    """ Run a number of checks on the database and return the status.

        All checks from CHECKLIST are executed in order until one of them
        reports a FATAL state. The function returns 0 when no check
        reported FAIL or FATAL and 1 otherwise.

        When the connection to the database cannot be established, the
        checks are run against a _BadConnection object that carries the
        error message.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        conn = _BadConnection(str(err))

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                # Later checks rely on this one, so stop right here.
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Close the connection on every path, including when a check
        # raises an exception.
        conn.close()

    return overall_result
84
85
86 def _get_indexes(conn):
87     indexes = ['idx_place_addressline_address_place_id',
88                'idx_placex_rank_search',
89                'idx_placex_rank_address',
90                'idx_placex_parent_place_id',
91                'idx_placex_geometry_reverse_lookuppolygon',
92                'idx_placex_geometry_placenode',
93                'idx_osmline_parent_place_id',
94                'idx_osmline_parent_osm_id',
95                'idx_postcode_id',
96                'idx_postcode_postcode'
97               ]
98     if conn.table_exists('search_name'):
99         indexes.extend(('idx_search_name_nameaddress_vector',
100                         'idx_search_name_name_vector',
101                         'idx_search_name_centroid'))
102         if conn.server_version_tuple() >= (11, 0, 0):
103             indexes.extend(('idx_placex_housenumber',
104                             'idx_osmline_parent_osm_id_with_hnr'))
105     if conn.table_exists('place'):
106         indexes.extend(('idx_placex_pendingsector',
107                         'idx_location_area_country_place_id',
108                         'idx_place_osm_unique'))
109
110     return indexes
111
112
113 # CHECK FUNCTIONS
114 #
115 # Functions are executed in the order they appear here.
116
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn, config):
    """ Checking database connection

        The check is fatal when `conn` is the _BadConnection placeholder.
        The error message stored in the placeholder is then handed on
        for the hint. (The first line of this docstring is printed as
        the title of the check.)
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    return CheckState.FATAL, {'error': conn.msg, 'config': config}
135
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn, config):
    """ Checking for placex table

        The check is fatal when the table 'placex' does not exist.
        (The first line of this docstring is printed as the title of
        the check.)
    """
    if not conn.table_exists('placex'):
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
153
154
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
def check_placex_size(conn, _):
    """ Checking for placex content

        The check is fatal when the placex table has no rows. At most
        100 rows are counted because only emptiness matters. (The first
        line of this docstring is printed as the title of the check.)
    """
    with conn.cursor() as cur:
        row_count = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if row_count > 0:
        return CheckState.OK

    return CheckState.FATAL
163
164
@_check(hint="""{msg}""")
def check_tokenizer(_, config):
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when the
        check function of the tokenizer returns anything other than None.
        That return value is then printed as the hint. (The first line of
        this docstring is printed as the title of the check.)
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish sucessfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
181
182
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn, _):
    """ Checking indexing status

        The check fails when there are rows in placex with an
        indexed_status greater than 0. The hint then names the command
        to use for indexing the remaining rows. (The first line of this
        docstring is printed as the title of the check.)
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    # When the rank search index exists, this is likely just an interrupted
    # update. Otherwise it looks like the import process got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
205
206
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn, _):
    """ Checking that database indexes are complete

        The check fails when any of the expected indexes does not exist.
        The names of the missing indexes are handed on for the hint.
        (The first line of this docstring is printed as the title of
        the check.)
    """
    missing = [name for name in _get_indexes(conn)
               if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
225
226
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn, _):
    """ Checking that all database indexes are valid

        The check fails when the database has an index that is marked
        as invalid in pg_index. The names of the invalid indexes are
        handed on for the hint. (The first line of this docstring is
        printed as the title of the check.)
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Iterating over the cursor yields whole rows. Pick the only
        # column, so that join() below gets plain strings.
        broken = [row[0] for row in cur]

    if broken:
        return CheckState.FAIL, dict(indexes='\n  '.join(broken))

    return CheckState.OK
249
250
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn, config):
    """ Checking TIGER external data table.

        The check is not applicable when the setting USE_US_TIGER_DATA
        is off. Otherwise it fails when the table location_property_tiger
        is missing or empty. (The first line of this docstring is printed
        as the title of the check.)
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        tiger_rows = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if tiger_rows == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK