]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
postcodes: strip leading country codes
[nominatim.git] / nominatim / tools / check_database.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Collection of functions that check if the database is complete and functional.
9 """
10 from enum import Enum
11 from textwrap import dedent
12
13 from nominatim.db.connection import connect
14 from nominatim.errors import UsageError
15 from nominatim.tokenizer import factory as tokenizer_factory
16
17 CHECKLIST = []
18
class CheckState(Enum):
    """ Outcome reported by a single database check.

        A check that reports FATAL aborts the execution of all
        remaining checks.
    """
    # The check passed.
    OK = 0
    # The check did not pass; the remaining checks are still run.
    FAIL = 1
    # The check did not pass and no further checks are run.
    FATAL = 2
    # The check does not apply to this database.
    NOT_APPLICABLE = 3
    # The check found an issue that is reported without failing the run.
    WARN = 4
27
def _check(hint=None):
    """ Decorator for checks. It adds the function to the list of
        checks to execute and adds the code for printing progress messages.

        `hint` is an optional message template. It is printed when the
        check ends with a warning or a failure, after being formatted with
        the parameters that the check returned together with its state.

        The decorated function is called with the connection and the
        configuration. It must return either a CheckState or a tuple of
        a CheckState and a dictionary with the parameters for the hint.
        The wrapper prints the outcome and returns the bare CheckState.
    """
    from functools import wraps

    def decorator(func):
        # The first line of the docstring is used as progress title.
        # Docstrings may be missing (e.g. they are stripped when running
        # with `python -OO`), so fall back to the function name.
        title = (func.__doc__ or func.__name__).split('\n', 1)[0].strip()

        @wraps(func)
        def run_check(conn, config):
            print(title, end=' ... ')
            ret = func(conn, config)
            if isinstance(ret, tuple):
                ret, params = ret
            else:
                params = {}
            if ret == CheckState.OK:
                print('\033[92mOK\033[0m')
            elif ret == CheckState.WARN:
                print('\033[93mWARNING\033[0m')
                if hint:
                    print('')
                    print(dedent(hint.format(**params)))
            elif ret == CheckState.NOT_APPLICABLE:
                print('not applicable')
            else:
                # FAIL and FATAL are reported in the same way.
                print('\x1B[31mFailed\033[0m')
                if hint:
                    print(dedent(hint.format(**params)))
            return ret

        # Checks are executed in the order in which they get registered.
        CHECKLIST.append(run_check)
        return run_check

    return decorator
61
class _BadConnection:
    """ Placeholder that is used in place of a database connection
        when connecting has failed. The error message is kept in `msg`.
    """

    def __init__(self, msg):
        self.msg = msg

    def close(self):
        """ Do nothing. The function only exists so that the placeholder
            can be closed like a real connection.
        """
        return None
70
def check_database(config):
    """ Run a number of checks on the database and return the status.

        The checks registered in CHECKLIST are executed in order.
        Returns 0 when none of them failed and 1 otherwise. A check that
        ends with CheckState.FATAL stops the execution immediately.
        The database connection is always closed before returning, also
        when one of the checks raises an exception.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        # Continue with a placeholder, so that the checks can report
        # the connection error in the usual output format.
        conn = _BadConnection(str(err))

    overall_result = 0
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        # Release the connection no matter how the loop was left.
        conn.close()

    return overall_result
90
91
def _get_indexes(conn):
    """ Return the list of names of the indexes that are expected
        to exist in the database.

        The list always contains a fixed set of indexes. Further names
        are added depending on the presence of the tables 'search_name'
        and 'place' and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += [
            'idx_search_name_nameaddress_vector',
            'idx_search_name_name_vector',
            'idx_search_name_centroid',
        ]
        # These two are only expected on servers of version 11 or later.
        if conn.server_version_tuple() >= (11, 0, 0):
            expected += [
                'idx_placex_housenumber',
                'idx_osmline_parent_osm_id_with_hnr',
            ]

    if conn.table_exists('place'):
        expected += [
            'idx_placex_pendingsector',
            'idx_location_area_country_place_id',
            'idx_place_osm_unique',
        ]

    return expected
117
118
119 # CHECK FUNCTIONS
120 #
121 # Functions are executed in the order they appear here.
122
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn, config):
    """ Checking database connection

        Reports a fatal error together with the saved error message
        when `conn` is the placeholder for a failed connection.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    # The hint needs the error text and the configuration for its output.
    return CheckState.FATAL, {'error': conn.msg, 'config': config}
141
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn, config):
    """ Checking for placex table

        Reports a fatal error when the table 'placex' does not exist.
    """
    if not conn.table_exists('placex'):
        # The configuration is handed on for use in the hint.
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
159
160
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
def check_placex_size(conn, _):
    """ Checking for placex content

        Reports a fatal error when the table 'placex' has no rows.
    """
    # The inner LIMIT keeps the count restricted to at most 100 rows.
    with conn.cursor() as cur:
        sample_size = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x')

    if sample_size > 0:
        return CheckState.OK

    return CheckState.FATAL
169
170
@_check(hint="""{msg}""")
def check_tokenizer(_, config):
    """ Checking that tokenizer works

        Fails when the tokenizer cannot be loaded or when the check
        function of the tokenizer returns something other than None.
        The returned value is then printed as the hint.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish sucessfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
187
188
@_check(hint="""\
             Wikipedia/Wikidata importance tables missing.
             Quality of search results may be degraded. Reverse geocoding is unaffected.
             See https://nominatim.org/release-docs/latest/admin/Import/#wikipediawikidata-rankings
             """)
def check_existance_wikipedia(conn, _):
    """ Checking for wikipedia/wikidata data

        The check is skipped when there is no table 'search_name'.
        Otherwise a warning is issued when the table 'wikipedia_article'
        has no rows.
    """
    if conn.table_exists('search_name'):
        with conn.cursor() as cur:
            articles = cur.scalar('SELECT count(*) FROM wikipedia_article')

        return CheckState.OK if articles != 0 else CheckState.WARN

    return CheckState.NOT_APPLICABLE
204
205
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn, _):
    """ Checking indexing status

        Fails when 'placex' has rows with an indexed_status greater
        than 0. The hint then names the number of such rows and
        the command to run for indexing them.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    # When the rank search index exists, this is likely just an interrupted
    # update. Otherwise it looks like the import process got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
228
229
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn, _):
    """ Checking that database indexes are complete

        Fails when one of the expected indexes does not exist.
        The names of the missing indexes are listed in the hint.
    """
    missing = [name for name in _get_indexes(conn)
               if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
248
249
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn, _):
    """ Checking that all database indexes are valid

        Fails when the database has indexes that are marked as invalid.
        The names of these indexes are listed in the hint.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Each result is a row with a single column. Extract the name,
        # as joining the rows themselves would raise a TypeError.
        broken = [row[0] for row in cur]

    if broken:
        return CheckState.FAIL, dict(indexes='\n  '.join(broken))

    return CheckState.OK
272
273
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn, config):
    """ Checking TIGER external data table.

        The check is skipped when the setting USE_US_TIGER_DATA is off.
        Otherwise it fails when the table 'location_property_tiger'
        does not exist or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    if not conn.table_exists('location_property_tiger'):
        return CheckState.FAIL, {'error': 'TIGER data table not found.'}

    with conn.cursor() as cur:
        rows = cur.scalar('SELECT count(*) FROM location_property_tiger')

    if rows == 0:
        return CheckState.FAIL, {'error': 'TIGER data table is empty.'}

    return CheckState.OK
290
291     return CheckState.OK