]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tools/check_database.py
introduce sanitizer step before token analysis
[nominatim.git] / nominatim / tools / check_database.py
1 """
2 Collection of functions that check if the database is complete and functional.
3 """
4 from enum import Enum
5 from textwrap import dedent
6
7 from nominatim.db.connection import connect
8 from nominatim.errors import UsageError
9 from nominatim.tokenizer import factory as tokenizer_factory
10
# Registry of all check functions in the order in which they are run.
# It is filled by the _check decorator while the module gets loaded.
CHECKLIST = []
12
class CheckState(Enum):
    """ Outcome of a single check function.
    """
    # The check succeeded.
    OK = 0
    # The check did not succeed. The remaining checks are still executed.
    FAIL = 1
    # The check did not succeed and check execution stops entirely.
    FATAL = 2
    # The check was skipped and does not influence the overall result.
    NOT_APPLICABLE = 3
20
21 def _check(hint=None):
22     """ Decorator for checks. It adds the function to the list of
23         checks to execute and adds the code for printing progress messages.
24     """
25     def decorator(func):
26         title = func.__doc__.split('\n', 1)[0].strip()
27
28         def run_check(conn, config):
29             print(title, end=' ... ')
30             ret = func(conn, config)
31             if isinstance(ret, tuple):
32                 ret, params = ret
33             else:
34                 params = {}
35             if ret == CheckState.OK:
36                 print('\033[92mOK\033[0m')
37             elif ret == CheckState.NOT_APPLICABLE:
38                 print('not applicable')
39             else:
40                 print('\x1B[31mFailed\033[0m')
41                 if hint:
42                     print(dedent(hint.format(**params)))
43             return ret
44
45         CHECKLIST.append(run_check)
46         return run_check
47
48     return decorator
49
class _BadConnection:
    """ Stand-in for a database connection that could not be established.
        It only carries the error message of the failed connection attempt.
    """

    def __init__(self, msg):
        self.msg = msg

    def close(self):
        """ Do nothing. The function only exists so that the object can
            be closed in the same way as a real connection.
        """
        return None
58
def check_database(config):
    """ Run a number of checks on the database and return the status.

        The checks from CHECKLIST are executed in order. Returns 0 when
        no check reported FAIL or FATAL and 1 otherwise. A FATAL result
        aborts the run immediately. When the connection cannot be
        established, the checks run against a _BadConnection placeholder
        that carries the error message.
    """
    try:
        conn = connect(config.get_libpq_dsn()).connection
    except UsageError as err:
        conn = _BadConnection(str(err))

    overall_result = 0
    # Make sure the connection is released on every exit path, including
    # a check that raises an unexpected exception.
    try:
        for check in CHECKLIST:
            ret = check(conn, config)
            if ret == CheckState.FATAL:
                return 1
            if ret == CheckState.FAIL:
                overall_result = 1
    finally:
        conn.close()

    return overall_result
78
79
def _get_indexes(conn):
    """ Return the list of names of the indexes that are expected to
        exist in the database behind `conn`.

        The list depends on whether the tables 'search_name' and 'place'
        exist and on the version of the database server.
    """
    expected = [
        'idx_place_addressline_address_place_id',
        'idx_placex_rank_search',
        'idx_placex_rank_address',
        'idx_placex_parent_place_id',
        'idx_placex_geometry_reverse_lookuppolygon',
        'idx_placex_geometry_placenode',
        'idx_osmline_parent_place_id',
        'idx_osmline_parent_osm_id',
        'idx_postcode_id',
        'idx_postcode_postcode',
    ]

    if conn.table_exists('search_name'):
        expected += ['idx_search_name_nameaddress_vector',
                     'idx_search_name_name_vector',
                     'idx_search_name_centroid']
        # The housenumber indexes are only expected with server version 11+.
        server_version = conn.server_version_tuple()
        if server_version >= (11, 0, 0):
            expected += ['idx_placex_housenumber',
                         'idx_osmline_parent_osm_id_with_hnr']

    if conn.table_exists('place'):
        expected += ['idx_placex_pendingsector',
                     'idx_location_area_country_place_id',
                     'idx_place_osm_unique']

    return expected
105
106
107 # CHECK FUNCTIONS
108 #
109 # Functions are executed in the order they appear here.
110
@_check(hint="""\
             {error}

             Hints:
             * Is the database server started?
             * Check the NOMINATIM_DATABASE_DSN variable in your local .env
             * Try connecting to the database with the same settings

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_connection(conn, config):
    """ Checking database connection

        The check is fatal when `conn` is the placeholder object that is
        created after a failed connection attempt.
    """
    if not isinstance(conn, _BadConnection):
        return CheckState.OK

    # Pass on the connection error and the configuration for the hint text.
    return CheckState.FATAL, {'error': conn.msg, 'config': config}
129
@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_placex_table(conn, config):
    """ Checking for placex table

        A missing placex table is reported as a fatal error.
    """
    if not conn.table_exists('placex'):
        # The configuration is needed to fill in the hint text.
        return CheckState.FATAL, {'config': config}

    return CheckState.OK
147
148
@_check(hint="""placex table has no data. Did the import finish sucessfully?""")
def check_placex_size(conn, _):
    """ Checking for placex content

        An empty placex table is reported as a fatal error.
    """
    # The inner LIMIT caps the number of rows that are counted. Only
    # the distinction between empty and non-empty matters here.
    query = 'SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x'

    with conn.cursor() as cur:
        row_count = cur.scalar(query)

    if row_count > 0:
        return CheckState.OK

    return CheckState.FATAL
157
158
@_check(hint="""{msg}""")
def check_tokenizer(_, config):
    """ Checking that tokenizer works

        The check fails when the tokenizer cannot be loaded or when the
        tokenizer's own database check returns something other than None.
        That return value is then used as the message for the hint.
    """
    try:
        tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    except UsageError:
        return CheckState.FAIL, {'msg': """\
            Cannot load tokenizer. Did the import finish sucessfully?"""}

    problem = tokenizer.check_database(config)

    if problem is not None:
        return CheckState.FAIL, {'msg': problem}

    return CheckState.OK
175
176
@_check(hint="""\
             The indexing didn't finish. {count} entries are not yet indexed.

             To index the remaining entries, run:   {index_cmd}
             """)
def check_indexing(conn, _):
    """ Checking indexing status

        The check fails when placex contains rows with an indexed_status
        greater than 0. The hint then proposes a command for indexing
        the remaining rows.
    """
    with conn.cursor() as cur:
        pending = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0')

    if pending == 0:
        return CheckState.OK

    # When the rank search index exists, this is likely just an
    # interrupted update. Otherwise it looks like the import process
    # itself got interrupted.
    index_cmd = ('nominatim index'
                 if conn.index_exists('idx_placex_rank_search')
                 else 'nominatim import --continue indexing')

    return CheckState.FAIL, {'count': pending, 'index_cmd': index_cmd}
199
200
@_check(hint="""\
             The following indexes are missing:
               {indexes}

             Rerun the index creation with:   nominatim import --continue db-postprocess
             """)
def check_database_indexes(conn, _):
    """ Checking that database indexes are complete

        The check fails when one of the expected indexes does not exist.
        The names of the missing indexes are listed in the hint.
    """
    missing = [name for name in _get_indexes(conn)
               if not conn.index_exists(name)]

    if not missing:
        return CheckState.OK

    return CheckState.FAIL, {'indexes': '\n  '.join(missing)}
219
220
@_check(hint="""\
             At least one index is invalid. That can happen, e.g. when index creation was
             disrupted and later restarted. You should delete the affected indices
             and recreate them.

             Invalid indexes:
               {indexes}
             """)
def check_database_index_valid(conn, _):
    """ Checking that all database indexes are valid

        The check fails when the database catalog marks at least one index
        as invalid. The names of these indexes are listed in the hint.
    """
    with conn.cursor() as cur:
        cur.execute(""" SELECT relname FROM pg_class, pg_index
                        WHERE pg_index.indisvalid = false
                        AND pg_index.indexrelid = pg_class.oid""")

        # Iterating the cursor yields one row sequence per index, not a
        # plain string. Extract the single 'relname' column so that the
        # names can be joined into the hint text below.
        broken = [row[0] for row in cur]

    if broken:
        return CheckState.FAIL, dict(indexes='\n  '.join(broken))

    return CheckState.OK
243
244
@_check(hint="""\
             {error}
             Run TIGER import again:   nominatim add-data --tiger-data <DIR>
             """)
def check_tiger_table(conn, config):
    """ Checking TIGER external data table.

        The check is skipped unless USE_US_TIGER_DATA is enabled in the
        configuration. It fails when the table location_property_tiger
        is missing or has no rows.
    """
    if not config.get_bool('USE_US_TIGER_DATA'):
        return CheckState.NOT_APPLICABLE

    error = None
    if not conn.table_exists('location_property_tiger'):
        error = 'TIGER data table not found.'
    else:
        with conn.cursor() as cur:
            if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0:
                error = 'TIGER data table is empty.'

    if error is not None:
        return CheckState.FAIL, {'error': error}

    return CheckState.OK