1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 from itertools import chain
10 import psycopg2.extras
12 from place_inserter import PlaceColumn
13 from table_compare import NominatimID, DBRow
15 from nominatim.indexer import indexer
16 from nominatim.tokenizer import factory as tokenizer_factory
# Generic post-import/post-update sanity checks shared by several steps below.
# NOTE(review): this excerpt is missing some original lines (e.g. the docstring
# terminator and the subquery's filter line, presumably "WHERE c > 1") —
# confirm against the full file.
18 def check_database_integrity(context):
19     """ Check some generic constraints on the tables.
21     with context.db.cursor() as cur:
22         # place_addressline should not have duplicate (place_id, address_place_id)
23         cur.execute("""SELECT count(*) FROM
24                          (SELECT place_id, address_place_id, count(*) as c
25                           FROM place_addressline GROUP BY place_id, address_place_id) x
27         assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
29         # word table must not have empty word_tokens
30         cur.execute("SELECT count(*) FROM word WHERE word_token = ''")
31         assert cur.fetchone()[0] == 0, "Empty word tokens found in word table"
35 ################################ GIVEN ##################################
# 'named' captures the optional "named " prefix of the step text; it is None
# when absent, which is what add_row() keys on below.
37 @given("the (?P<named>named )?places")
38 def add_data_to_place_table(context, named):
39     """ Add entries into the place table. 'named places' makes sure that
40         the entries get a random name when none is explicitly given.
42     with context.db.cursor() as cur:
        # Disable the insert trigger so the rows go in exactly as given,
        # then re-enable it for subsequent steps.
43         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
44         for row in context.table:
45             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
46         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
48 @given("the relations")
49 def add_data_to_planet_relations(context):
50     """ Add entries into the osm2pgsql relation middle table. This is needed
51         for tests on data that looks up members.
53     with context.db.cursor() as cur:
54         for r in context.table:
            # NOTE(review): initialisation of parts/members/last_node/last_way,
            # the parsing of each member id ('mid'), and the per-member-type
            # branch headers are not visible in this excerpt — confirm against
            # the full file before relying on the logic below.
60             for m in r['members'].split(','):
63                     parts.insert(last_node, int(mid.oid))
67                     parts.insert(last_way, int(mid.oid))
70                     parts.append(int(mid.oid))
                # members is a flat list alternating member ref and role.
72                 members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
            # Columns named "tags+<key>" become a flat [key1, val1, key2, ...] list.
76             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
78             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
79                            VALUES (%s, %s, %s, %s, %s, %s)""",
80                            (r['id'], last_node, last_way, parts, members, list(tags)))
# NOTE(review): the @given decorator line for this step is not visible in
# this excerpt — confirm against the full file.
83 def add_data_to_planet_ways(context):
84     """ Add entries into the osm2pgsql way middle table. This is necessary for
85         tests that look up node ids in this table.
87     with context.db.cursor() as cur:
88         for r in context.table:
            # Columns named "tags+<key>" become a flat [key1, val1, key2, ...] list.
89             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
90             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
92             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
93                         (r['id'], nodes, list(tags)))
95 ################################ WHEN ##################################
# NOTE(review): the @when decorator line and the tail of the run_nominatim()
# argument list are not visible in this excerpt — confirm against the full file.
98 def import_and_index_data_from_place_table(context):
99     """ Import data previously set up in the place table.
101     context.nominatim.run_nominatim('import', '--continue', 'load-data',
102                                     '--index-noanalyse', '-q',
105     check_database_integrity(context)
    # Drop the captured log output when everything succeeded; otherwise it
    # would be printed alongside errors that have nothing to do with the import.
107     # Remove the output of the input, when all was right. Otherwise it will be
108     # output when there are errors that had nothing to do with the import
110     context.log_capture.buffer.clear()
112 @when("updating places")
113 def update_place_table(context):
114     """ Update the place table with the given data. Also runs all triggers
115         related to updates and reindexes the new data.
    # Reinstall the SQL functions first so the update triggers are current.
117     context.nominatim.run_nominatim('refresh', '--functions')
118     with context.db.cursor() as cur:
119         for row in context.table:
120             PlaceColumn(context).add_row(row, False).db_insert(cur)
122     context.nominatim.reindex_placex(context.db)
123     check_database_integrity(context)
    # Drop the captured log output when everything succeeded; otherwise it
    # would be printed alongside errors unrelated to this update.
125     # Remove the output of the input, when all was right. Otherwise it will be
126     # output when there are errors that had nothing to do with the import
128     context.log_capture.buffer.clear()
131 @when("updating postcodes")
132 def update_postcodes(context):
133     """ Rerun the calculation of postcodes.
135     context.nominatim.run_nominatim('refresh', '--postcodes')
137 @when("marking for delete (?P<oids>.*)")
138 def delete_places(context, oids):
139     """ Remove entries from the place table. Multiple ids may be given
140         separated by commas. Also runs all triggers
141         related to updates and reindexes the new data.
    # Reinstall the SQL functions first so the delete triggers are current.
143     context.nominatim.run_nominatim('refresh', '--functions')
144     with context.db.cursor() as cur:
145         for oid in oids.split(','):
            # query_osm_id() fills the '{}' placeholder with the id condition.
146             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
148     context.nominatim.reindex_placex(context.db)
    # Drop the captured log output when everything succeeded; otherwise it
    # would be printed alongside errors unrelated to this deletion.
150     # Remove the output of the input, when all was right. Otherwise it will be
151     # output when there are errors that had nothing to do with the import
153     context.log_capture.buffer.clear()
155 ################################ THEN ##################################
157 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
158 def check_place_contents(context, table, exact):
159     """ Check contents of place/placex tables. Each row represents a table row
160         and all data must match. Data not present in the expected table, may
161         be arbitrary. The rows are identified via the 'object' column which must
162         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
163         rows match (for example because 'class' was left out and there are
164         multiple entries for the given OSM object) then all must match. All
165         expected rows are expected to be present with at least one database row.
166         When 'exactly' is given, there must not be additional rows in the database.
168     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
169         expected_content = set()
170         for row in context.table:
171             nid = NominatimID(row['object'])
172             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
173             if table == 'placex':
174                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
            # %-interpolation of the table name is safe here: the step regex
            # restricts 'table' to 'placex' or 'place'. The '{}' placeholder
            # is filled by query_osm_id() with the id condition.
175             query += " FROM %s WHERE {}" % (table, )
176             nid.query_osm_id(cur, query)
177             assert cur.rowcount > 0, "No rows found for " + row['object']
            # NOTE(review): the result-row loop header (e.g. "for res in cur:")
            # and the guards around the 'exact' bookkeeping are not visible in
            # this excerpt — confirm against the full file.
181                 expected_content.add((res['osm_type'], res['osm_id'], res['class']))
183                 DBRow(nid, res, context).assert_row(row, ['object'])
        # With 'exactly', the database must contain precisely the listed rows.
186             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
187             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
190 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
191 def check_place_has_entry(context, table, oid):
192     """ Ensure that no database row for the given object exists. The ID
193         must be of the form '<NRW><osm id>[:<class>]'.
195     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # Table name interpolation is safe: the step regex restricts it to
        # 'placex' or 'place'.
196         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
197         assert cur.rowcount == 0, \
198                "Found {} entries for ID {}".format(cur.rowcount, oid)
201 @then("search_name contains(?P<exclude> not)?")
202 def check_search_name_contents(context, exclude):
203     """ Check contents of place/placex tables. Each row represents a table row
204         and all data must match. Data not present in the expected table, may
205         be arbitrary. The rows are identified via the 'object' column which must
206         have an identifier of the form '<NRW><osm id>[:<class>]'. All
207         expected rows are expected to be present with at least one database row.
    # The tokenizer's analyzer is needed to translate the human-readable
    # terms in name_vector/nameaddress_vector columns into word ids.
209     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
211     with tokenizer.name_analyzer() as analyzer:
212         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
213             for row in context.table:
214                 nid = NominatimID(row['object'])
215                 nid.row_by_place_id(cur, 'search_name',
216                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
217                 assert cur.rowcount > 0, "No rows found for " + row['object']
                # NOTE(review): the result-row loop header and the
                # 'if exclude:'/'else:' branch headers are not visible in this
                # excerpt — confirm against the full file.
220                     db_row = DBRow(nid, res, context)
221                     for name, value in zip(row.headings, row.cells):
222                         if name in ('name_vector', 'nameaddress_vector'):
223                             items = [x.strip() for x in value.split(',')]
224                             tokens = analyzer.get_word_token_info(items)
227                                 assert len(tokens) >= len(items), \
228                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
229                             for word, token, wid in tokens:
                                # 'not' variant: the token must be absent.
231                                     assert wid not in res[name], \
232                                            "Found term for {}/{}: {}".format(nid, name, wid)
                                # plain variant: the token must be present.
234                                     assert wid in res[name], \
235                                            "Missing term for {}/{}: {}".format(nid, name, wid)
236                         elif name != 'object':
237                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
239 @then("search_name has no entry for (?P<oid>.*)")
240 def check_search_name_has_entry(context, oid):
241     """ Check that there is no entry in the search_name table for the given
242         objects. IDs are in format '<NRW><osm id>[:<class>]'.
244     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
245         NominatimID(oid).row_by_place_id(cur, 'search_name')
247         assert cur.rowcount == 0, \
248                "Found {} entries for ID {}".format(cur.rowcount, oid)
250 @then("location_postcode contains exactly")
251 def check_location_postcode(context):
252     """ Check full contents for location_postcode table. Each row represents a table row
253         and all data must match. Data not present in the expected table, may
254         be arbitrary. The rows are identified via 'country' and 'postcode' columns.
255         All rows must be present as expected and there must not be additional
258     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
259         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
        # Row counts must match exactly ('contains exactly' semantics).
260         assert cur.rowcount == len(list(context.table)), \
261                "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
        # NOTE(review): the initialisation of 'results' (presumably a dict) and
        # the database-row loop header are not visible in this excerpt —
        # confirm against the full file.
265             key = (row['country_code'], row['postcode'])
266             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
267             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
        # Every expected row must have a matching database row.
269         for row in context.table:
270             db_row = results.get((row['country'],row['postcode']))
271             assert db_row is not None, \
272                 f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
274             db_row.assert_row(row, ('country', 'postcode'))
276 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
277 def check_word_table_for_postcodes(context, exclude, postcodes):
278     """ Check that the tokenizer produces postcode tokens for the given
279         postcodes. The postcodes are a comma-separated list of postcodes.
282     nctx = context.nominatim
283     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
    # Normalise the expected postcodes the same way the tokenizer would.
284     with tokenizer.name_analyzer() as ana:
285         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
289     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # The word-table schema differs between the legacy tokenizer and the
        # newer ones; query accordingly.
        # NOTE(review): the query-parameter tails and the 'else:' branch header
        # are not visible in this excerpt — confirm against the full file.
290         if nctx.tokenizer != 'legacy':
291             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
294             cur.execute("""SELECT word FROM word WHERE word = any(%s)
295                              and class = 'place' and type = 'postcode'""",
298         found = [row[0] for row in cur]
299         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
    # NOTE(review): the 'if exclude:'/'else:' headers around the two asserts
    # below are not visible in this excerpt — confirm against the full file.
302         assert len(found) == 0, f"Unexpected postcodes: {found}"
304         assert set(found) == set(plist), \
305                f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
307 @then("place_addressline contains")
308 def check_place_addressline(context):
309     """ Check the contents of the place_addressline table. Each row represents
310         a table row and all data must match. Data not present in the expected
311         table, may be arbitrary. The rows are identified via the 'object' column,
312         representing the addressee and the 'address' column, representing the
315     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
316         for row in context.table:
            # Resolve both the addressee and the address to their place_ids.
317             nid = NominatimID(row['object'])
318             pid = nid.get_place_id(cur)
319             apid = NominatimID(row['address']).get_place_id(cur)
            # NOTE(review): the parameter tuple of this execute() and the
            # result-row loop header are not visible in this excerpt —
            # confirm against the full file.
320             cur.execute(""" SELECT * FROM place_addressline
321                             WHERE place_id = %s AND address_place_id = %s""",
323             assert cur.rowcount > 0, \
324                         "No rows found for place %s and address %s" % (row['object'], row['address'])
327                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
329 @then("place_addressline doesn't contain")
330 def check_place_addressline_exclude(context):
331     """ Check that the place_addressline doesn't contain any entries for the
332         given addressee/address item pairs.
334     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
335         for row in context.table:
336             pid = NominatimID(row['object']).get_place_id(cur)
            # allow_empty=True: the address object itself may legitimately be
            # missing from the database.
337             apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
            # NOTE(review): a guard (presumably "if apid is not None:") and the
            # execute() parameter tuple are not visible in this excerpt —
            # confirm against the full file.
339                 cur.execute(""" SELECT * FROM place_addressline
340                                 WHERE place_id = %s AND address_place_id = %s""",
342                 assert cur.rowcount == 0, \
343                     "Row found for place %s and address %s" % (row['object'], row['address'])
# NOTE(review): the step pattern uses '\d' in a non-raw string literal; newer
# Python versions warn about such escapes — consider a raw string r"..." when
# editing the full file.
345 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
346 def check_location_property_osmline(context, oid, neg):
347     """ Check that the given way is present in the interpolation table.
349     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
350         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
351                        FROM location_property_osmline
352                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
        # NOTE(review): the execute() parameter tuple, the 'if neg:' guard and
        # the result-row loop headers are not visible in this excerpt —
        # confirm against the full file.
356             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
        # Track which expected rows are still unmatched by index.
359         todo = list(range(len(list(context.table))))
362                 row = context.table[i]
            # A database row matches when start and end house numbers agree.
363                 if (int(row['start']) == res['startnumber']
364                         and int(row['end']) == res['endnumber']):
368                 assert False, "Unexpected row " + str(res)
370             DBRow(oid, res, context).assert_row(row, ('start', 'end'))