1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 from itertools import chain
10 import psycopg2.extras
12 from place_inserter import PlaceColumn
13 from table_compare import NominatimID, DBRow
15 from nominatim.indexer import indexer
16 from nominatim.tokenizer import factory as tokenizer_factory
18 def check_database_integrity(context):
19     """ Check some generic constraints on the tables.
21     # place_addressline should not have duplicate (place_id, address_place_id)
22     cur = context.db.cursor()
    # Aggregate per (place_id, address_place_id) pair; the outer count picks up
    # offending groups. NOTE(review): the group-count filter line of this query
    # is not visible in this view — confirm it selects only groups with c > 1.
23     cur.execute("""SELECT count(*) FROM
24                      (SELECT place_id, address_place_id, count(*) as c
25                       FROM place_addressline GROUP BY place_id, address_place_id) x
    # Any non-zero result means a duplicated addressline pair.
27     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
30 ################################ GIVEN ##################################
32 @given("the (?P<named>named )?places")
33 def add_data_to_place_table(context, named):
34     """ Add entries into the place table. 'named places' makes sure that
35         the entries get a random name when none is explicitly given.
37     with context.db.cursor() as cur:
        # Disable the insert trigger so the raw test rows land in 'place'
        # without any of the usual normalisation side effects.
38         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
39         for row in context.table:
            # 'named is not None' is True exactly when the step said "named places".
40             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
        # Restore normal trigger behaviour for subsequent statements.
41         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
43 @given("the relations")
44 def add_data_to_planet_relations(context):
45     """ Add entries into the osm2pgsql relation middle table. This is needed
46         for tests on data that looks up members.
48     with context.db.cursor() as cur:
49         for r in context.table:
            # NOTE(review): initialisation of parts/members/last_node/last_way and
            # the member-type branching are elided in this view.
55             for m in r['members'].split(','):
                # Node members are inserted at the node segment boundary ...
58                         parts.insert(last_node, int(mid.oid))
                # ... way members at the way segment boundary ...
62                         parts.insert(last_way, int(mid.oid))
                # ... and relation members go at the end of the parts array.
65                         parts.append(int(mid.oid))
                    # members is a flat list of (type-prefixed id, role) pairs.
67                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
            # Collect extra tags from table headings of the form "tags+<key>";
            # h[5:] strips the "tags+" prefix to recover the key.
71             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
            # way_off/rel_off mark the segment boundaries inside parts —
            # presumably nodes|ways|relations; confirm against the osm2pgsql schema.
73             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
74                            VALUES (%s, %s, %s, %s, %s, %s)""",
75                         (r['id'], last_node, last_way, parts, members, list(tags)))
# NOTE(review): the @given decorator for this step is not visible in this view.
78 def add_data_to_planet_ways(context):
79     """ Add entries into the osm2pgsql way middle table. This is necessary for
80         tests on data that looks up node ids in this table.
82     with context.db.cursor() as cur:
83         for r in context.table:
            # Extra tags come from "tags+<key>" headings; h[5:] strips the prefix.
84             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
            # 'nodes' column holds a comma-separated list of node ids.
85             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
87             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
88                         (r['id'], nodes, list(tags)))
90 ################################ WHEN ##################################
# NOTE(review): the @when decorator for this step is not visible in this view.
93 def import_and_index_data_from_place_table(context):
94     """ Import data previously set up in the place table.
96     nctx = context.nominatim
    # A fresh tokenizer is created from the test configuration for this import.
98     tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
99     context.nominatim.copy_from_place(context.db)
101     # XXX use tool function as soon as it is ported
102     with context.db.cursor() as cur:
        # Load the postcode table definitions straight from the SQL library file.
103         with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
104             cur.execute(fd.read())
        # Compute one centroid per (country_code, postcode), skipping multi-value
        # postcodes (containing ',' or ';') and rows without geometry.
        # NOTE(review): the cur.execute(...) opener and the FROM clause of this
        # statement are elided in this view.
106             INSERT INTO location_postcode
107              (place_id, indexed_status, country_code, postcode, geometry)
108             SELECT nextval('seq_place'), 1, country_code,
109                    upper(trim (both ' ' from address->'postcode')) as pc,
110                    ST_Centroid(ST_Collect(ST_Centroid(geometry)))
112             WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
113                   AND geometry IS NOT null
114             GROUP BY country_code, pc""")
116     # Call directly as the refresh function does not include postcodes.
    # NOTE(review): 'logging' is not among the visible imports — presumably
    # imported at the top of the file; verify.
117     indexer.LOG.setLevel(logging.ERROR)
    # Single-threaded full index run; analyse=False skips statistics gathering.
118     indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
120     check_database_integrity(context)
122 @when("updating places")
123 def update_place_table(context):
124     """ Update the place table with the given data. Also runs all triggers
125         related to updates and reindexes the new data.
    # Make sure the trigger functions are up to date before inserting.
127     context.nominatim.run_nominatim('refresh', '--functions')
128     with context.db.cursor() as cur:
129         for row in context.table:
            # False: unlike the 'named places' step, no random names are generated.
130             PlaceColumn(context).add_row(row, False).db_insert(cur)
132     context.nominatim.reindex_placex(context.db)
133     check_database_integrity(context)
135 @when("updating postcodes")
136 def update_postcodes(context):
137     """ Rerun the calculation of postcodes.
    # Delegates entirely to the nominatim CLI refresh command.
139     context.nominatim.run_nominatim('refresh', '--postcodes')
141 @when("marking for delete (?P<oids>.*)")
142 def delete_places(context, oids):
143     """ Remove entries from the place table. Multiple ids may be given
144         separated by commas. Also runs all triggers
145         related to updates and reindexes the new data.
    # Refresh trigger functions first so the delete triggers run current code.
147     context.nominatim.run_nominatim('refresh', '--functions')
148     with context.db.cursor() as cur:
149         for oid in oids.split(','):
            # query_osm_id fills the {} placeholder with the id-matching condition.
150             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
152     context.nominatim.reindex_placex(context.db)
154 ################################ THEN ##################################
156 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
157 def check_place_contents(context, table, exact):
158     """ Check contents of place/placex tables. Each row represents a table row
159         and all data must match. Data not present in the expected table, may
160         be arbitrary. The rows are identified via the 'object' column which must
161         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
162         rows match (for example because 'class' was left out and there are
163         multiple entries for the given OSM object) then all must match. All
164         expected rows are expected to be present with at least one database row.
165         When 'exactly' is given, there must not be additional rows in the database.
167     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
168         expected_content = set()
169         for row in context.table:
170             nid = NominatimID(row['object'])
            # Geometry is exported as text so DBRow can compare it literally.
171             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
172             if table == 'placex':
                # Only placex has a centroid column.
173                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
            # Table name comes from the step regex (placex|place), so %-formatting
            # here cannot inject; {} is filled by query_osm_id with the id condition.
174             query += " FROM %s WHERE {}" % (table, )
175             nid.query_osm_id(cur, query)
176             assert cur.rowcount > 0, "No rows found for " + row['object']
            # NOTE(review): the 'for res in cur:' loop header over the fetched
            # rows is elided in this view.
180                 expected_content.add((res['osm_type'], res['osm_id'], res['class']))
182                 DBRow(nid, res, context).assert_row(row, ['object'])
        # NOTE(review): the guard for the 'exactly' case appears elided here —
        # presumably these two lines run only when exact is not None; verify.
185             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
186             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
189 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
190 def check_place_has_entry(context, table, oid):
191     """ Ensure that no database row for the given object exists. The ID
192         must be of the form '<NRW><osm id>[:<class>]'.
    # NOTE(review): despite the function name, this asserts the *absence* of an
    # entry — matching the step text "has no entry for".
194     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # Table name comes from the step regex (placex|place); {} is filled by
        # query_osm_id with the id-matching condition.
195         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
196         assert cur.rowcount == 0, \
197                "Found {} entries for ID {}".format(cur.rowcount, oid)
200 @then("search_name contains(?P<exclude> not)?")
201 def check_search_name_contents(context, exclude):
202     """ Check contents of place/placex tables. Each row represents a table row
203         and all data must match. Data not present in the expected table, may
204         be arbitrary. The rows are identified via the 'object' column which must
205         have an identifier of the form '<NRW><osm id>[:<class>]'. All
206         expected rows are expected to be present with at least one database row.
    # Use the tokenizer already configured in the database so token lookups
    # match what the import produced.
208     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
210     with tokenizer.name_analyzer() as analyzer:
211         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
212             for row in context.table:
213                 nid = NominatimID(row['object'])
                # Fetch the search_name row(s) plus the centroid coordinates.
214                 nid.row_by_place_id(cur, 'search_name',
215                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
216                 assert cur.rowcount > 0, "No rows found for " + row['object']
                # NOTE(review): the 'for res in cur:' loop header is elided here.
219                     db_row = DBRow(nid, res, context)
220                     for name, value in zip(row.headings, row.cells):
221                         if name in ('name_vector', 'nameaddress_vector'):
                            # Token columns are compared via word ids, not text.
222                             items = [x.strip() for x in value.split(',')]
223                             tokens = analyzer.get_word_token_info(items)
                            # NOTE(review): the guard before this assert (likely
                            # 'if not exclude:') is elided in this view.
226                                 assert len(tokens) >= len(items), \
227                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
228                             for word, token, wid in tokens:
                                # NOTE(review): the 'if exclude:'/'else:' pair
                                # selecting between these two asserts is elided.
230                                     assert wid not in res[name], \
231                                            "Found term for {}/{}: {}".format(nid, name, wid)
233                                     assert wid in res[name], \
234                                            "Missing term for {}/{}: {}".format(nid, name, wid)
235                         elif name != 'object':
                            # Plain columns are compared through DBRow directly.
236                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
238 @then("search_name has no entry for (?P<oid>.*)")
239 def check_search_name_has_entry(context, oid):
240     """ Check that there is no entry in the search_name table for the given
241         objects. IDs are in format '<NRW><osm id>[:<class>]'.
243     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # Resolve the OSM id to a place_id and look it up in search_name.
244         NominatimID(oid).row_by_place_id(cur, 'search_name')
246         assert cur.rowcount == 0, \
247                "Found {} entries for ID {}".format(cur.rowcount, oid)
249 @then("location_postcode contains exactly")
250 def check_location_postcode(context):
251     """ Check full contents for location_postcode table. Each row represents a table row
252         and all data must match. Data not present in the expected table, may
253         be arbitrary. The rows are identified via 'country' and 'postcode' columns.
254         All rows must be present as expected and there must not be additional
257     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
258         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
        # 'exactly' semantics: row counts must match, not just containment.
259         assert cur.rowcount == len(list(context.table)), \
260                "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
        # Index database rows by (country_code, postcode) and reject duplicates.
        # NOTE(review): the 'results = {}' initialisation and the 'for row in cur:'
        # loop header are elided in this view.
264             key = (row['country_code'], row['postcode'])
265             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
266             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
268         for row in context.table:
            # Expected rows use 'country'/'postcode' headings to find the DB row.
269             db_row = results.get((row['country'],row['postcode']))
270             assert db_row is not None, \
271                 "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
273             db_row.assert_row(row, ('country', 'postcode'))
275 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
276 def check_word_table_for_postcodes(context, exclude, postcodes):
277     """ Check that the tokenizer produces postcode tokens for the given
278         postcodes. The postcodes are a comma-separated list of postcodes.
281     nctx = context.nominatim
282     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
283     with tokenizer.name_analyzer() as ana:
        # Normalise the expected postcodes the same way the tokenizer stores them.
284         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
288     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # The two tokenizers use different word-table layouts:
        # icu stores postcodes with type 'P', the legacy one with
        # class 'place' / type 'postcode'.
289         if nctx.tokenizer == 'icu':
290             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
            # NOTE(review): the query parameter tuple and the 'else:' branch
            # header are elided in this view.
293             cur.execute("""SELECT word FROM word WHERE word = any(%s)
294                            and class = 'place' and type = 'postcode'""",
297         found = [row[0] for row in cur]
298         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
    # NOTE(review): the 'if exclude:'/'else:' pair selecting between these two
    # asserts is elided in this view.
301         assert len(found) == 0, f"Unexpected postcodes: {found}"
303         assert set(found) == set(plist), \
304                f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
306 @then("place_addressline contains")
307 def check_place_addressline(context):
308     """ Check the contents of the place_addressline table. Each row represents
309         a table row and all data must match. Data not present in the expected
310         table, may be arbitrary. The rows are identified via the 'object' column,
311         representing the addressee and the 'address' column, representing the
314     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
315         for row in context.table:
316             nid = NominatimID(row['object'])
            # Resolve both sides of the relation to internal place ids.
317             pid = nid.get_place_id(cur)
318             apid = NominatimID(row['address']).get_place_id(cur)
319             cur.execute(""" SELECT * FROM place_addressline
320                             WHERE place_id = %s AND address_place_id = %s""",
            # NOTE(review): the (pid, apid) parameter tuple line is elided here.
322             assert cur.rowcount > 0, \
323                         "No rows found for place %s and address %s" % (row['object'], row['address'])
            # NOTE(review): the 'for res in cur:' loop header is elided here.
326                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
328 @then("place_addressline doesn't contain")
329 def check_place_addressline_exclude(context):
330     """ Check that the place_addressline doesn't contain any entries for the
331         given addressee/address item pairs.
333     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
334         for row in context.table:
            # Resolve both sides of the pair to internal place ids.
335             pid = NominatimID(row['object']).get_place_id(cur)
336             apid = NominatimID(row['address']).get_place_id(cur)
337             cur.execute(""" SELECT * FROM place_addressline
338                             WHERE place_id = %s AND address_place_id = %s""",
            # NOTE(review): the (pid, apid) parameter tuple line is elided here.
340             assert cur.rowcount == 0, \
341                 "Row found for place %s and address %s" % (row['object'], row['address'])
# NOTE(review): the step pattern uses \d in a non-raw string — prefer a raw
# string (r"...") to avoid a DeprecationWarning for the invalid escape.
343 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
344 def check_location_property_osmline(context, oid, neg):
345     """ Check that the given way is present in the interpolation table.
347     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
348         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
349                        FROM location_property_osmline
350                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
        # NOTE(review): the (oid, ) parameter tuple and the 'if neg:' guard
        # before this assert are elided in this view.
354             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
        # Track which expected rows are still unmatched by table index.
357         todo = list(range(len(list(context.table))))
        # NOTE(review): the 'for res in cur:' loop and the inner 'for i in todo:'
        # header are elided; each DB row is matched by its start/end numbers.
360                 row = context.table[i]
361                 if (int(row['start']) == res['startnumber']
362                         and int(row['end']) == res['endnumber']):
            # NOTE(review): the branch removing i from todo and the 'else:' of the
            # search loop are elided; a DB row matching no expected row fails here.
366                 assert False, "Unexpected row " + str(res)
368             DBRow(oid, res, context).assert_row(row, ('start', 'end'))