test/bdd/steps/steps_db_ops.py

   1 # SPDX-License-Identifier: GPL-2.0-only
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2022 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 import logging
   8 from itertools import chain
   9
  10 import psycopg2.extras
  11
  12 from place_inserter import PlaceColumn
  13 from table_compare import NominatimID, DBRow
  14
  15 from nominatim.indexer import indexer
  16 from nominatim.tokenizer import factory as tokenizer_factory
  17
  18 def check_database_integrity(context):
  19     """ Check some generic constraints on the tables.
  20     """
  21     with context.db.cursor() as cur:
  22         # place_addressline should not have duplicate (place_id, address_place_id)
  23         cur.execute("""SELECT count(*) FROM
  24                         (SELECT place_id, address_place_id, count(*) as c
  25                          FROM place_addressline GROUP BY place_id, address_place_id) x
  26                        WHERE c > 1""")
  27         assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
  28
  29         # word table must not have empty word_tokens
  30         cur.execute("SELECT count(*) FROM word WHERE word_token = ''")
  31         assert cur.fetchone()[0] == 0, "Empty word tokens found in word table"
  32
  33
  34
  35 ################################ GIVEN ##################################
  36
  37 @given("the (?P<named>named )?places")
  38 def add_data_to_place_table(context, named):
  39     """ Add entries into the place table. 'named places' makes sure that
  40         the entries get a random name when none is explicitly given.
  41     """
  42     with context.db.cursor() as cur:
  43         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
  44         for row in context.table:
  45             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
  46         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
  47
  48 @given("the relations")
  49 def add_data_to_planet_relations(context):
  50     """ Add entries into the osm2pgsql relation middle table. This is needed
  51         for tests on data that looks up members.
  52     """
  53     with context.db.cursor() as cur:
  54         for r in context.table:
  55             last_node = 0
  56             last_way = 0
  57             parts = []
  58             if r['members']:
  59                 members = []
  60                 for m in r['members'].split(','):
  61                     mid = NominatimID(m)
  62                     if mid.typ == 'N':
  63                         parts.insert(last_node, int(mid.oid))
  64                         last_node += 1
  65                         last_way += 1
  66                     elif mid.typ == 'W':
  67                         parts.insert(last_way, int(mid.oid))
  68                         last_way += 1
  69                     else:
  70                         parts.append(int(mid.oid))
  71
  72                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
  73             else:
  74                 members = None
  75
  76             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  77
  78             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
  79                            VALUES (%s, %s, %s, %s, %s, %s)""",
  80                         (r['id'], last_node, last_way, parts, members, list(tags)))
  81
  82 @given("the ways")
  83 def add_data_to_planet_ways(context):
  84     """ Add entries into the osm2pgsql way middle table. This is necessary for
  85         tests on that that looks up node ids in this table.
  86     """
  87     with context.db.cursor() as cur:
  88         for r in context.table:
  89             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  90             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
  91
  92             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
  93                         (r['id'], nodes, list(tags)))
  94
  95 ################################ WHEN ##################################
  96
  97 @when("importing")
  98 def import_and_index_data_from_place_table(context):
  99     """ Import data previously set up in the place table.
 100     """
 101     context.nominatim.run_nominatim('import', '--continue', 'load-data',
 102                                               '--index-noanalyse', '-q',
 103                                               '--offline')
 104
 105     check_database_integrity(context)
 106
 107     # Remove the output of the input, when all was right. Otherwise it will be
 108     # output when there are errors that had nothing to do with the import
 109     # itself.
 110     context.log_capture.buffer.clear()
 111
 112 @when("updating places")
 113 def update_place_table(context):
 114     """ Update the place table with the given data. Also runs all triggers
 115         related to updates and reindexes the new data.
 116     """
 117     context.nominatim.run_nominatim('refresh', '--functions')
 118     with context.db.cursor() as cur:
 119         for row in context.table:
 120             PlaceColumn(context).add_row(row, False).db_insert(cur)
 121
 122     context.nominatim.reindex_placex(context.db)
 123     check_database_integrity(context)
 124
 125     # Remove the output of the input, when all was right. Otherwise it will be
 126     # output when there are errors that had nothing to do with the import
 127     # itself.
 128     context.log_capture.buffer.clear()
 129
 130
 131 @when("updating postcodes")
 132 def update_postcodes(context):
 133     """ Rerun the calculation of postcodes.
 134     """
 135     context.nominatim.run_nominatim('refresh', '--postcodes')
 136
 137 @when("marking for delete (?P<oids>.*)")
 138 def delete_places(context, oids):
 139     """ Remove entries from the place table. Multiple ids may be given
 140         separated by commas. Also runs all triggers
 141         related to updates and reindexes the new data.
 142     """
 143     context.nominatim.run_nominatim('refresh', '--functions')
 144     with context.db.cursor() as cur:
 145         for oid in oids.split(','):
 146             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 147
 148     context.nominatim.reindex_placex(context.db)
 149
 150     # Remove the output of the input, when all was right. Otherwise it will be
 151     # output when there are errors that had nothing to do with the import
 152     # itself.
 153     context.log_capture.buffer.clear()
 154
 155 ################################ THEN ##################################
 156
 157 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 158 def check_place_contents(context, table, exact):
 159     """ Check contents of place/placex tables. Each row represents a table row
 160         and all data must match. Data not present in the expected table, may
 161         be arbitry. The rows are identified via the 'object' column which must
 162         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
 163         rows match (for example because 'class' was left out and there are
 164         multiple entries for the given OSM object) then all must match. All
 165         expected rows are expected to be present with at least one database row.
 166         When 'exactly' is given, there must not be additional rows in the database.
 167     """
 168     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 169         expected_content = set()
 170         for row in context.table:
 171             nid = NominatimID(row['object'])
 172             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
 173             if table == 'placex':
 174                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
 175             query += " FROM %s WHERE {}" % (table, )
 176             nid.query_osm_id(cur, query)
 177             assert cur.rowcount > 0, "No rows found for " + row['object']
 178
 179             for res in cur:
 180                 if exact:
 181                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
 182
 183                 DBRow(nid, res, context).assert_row(row, ['object'])
 184
 185         if exact:
 186             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
 187             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 188
 189
 190 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 191 def check_place_has_entry(context, table, oid):
 192     """ Ensure that no database row for the given object exists. The ID
 193         must be of the form '<NRW><osm id>[:<class>]'.
 194     """
 195     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 196         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
 197         assert cur.rowcount == 0, \
 198                "Found {} entries for ID {}".format(cur.rowcount, oid)
 199
 200
 201 @then("search_name contains(?P<exclude> not)?")
 202 def check_search_name_contents(context, exclude):
 203     """ Check contents of place/placex tables. Each row represents a table row
 204         and all data must match. Data not present in the expected table, may
 205         be arbitry. The rows are identified via the 'object' column which must
 206         have an identifier of the form '<NRW><osm id>[:<class>]'. All
 207         expected rows are expected to be present with at least one database row.
 208     """
 209     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
 210
 211     with tokenizer.name_analyzer() as analyzer:
 212         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 213             for row in context.table:
 214                 nid = NominatimID(row['object'])
 215                 nid.row_by_place_id(cur, 'search_name',
 216                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
 217                 assert cur.rowcount > 0, "No rows found for " + row['object']
 218
 219                 for res in cur:
 220                     db_row = DBRow(nid, res, context)
 221                     for name, value in zip(row.headings, row.cells):
 222                         if name in ('name_vector', 'nameaddress_vector'):
 223                             items = [x.strip() for x in value.split(',')]
 224                             tokens = analyzer.get_word_token_info(items)
 225
 226                             if not exclude:
 227                                 assert len(tokens) >= len(items), \
 228                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
 229                             for word, token, wid in tokens:
 230                                 if exclude:
 231                                     assert wid not in res[name], \
 232                                            "Found term for {}/{}: {}".format(nid, name, wid)
 233                                 else:
 234                                     assert wid in res[name], \
 235                                            "Missing term for {}/{}: {}".format(nid, name, wid)
 236                         elif name != 'object':
 237                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
 238
 239 @then("search_name has no entry for (?P<oid>.*)")
 240 def check_search_name_has_entry(context, oid):
 241     """ Check that there is noentry in the search_name table for the given
 242         objects. IDs are in format '<NRW><osm id>[:<class>]'.
 243     """
 244     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 245         NominatimID(oid).row_by_place_id(cur, 'search_name')
 246
 247         assert cur.rowcount == 0, \
 248                "Found {} entries for ID {}".format(cur.rowcount, oid)
 249
 250 @then("location_postcode contains exactly")
 251 def check_location_postcode(context):
 252     """ Check full contents for location_postcode table. Each row represents a table row
 253         and all data must match. Data not present in the expected table, may
 254         be arbitry. The rows are identified via 'country' and 'postcode' columns.
 255         All rows must be present as excepted and there must not be additional
 256         rows.
 257     """
 258     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 259         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
 260         assert cur.rowcount == len(list(context.table)), \
 261             "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 262
 263         results = {}
 264         for row in cur:
 265             key = (row['country_code'], row['postcode'])
 266             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
 267             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
 268
 269         for row in context.table:
 270             db_row = results.get((row['country'],row['postcode']))
 271             assert db_row is not None, \
 272                 f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
 273
 274             db_row.assert_row(row, ('country', 'postcode'))
 275
 276 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
 277 def check_word_table_for_postcodes(context, exclude, postcodes):
 278     """ Check that the tokenizer produces postcode tokens for the given
 279         postcodes. The postcodes are a comma-separated list of postcodes.
 280         Whitespace matters.
 281     """
 282     nctx = context.nominatim
 283     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
 284     with tokenizer.name_analyzer() as ana:
 285         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
 286
 287     plist.sort()
 288
 289     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 290         if nctx.tokenizer != 'legacy':
 291             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
 292                         (plist,))
 293         else:
 294             cur.execute("""SELECT word FROM word WHERE word = any(%s)
 295                              and class = 'place' and type = 'postcode'""",
 296                         (plist,))
 297
 298         found = [row[0] for row in cur]
 299         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
 300
 301     if exclude:
 302         assert len(found) == 0, f"Unexpected postcodes: {found}"
 303     else:
 304         assert set(found) == set(plist), \
 305         f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
 306
 307 @then("place_addressline contains")
 308 def check_place_addressline(context):
 309     """ Check the contents of the place_addressline table. Each row represents
 310         a table row and all data must match. Data not present in the expected
 311         table, may be arbitry. The rows are identified via the 'object' column,
 312         representing the addressee and the 'address' column, representing the
 313         address item.
 314     """
 315     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 316         for row in context.table:
 317             nid = NominatimID(row['object'])
 318             pid = nid.get_place_id(cur)
 319             apid = NominatimID(row['address']).get_place_id(cur)
 320             cur.execute(""" SELECT * FROM place_addressline
 321                             WHERE place_id = %s AND address_place_id = %s""",
 322                         (pid, apid))
 323             assert cur.rowcount > 0, \
 324                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 325
 326             for res in cur:
 327                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 328
 329 @then("place_addressline doesn't contain")
 330 def check_place_addressline_exclude(context):
 331     """ Check that the place_addressline doesn't contain any entries for the
 332         given addressee/address item pairs.
 333     """
 334     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 335         for row in context.table:
 336             pid = NominatimID(row['object']).get_place_id(cur)
 337             apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
 338             if apid is not None:
 339                 cur.execute(""" SELECT * FROM place_addressline
 340                                 WHERE place_id = %s AND address_place_id = %s""",
 341                             (pid, apid))
 342                 assert cur.rowcount == 0, \
 343                     "Row found for place %s and address %s" % (row['object'], row['address'])
 344
 345 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 346 def check_location_property_osmline(context, oid, neg):
 347     """ Check that the given way is present in the interpolation table.
 348     """
 349     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 350         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
 351                        FROM location_property_osmline
 352                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
 353                     (oid, ))
 354
 355         if neg:
 356             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
 357             return
 358
 359         todo = list(range(len(list(context.table))))
 360         for res in cur:
 361             for i in todo:
 362                 row = context.table[i]
 363                 if (int(row['start']) == res['startnumber']
 364                     and int(row['end']) == res['endnumber']):
 365                     todo.remove(i)
 366                     break
 367             else:
 368                 assert False, "Unexpected row " + str(res)
 369
 370             DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 371
 372         assert not todo
 373
 374