test/bdd/steps/steps_db_ops.py

   1 # SPDX-License-Identifier: GPL-2.0-only
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2022 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 import logging
   8 from itertools import chain
   9
  10 import psycopg2.extras
  11
  12 from place_inserter import PlaceColumn
  13 from table_compare import NominatimID, DBRow
  14
  15 from nominatim.indexer import indexer
  16 from nominatim.tokenizer import factory as tokenizer_factory
  17
  18 def check_database_integrity(context):
  19     """ Check some generic constraints on the tables.
  20     """
  21     # place_addressline should not have duplicate (place_id, address_place_id)
  22     cur = context.db.cursor()
  23     cur.execute("""SELECT count(*) FROM
  24                     (SELECT place_id, address_place_id, count(*) as c
  25                      FROM place_addressline GROUP BY place_id, address_place_id) x
  26                    WHERE c > 1""")
  27     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
  28
  29
  30 ################################ GIVEN ##################################
  31
  32 @given("the (?P<named>named )?places")
  33 def add_data_to_place_table(context, named):
  34     """ Add entries into the place table. 'named places' makes sure that
  35         the entries get a random name when none is explicitly given.
  36     """
  37     with context.db.cursor() as cur:
  38         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
  39         for row in context.table:
  40             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
  41         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
  42
  43 @given("the relations")
  44 def add_data_to_planet_relations(context):
  45     """ Add entries into the osm2pgsql relation middle table. This is needed
  46         for tests on data that looks up members.
  47     """
  48     with context.db.cursor() as cur:
  49         for r in context.table:
  50             last_node = 0
  51             last_way = 0
  52             parts = []
  53             if r['members']:
  54                 members = []
  55                 for m in r['members'].split(','):
  56                     mid = NominatimID(m)
  57                     if mid.typ == 'N':
  58                         parts.insert(last_node, int(mid.oid))
  59                         last_node += 1
  60                         last_way += 1
  61                     elif mid.typ == 'W':
  62                         parts.insert(last_way, int(mid.oid))
  63                         last_way += 1
  64                     else:
  65                         parts.append(int(mid.oid))
  66
  67                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
  68             else:
  69                 members = None
  70
  71             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  72
  73             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
  74                            VALUES (%s, %s, %s, %s, %s, %s)""",
  75                         (r['id'], last_node, last_way, parts, members, list(tags)))
  76
  77 @given("the ways")
  78 def add_data_to_planet_ways(context):
  79     """ Add entries into the osm2pgsql way middle table. This is necessary for
  80         tests on that that looks up node ids in this table.
  81     """
  82     with context.db.cursor() as cur:
  83         for r in context.table:
  84             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  85             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
  86
  87             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
  88                         (r['id'], nodes, list(tags)))
  89
  90 ################################ WHEN ##################################
  91
  92 @when("importing")
  93 def import_and_index_data_from_place_table(context):
  94     """ Import data previously set up in the place table.
  95     """
  96     context.nominatim.run_nominatim('refresh', '--functions')
  97     context.nominatim.run_nominatim('import', '--continue', 'load-data',
  98                                               '--index-noanalyse', '-q')
  99
 100     check_database_integrity(context)
 101
 102 @when("updating places")
 103 def update_place_table(context):
 104     """ Update the place table with the given data. Also runs all triggers
 105         related to updates and reindexes the new data.
 106     """
 107     context.nominatim.run_nominatim('refresh', '--functions')
 108     with context.db.cursor() as cur:
 109         for row in context.table:
 110             PlaceColumn(context).add_row(row, False).db_insert(cur)
 111
 112     context.nominatim.reindex_placex(context.db)
 113     check_database_integrity(context)
 114
 115 @when("updating postcodes")
 116 def update_postcodes(context):
 117     """ Rerun the calculation of postcodes.
 118     """
 119     context.nominatim.run_nominatim('refresh', '--postcodes')
 120
 121 @when("marking for delete (?P<oids>.*)")
 122 def delete_places(context, oids):
 123     """ Remove entries from the place table. Multiple ids may be given
 124         separated by commas. Also runs all triggers
 125         related to updates and reindexes the new data.
 126     """
 127     context.nominatim.run_nominatim('refresh', '--functions')
 128     with context.db.cursor() as cur:
 129         for oid in oids.split(','):
 130             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 131
 132     context.nominatim.reindex_placex(context.db)
 133
 134 ################################ THEN ##################################
 135
 136 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 137 def check_place_contents(context, table, exact):
 138     """ Check contents of place/placex tables. Each row represents a table row
 139         and all data must match. Data not present in the expected table, may
 140         be arbitry. The rows are identified via the 'object' column which must
 141         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
 142         rows match (for example because 'class' was left out and there are
 143         multiple entries for the given OSM object) then all must match. All
 144         expected rows are expected to be present with at least one database row.
 145         When 'exactly' is given, there must not be additional rows in the database.
 146     """
 147     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 148         expected_content = set()
 149         for row in context.table:
 150             nid = NominatimID(row['object'])
 151             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
 152             if table == 'placex':
 153                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
 154             query += " FROM %s WHERE {}" % (table, )
 155             nid.query_osm_id(cur, query)
 156             assert cur.rowcount > 0, "No rows found for " + row['object']
 157
 158             for res in cur:
 159                 if exact:
 160                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
 161
 162                 DBRow(nid, res, context).assert_row(row, ['object'])
 163
 164         if exact:
 165             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
 166             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 167
 168
 169 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 170 def check_place_has_entry(context, table, oid):
 171     """ Ensure that no database row for the given object exists. The ID
 172         must be of the form '<NRW><osm id>[:<class>]'.
 173     """
 174     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 175         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
 176         assert cur.rowcount == 0, \
 177                "Found {} entries for ID {}".format(cur.rowcount, oid)
 178
 179
 180 @then("search_name contains(?P<exclude> not)?")
 181 def check_search_name_contents(context, exclude):
 182     """ Check contents of place/placex tables. Each row represents a table row
 183         and all data must match. Data not present in the expected table, may
 184         be arbitry. The rows are identified via the 'object' column which must
 185         have an identifier of the form '<NRW><osm id>[:<class>]'. All
 186         expected rows are expected to be present with at least one database row.
 187     """
 188     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
 189
 190     with tokenizer.name_analyzer() as analyzer:
 191         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 192             for row in context.table:
 193                 nid = NominatimID(row['object'])
 194                 nid.row_by_place_id(cur, 'search_name',
 195                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
 196                 assert cur.rowcount > 0, "No rows found for " + row['object']
 197
 198                 for res in cur:
 199                     db_row = DBRow(nid, res, context)
 200                     for name, value in zip(row.headings, row.cells):
 201                         if name in ('name_vector', 'nameaddress_vector'):
 202                             items = [x.strip() for x in value.split(',')]
 203                             tokens = analyzer.get_word_token_info(items)
 204
 205                             if not exclude:
 206                                 assert len(tokens) >= len(items), \
 207                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
 208                             for word, token, wid in tokens:
 209                                 if exclude:
 210                                     assert wid not in res[name], \
 211                                            "Found term for {}/{}: {}".format(nid, name, wid)
 212                                 else:
 213                                     assert wid in res[name], \
 214                                            "Missing term for {}/{}: {}".format(nid, name, wid)
 215                         elif name != 'object':
 216                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
 217
 218 @then("search_name has no entry for (?P<oid>.*)")
 219 def check_search_name_has_entry(context, oid):
 220     """ Check that there is noentry in the search_name table for the given
 221         objects. IDs are in format '<NRW><osm id>[:<class>]'.
 222     """
 223     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 224         NominatimID(oid).row_by_place_id(cur, 'search_name')
 225
 226         assert cur.rowcount == 0, \
 227                "Found {} entries for ID {}".format(cur.rowcount, oid)
 228
 229 @then("location_postcode contains exactly")
 230 def check_location_postcode(context):
 231     """ Check full contents for location_postcode table. Each row represents a table row
 232         and all data must match. Data not present in the expected table, may
 233         be arbitry. The rows are identified via 'country' and 'postcode' columns.
 234         All rows must be present as excepted and there must not be additional
 235         rows.
 236     """
 237     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 238         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
 239         assert cur.rowcount == len(list(context.table)), \
 240             "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 241
 242         results = {}
 243         for row in cur:
 244             key = (row['country_code'], row['postcode'])
 245             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
 246             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
 247
 248         for row in context.table:
 249             db_row = results.get((row['country'],row['postcode']))
 250             assert db_row is not None, \
 251                 f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
 252
 253             db_row.assert_row(row, ('country', 'postcode'))
 254
 255 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
 256 def check_word_table_for_postcodes(context, exclude, postcodes):
 257     """ Check that the tokenizer produces postcode tokens for the given
 258         postcodes. The postcodes are a comma-separated list of postcodes.
 259         Whitespace matters.
 260     """
 261     nctx = context.nominatim
 262     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
 263     with tokenizer.name_analyzer() as ana:
 264         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
 265
 266     plist.sort()
 267
 268     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 269         if nctx.tokenizer == 'icu':
 270             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
 271                         (plist,))
 272         else:
 273             cur.execute("""SELECT word FROM word WHERE word = any(%s)
 274                              and class = 'place' and type = 'postcode'""",
 275                         (plist,))
 276
 277         found = [row[0] for row in cur]
 278         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
 279
 280     if exclude:
 281         assert len(found) == 0, f"Unexpected postcodes: {found}"
 282     else:
 283         assert set(found) == set(plist), \
 284         f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
 285
 286 @then("place_addressline contains")
 287 def check_place_addressline(context):
 288     """ Check the contents of the place_addressline table. Each row represents
 289         a table row and all data must match. Data not present in the expected
 290         table, may be arbitry. The rows are identified via the 'object' column,
 291         representing the addressee and the 'address' column, representing the
 292         address item.
 293     """
 294     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 295         for row in context.table:
 296             nid = NominatimID(row['object'])
 297             pid = nid.get_place_id(cur)
 298             apid = NominatimID(row['address']).get_place_id(cur)
 299             cur.execute(""" SELECT * FROM place_addressline
 300                             WHERE place_id = %s AND address_place_id = %s""",
 301                         (pid, apid))
 302             assert cur.rowcount > 0, \
 303                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 304
 305             for res in cur:
 306                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 307
 308 @then("place_addressline doesn't contain")
 309 def check_place_addressline_exclude(context):
 310     """ Check that the place_addressline doesn't contain any entries for the
 311         given addressee/address item pairs.
 312     """
 313     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 314         for row in context.table:
 315             pid = NominatimID(row['object']).get_place_id(cur)
 316             apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
 317             if apid is not None:
 318                 cur.execute(""" SELECT * FROM place_addressline
 319                                 WHERE place_id = %s AND address_place_id = %s""",
 320                             (pid, apid))
 321                 assert cur.rowcount == 0, \
 322                     "Row found for place %s and address %s" % (row['object'], row['address'])
 323
 324 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 325 def check_location_property_osmline(context, oid, neg):
 326     """ Check that the given way is present in the interpolation table.
 327     """
 328     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 329         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
 330                        FROM location_property_osmline
 331                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
 332                     (oid, ))
 333
 334         if neg:
 335             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
 336             return
 337
 338         todo = list(range(len(list(context.table))))
 339         for res in cur:
 340             for i in todo:
 341                 row = context.table[i]
 342                 if (int(row['start']) == res['startnumber']
 343                     and int(row['end']) == res['endnumber']):
 344                     todo.remove(i)
 345                     break
 346             else:
 347                 assert False, "Unexpected row " + str(res)
 348
 349             DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 350
 351         assert not todo
 352
 353