test/bdd/steps/steps_db_ops.py

   1 # SPDX-License-Identifier: GPL-2.0-only
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2022 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 import logging
   8 from itertools import chain
   9
  10 import psycopg2.extras
  11
  12 from place_inserter import PlaceColumn
  13 from table_compare import NominatimID, DBRow
  14
  15 from nominatim.indexer import indexer
  16 from nominatim.tokenizer import factory as tokenizer_factory
  17
  18 def check_database_integrity(context):
  19     """ Check some generic constraints on the tables.
  20     """
  21     # place_addressline should not have duplicate (place_id, address_place_id)
  22     cur = context.db.cursor()
  23     cur.execute("""SELECT count(*) FROM
  24                     (SELECT place_id, address_place_id, count(*) as c
  25                      FROM place_addressline GROUP BY place_id, address_place_id) x
  26                    WHERE c > 1""")
  27     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
  28
  29
  30 ################################ GIVEN ##################################
  31
  32 @given("the (?P<named>named )?places")
  33 def add_data_to_place_table(context, named):
  34     """ Add entries into the place table. 'named places' makes sure that
  35         the entries get a random name when none is explicitly given.
  36     """
  37     with context.db.cursor() as cur:
  38         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
  39         for row in context.table:
  40             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
  41         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
  42
  43 @given("the relations")
  44 def add_data_to_planet_relations(context):
  45     """ Add entries into the osm2pgsql relation middle table. This is needed
  46         for tests on data that looks up members.
  47     """
  48     with context.db.cursor() as cur:
  49         for r in context.table:
  50             last_node = 0
  51             last_way = 0
  52             parts = []
  53             if r['members']:
  54                 members = []
  55                 for m in r['members'].split(','):
  56                     mid = NominatimID(m)
  57                     if mid.typ == 'N':
  58                         parts.insert(last_node, int(mid.oid))
  59                         last_node += 1
  60                         last_way += 1
  61                     elif mid.typ == 'W':
  62                         parts.insert(last_way, int(mid.oid))
  63                         last_way += 1
  64                     else:
  65                         parts.append(int(mid.oid))
  66
  67                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
  68             else:
  69                 members = None
  70
  71             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  72
  73             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
  74                            VALUES (%s, %s, %s, %s, %s, %s)""",
  75                         (r['id'], last_node, last_way, parts, members, list(tags)))
  76
  77 @given("the ways")
  78 def add_data_to_planet_ways(context):
  79     """ Add entries into the osm2pgsql way middle table. This is necessary for
  80         tests on that that looks up node ids in this table.
  81     """
  82     with context.db.cursor() as cur:
  83         for r in context.table:
  84             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  85             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
  86
  87             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
  88                         (r['id'], nodes, list(tags)))
  89
  90 ################################ WHEN ##################################
  91
  92 @when("importing")
  93 def import_and_index_data_from_place_table(context):
  94     """ Import data previously set up in the place table.
  95     """
  96     context.nominatim.run_nominatim('refresh', '--functions')
  97     context.nominatim.run_nominatim('import', '--continue', 'load-data',
  98                                               '--index-noanalyse', '-q',
  99                                               '--offline')
 100
 101     check_database_integrity(context)
 102
 103 @when("updating places")
 104 def update_place_table(context):
 105     """ Update the place table with the given data. Also runs all triggers
 106         related to updates and reindexes the new data.
 107     """
 108     context.nominatim.run_nominatim('refresh', '--functions')
 109     with context.db.cursor() as cur:
 110         for row in context.table:
 111             PlaceColumn(context).add_row(row, False).db_insert(cur)
 112
 113     context.nominatim.reindex_placex(context.db)
 114     check_database_integrity(context)
 115
 116 @when("updating postcodes")
 117 def update_postcodes(context):
 118     """ Rerun the calculation of postcodes.
 119     """
 120     context.nominatim.run_nominatim('refresh', '--postcodes')
 121
 122 @when("marking for delete (?P<oids>.*)")
 123 def delete_places(context, oids):
 124     """ Remove entries from the place table. Multiple ids may be given
 125         separated by commas. Also runs all triggers
 126         related to updates and reindexes the new data.
 127     """
 128     context.nominatim.run_nominatim('refresh', '--functions')
 129     with context.db.cursor() as cur:
 130         for oid in oids.split(','):
 131             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 132
 133     context.nominatim.reindex_placex(context.db)
 134
 135 ################################ THEN ##################################
 136
 137 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 138 def check_place_contents(context, table, exact):
 139     """ Check contents of place/placex tables. Each row represents a table row
 140         and all data must match. Data not present in the expected table, may
 141         be arbitry. The rows are identified via the 'object' column which must
 142         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
 143         rows match (for example because 'class' was left out and there are
 144         multiple entries for the given OSM object) then all must match. All
 145         expected rows are expected to be present with at least one database row.
 146         When 'exactly' is given, there must not be additional rows in the database.
 147     """
 148     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 149         expected_content = set()
 150         for row in context.table:
 151             nid = NominatimID(row['object'])
 152             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
 153             if table == 'placex':
 154                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
 155             query += " FROM %s WHERE {}" % (table, )
 156             nid.query_osm_id(cur, query)
 157             assert cur.rowcount > 0, "No rows found for " + row['object']
 158
 159             for res in cur:
 160                 if exact:
 161                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
 162
 163                 DBRow(nid, res, context).assert_row(row, ['object'])
 164
 165         if exact:
 166             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
 167             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 168
 169
 170 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 171 def check_place_has_entry(context, table, oid):
 172     """ Ensure that no database row for the given object exists. The ID
 173         must be of the form '<NRW><osm id>[:<class>]'.
 174     """
 175     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 176         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
 177         assert cur.rowcount == 0, \
 178                "Found {} entries for ID {}".format(cur.rowcount, oid)
 179
 180
 181 @then("search_name contains(?P<exclude> not)?")
 182 def check_search_name_contents(context, exclude):
 183     """ Check contents of place/placex tables. Each row represents a table row
 184         and all data must match. Data not present in the expected table, may
 185         be arbitry. The rows are identified via the 'object' column which must
 186         have an identifier of the form '<NRW><osm id>[:<class>]'. All
 187         expected rows are expected to be present with at least one database row.
 188     """
 189     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
 190
 191     with tokenizer.name_analyzer() as analyzer:
 192         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 193             for row in context.table:
 194                 nid = NominatimID(row['object'])
 195                 nid.row_by_place_id(cur, 'search_name',
 196                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
 197                 assert cur.rowcount > 0, "No rows found for " + row['object']
 198
 199                 for res in cur:
 200                     db_row = DBRow(nid, res, context)
 201                     for name, value in zip(row.headings, row.cells):
 202                         if name in ('name_vector', 'nameaddress_vector'):
 203                             items = [x.strip() for x in value.split(',')]
 204                             tokens = analyzer.get_word_token_info(items)
 205
 206                             if not exclude:
 207                                 assert len(tokens) >= len(items), \
 208                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
 209                             for word, token, wid in tokens:
 210                                 if exclude:
 211                                     assert wid not in res[name], \
 212                                            "Found term for {}/{}: {}".format(nid, name, wid)
 213                                 else:
 214                                     assert wid in res[name], \
 215                                            "Missing term for {}/{}: {}".format(nid, name, wid)
 216                         elif name != 'object':
 217                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
 218
 219 @then("search_name has no entry for (?P<oid>.*)")
 220 def check_search_name_has_entry(context, oid):
 221     """ Check that there is noentry in the search_name table for the given
 222         objects. IDs are in format '<NRW><osm id>[:<class>]'.
 223     """
 224     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 225         NominatimID(oid).row_by_place_id(cur, 'search_name')
 226
 227         assert cur.rowcount == 0, \
 228                "Found {} entries for ID {}".format(cur.rowcount, oid)
 229
 230 @then("location_postcode contains exactly")
 231 def check_location_postcode(context):
 232     """ Check full contents for location_postcode table. Each row represents a table row
 233         and all data must match. Data not present in the expected table, may
 234         be arbitry. The rows are identified via 'country' and 'postcode' columns.
 235         All rows must be present as excepted and there must not be additional
 236         rows.
 237     """
 238     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 239         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
 240         assert cur.rowcount == len(list(context.table)), \
 241             "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 242
 243         results = {}
 244         for row in cur:
 245             key = (row['country_code'], row['postcode'])
 246             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
 247             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
 248
 249         for row in context.table:
 250             db_row = results.get((row['country'],row['postcode']))
 251             assert db_row is not None, \
 252                 f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
 253
 254             db_row.assert_row(row, ('country', 'postcode'))
 255
 256 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
 257 def check_word_table_for_postcodes(context, exclude, postcodes):
 258     """ Check that the tokenizer produces postcode tokens for the given
 259         postcodes. The postcodes are a comma-separated list of postcodes.
 260         Whitespace matters.
 261     """
 262     nctx = context.nominatim
 263     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
 264     with tokenizer.name_analyzer() as ana:
 265         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
 266
 267     plist.sort()
 268
 269     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 270         if nctx.tokenizer != 'legacy':
 271             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
 272                         (plist,))
 273         else:
 274             cur.execute("""SELECT word FROM word WHERE word = any(%s)
 275                              and class = 'place' and type = 'postcode'""",
 276                         (plist,))
 277
 278         found = [row[0] for row in cur]
 279         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
 280
 281     if exclude:
 282         assert len(found) == 0, f"Unexpected postcodes: {found}"
 283     else:
 284         assert set(found) == set(plist), \
 285         f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
 286
 287 @then("place_addressline contains")
 288 def check_place_addressline(context):
 289     """ Check the contents of the place_addressline table. Each row represents
 290         a table row and all data must match. Data not present in the expected
 291         table, may be arbitry. The rows are identified via the 'object' column,
 292         representing the addressee and the 'address' column, representing the
 293         address item.
 294     """
 295     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 296         for row in context.table:
 297             nid = NominatimID(row['object'])
 298             pid = nid.get_place_id(cur)
 299             apid = NominatimID(row['address']).get_place_id(cur)
 300             cur.execute(""" SELECT * FROM place_addressline
 301                             WHERE place_id = %s AND address_place_id = %s""",
 302                         (pid, apid))
 303             assert cur.rowcount > 0, \
 304                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 305
 306             for res in cur:
 307                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 308
 309 @then("place_addressline doesn't contain")
 310 def check_place_addressline_exclude(context):
 311     """ Check that the place_addressline doesn't contain any entries for the
 312         given addressee/address item pairs.
 313     """
 314     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 315         for row in context.table:
 316             pid = NominatimID(row['object']).get_place_id(cur)
 317             apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
 318             if apid is not None:
 319                 cur.execute(""" SELECT * FROM place_addressline
 320                                 WHERE place_id = %s AND address_place_id = %s""",
 321                             (pid, apid))
 322                 assert cur.rowcount == 0, \
 323                     "Row found for place %s and address %s" % (row['object'], row['address'])
 324
 325 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 326 def check_location_property_osmline(context, oid, neg):
 327     """ Check that the given way is present in the interpolation table.
 328     """
 329     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 330         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
 331                        FROM location_property_osmline
 332                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
 333                     (oid, ))
 334
 335         if neg:
 336             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
 337             return
 338
 339         todo = list(range(len(list(context.table))))
 340         for res in cur:
 341             for i in todo:
 342                 row = context.table[i]
 343                 if (int(row['start']) == res['startnumber']
 344                     and int(row['end']) == res['endnumber']):
 345                     todo.remove(i)
 346                     break
 347             else:
 348                 assert False, "Unexpected row " + str(res)
 349
 350             DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 351
 352         assert not todo
 353
 354