test/bdd/steps/steps_db_ops.py

   1 # SPDX-License-Identifier: GPL-2.0-only
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2022 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 import logging
   8 from itertools import chain
   9
  10 import psycopg2.extras
  11
  12 from place_inserter import PlaceColumn
  13 from table_compare import NominatimID, DBRow
  14
  15 from nominatim.indexer import indexer
  16 from nominatim.tokenizer import factory as tokenizer_factory
  17
  18 def check_database_integrity(context):
  19     """ Check some generic constraints on the tables.
  20     """
  21     # place_addressline should not have duplicate (place_id, address_place_id)
  22     cur = context.db.cursor()
  23     cur.execute("""SELECT count(*) FROM
  24                     (SELECT place_id, address_place_id, count(*) as c
  25                      FROM place_addressline GROUP BY place_id, address_place_id) x
  26                    WHERE c > 1""")
  27     assert cur.fetchone()[0] == 0, "Duplicates found in place_addressline"
  28
  29
  30 ################################ GIVEN ##################################
  31
  32 @given("the (?P<named>named )?places")
  33 def add_data_to_place_table(context, named):
  34     """ Add entries into the place table. 'named places' makes sure that
  35         the entries get a random name when none is explicitly given.
  36     """
  37     with context.db.cursor() as cur:
  38         cur.execute('ALTER TABLE place DISABLE TRIGGER place_before_insert')
  39         for row in context.table:
  40             PlaceColumn(context).add_row(row, named is not None).db_insert(cur)
  41         cur.execute('ALTER TABLE place ENABLE TRIGGER place_before_insert')
  42
  43 @given("the relations")
  44 def add_data_to_planet_relations(context):
  45     """ Add entries into the osm2pgsql relation middle table. This is needed
  46         for tests on data that looks up members.
  47     """
  48     with context.db.cursor() as cur:
  49         for r in context.table:
  50             last_node = 0
  51             last_way = 0
  52             parts = []
  53             if r['members']:
  54                 members = []
  55                 for m in r['members'].split(','):
  56                     mid = NominatimID(m)
  57                     if mid.typ == 'N':
  58                         parts.insert(last_node, int(mid.oid))
  59                         last_node += 1
  60                         last_way += 1
  61                     elif mid.typ == 'W':
  62                         parts.insert(last_way, int(mid.oid))
  63                         last_way += 1
  64                     else:
  65                         parts.append(int(mid.oid))
  66
  67                     members.extend((mid.typ.lower() + mid.oid, mid.cls or ''))
  68             else:
  69                 members = None
  70
  71             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  72
  73             cur.execute("""INSERT INTO planet_osm_rels (id, way_off, rel_off, parts, members, tags)
  74                            VALUES (%s, %s, %s, %s, %s, %s)""",
  75                         (r['id'], last_node, last_way, parts, members, list(tags)))
  76
  77 @given("the ways")
  78 def add_data_to_planet_ways(context):
  79     """ Add entries into the osm2pgsql way middle table. This is necessary for
  80         tests on that that looks up node ids in this table.
  81     """
  82     with context.db.cursor() as cur:
  83         for r in context.table:
  84             tags = chain.from_iterable([(h[5:], r[h]) for h in r.headings if h.startswith("tags+")])
  85             nodes = [ int(x.strip()) for x in r['nodes'].split(',') ]
  86
  87             cur.execute("INSERT INTO planet_osm_ways (id, nodes, tags) VALUES (%s, %s, %s)",
  88                         (r['id'], nodes, list(tags)))
  89
  90 ################################ WHEN ##################################
  91
  92 @when("importing")
  93 def import_and_index_data_from_place_table(context):
  94     """ Import data previously set up in the place table.
  95     """
  96     nctx = context.nominatim
  97
  98     tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
  99     context.nominatim.copy_from_place(context.db)
 100
 101     # XXX use tool function as soon as it is ported
 102     with context.db.cursor() as cur:
 103         with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
 104             cur.execute(fd.read())
 105         cur.execute("""
 106             INSERT INTO location_postcode
 107              (place_id, indexed_status, country_code, postcode, geometry)
 108             SELECT nextval('seq_place'), 1, country_code,
 109                    upper(trim (both ' ' from address->'postcode')) as pc,
 110                    ST_Centroid(ST_Collect(ST_Centroid(geometry)))
 111               FROM placex
 112              WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
 113                    AND geometry IS NOT null
 114              GROUP BY country_code, pc""")
 115
 116     # Call directly as the refresh function does not include postcodes.
 117     indexer.LOG.setLevel(logging.ERROR)
 118     indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
 119
 120     check_database_integrity(context)
 121
 122 @when("updating places")
 123 def update_place_table(context):
 124     """ Update the place table with the given data. Also runs all triggers
 125         related to updates and reindexes the new data.
 126     """
 127     context.nominatim.run_nominatim('refresh', '--functions')
 128     with context.db.cursor() as cur:
 129         for row in context.table:
 130             PlaceColumn(context).add_row(row, False).db_insert(cur)
 131
 132     context.nominatim.reindex_placex(context.db)
 133     check_database_integrity(context)
 134
 135 @when("updating postcodes")
 136 def update_postcodes(context):
 137     """ Rerun the calculation of postcodes.
 138     """
 139     context.nominatim.run_nominatim('refresh', '--postcodes')
 140
 141 @when("marking for delete (?P<oids>.*)")
 142 def delete_places(context, oids):
 143     """ Remove entries from the place table. Multiple ids may be given
 144         separated by commas. Also runs all triggers
 145         related to updates and reindexes the new data.
 146     """
 147     context.nominatim.run_nominatim('refresh', '--functions')
 148     with context.db.cursor() as cur:
 149         for oid in oids.split(','):
 150             NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}')
 151
 152     context.nominatim.reindex_placex(context.db)
 153
 154 ################################ THEN ##################################
 155
 156 @then("(?P<table>placex|place) contains(?P<exact> exactly)?")
 157 def check_place_contents(context, table, exact):
 158     """ Check contents of place/placex tables. Each row represents a table row
 159         and all data must match. Data not present in the expected table, may
 160         be arbitry. The rows are identified via the 'object' column which must
 161         have an identifier of the form '<NRW><osm id>[:<class>]'. When multiple
 162         rows match (for example because 'class' was left out and there are
 163         multiple entries for the given OSM object) then all must match. All
 164         expected rows are expected to be present with at least one database row.
 165         When 'exactly' is given, there must not be additional rows in the database.
 166     """
 167     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 168         expected_content = set()
 169         for row in context.table:
 170             nid = NominatimID(row['object'])
 171             query = 'SELECT *, ST_AsText(geometry) as geomtxt, ST_GeometryType(geometry) as geometrytype'
 172             if table == 'placex':
 173                 query += ' ,ST_X(centroid) as cx, ST_Y(centroid) as cy'
 174             query += " FROM %s WHERE {}" % (table, )
 175             nid.query_osm_id(cur, query)
 176             assert cur.rowcount > 0, "No rows found for " + row['object']
 177
 178             for res in cur:
 179                 if exact:
 180                     expected_content.add((res['osm_type'], res['osm_id'], res['class']))
 181
 182                 DBRow(nid, res, context).assert_row(row, ['object'])
 183
 184         if exact:
 185             cur.execute('SELECT osm_type, osm_id, class from {}'.format(table))
 186             assert expected_content == set([(r[0], r[1], r[2]) for r in cur])
 187
 188
 189 @then("(?P<table>placex|place) has no entry for (?P<oid>.*)")
 190 def check_place_has_entry(context, table, oid):
 191     """ Ensure that no database row for the given object exists. The ID
 192         must be of the form '<NRW><osm id>[:<class>]'.
 193     """
 194     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 195         NominatimID(oid).query_osm_id(cur, "SELECT * FROM %s where {}" % table)
 196         assert cur.rowcount == 0, \
 197                "Found {} entries for ID {}".format(cur.rowcount, oid)
 198
 199
 200 @then("search_name contains(?P<exclude> not)?")
 201 def check_search_name_contents(context, exclude):
 202     """ Check contents of place/placex tables. Each row represents a table row
 203         and all data must match. Data not present in the expected table, may
 204         be arbitry. The rows are identified via the 'object' column which must
 205         have an identifier of the form '<NRW><osm id>[:<class>]'. All
 206         expected rows are expected to be present with at least one database row.
 207     """
 208     tokenizer = tokenizer_factory.get_tokenizer_for_db(context.nominatim.get_test_config())
 209
 210     with tokenizer.name_analyzer() as analyzer:
 211         with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 212             for row in context.table:
 213                 nid = NominatimID(row['object'])
 214                 nid.row_by_place_id(cur, 'search_name',
 215                                     ['ST_X(centroid) as cx', 'ST_Y(centroid) as cy'])
 216                 assert cur.rowcount > 0, "No rows found for " + row['object']
 217
 218                 for res in cur:
 219                     db_row = DBRow(nid, res, context)
 220                     for name, value in zip(row.headings, row.cells):
 221                         if name in ('name_vector', 'nameaddress_vector'):
 222                             items = [x.strip() for x in value.split(',')]
 223                             tokens = analyzer.get_word_token_info(items)
 224
 225                             if not exclude:
 226                                 assert len(tokens) >= len(items), \
 227                                        "No word entry found for {}. Entries found: {!s}".format(value, len(tokens))
 228                             for word, token, wid in tokens:
 229                                 if exclude:
 230                                     assert wid not in res[name], \
 231                                            "Found term for {}/{}: {}".format(nid, name, wid)
 232                                 else:
 233                                     assert wid in res[name], \
 234                                            "Missing term for {}/{}: {}".format(nid, name, wid)
 235                         elif name != 'object':
 236                             assert db_row.contains(name, value), db_row.assert_msg(name, value)
 237
 238 @then("search_name has no entry for (?P<oid>.*)")
 239 def check_search_name_has_entry(context, oid):
 240     """ Check that there is noentry in the search_name table for the given
 241         objects. IDs are in format '<NRW><osm id>[:<class>]'.
 242     """
 243     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 244         NominatimID(oid).row_by_place_id(cur, 'search_name')
 245
 246         assert cur.rowcount == 0, \
 247                "Found {} entries for ID {}".format(cur.rowcount, oid)
 248
 249 @then("location_postcode contains exactly")
 250 def check_location_postcode(context):
 251     """ Check full contents for location_postcode table. Each row represents a table row
 252         and all data must match. Data not present in the expected table, may
 253         be arbitry. The rows are identified via 'country' and 'postcode' columns.
 254         All rows must be present as excepted and there must not be additional
 255         rows.
 256     """
 257     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 258         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
 259         assert cur.rowcount == len(list(context.table)), \
 260             "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 261
 262         results = {}
 263         for row in cur:
 264             key = (row['country_code'], row['postcode'])
 265             assert key not in results, "Postcode table has duplicate entry: {}".format(row)
 266             results[key] = DBRow((row['country_code'],row['postcode']), row, context)
 267
 268         for row in context.table:
 269             db_row = results.get((row['country'],row['postcode']))
 270             assert db_row is not None, \
 271                 "Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
 272
 273             db_row.assert_row(row, ('country', 'postcode'))
 274
 275 @then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
 276 def check_word_table_for_postcodes(context, exclude, postcodes):
 277     """ Check that the tokenizer produces postcode tokens for the given
 278         postcodes. The postcodes are a comma-separated list of postcodes.
 279         Whitespace matters.
 280     """
 281     nctx = context.nominatim
 282     tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
 283     with tokenizer.name_analyzer() as ana:
 284         plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
 285
 286     plist.sort()
 287
 288     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 289         if nctx.tokenizer == 'icu':
 290             cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
 291                         (plist,))
 292         else:
 293             cur.execute("""SELECT word FROM word WHERE word = any(%s)
 294                              and class = 'place' and type = 'postcode'""",
 295                         (plist,))
 296
 297         found = [row[0] for row in cur]
 298         assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
 299
 300     if exclude:
 301         assert len(found) == 0, f"Unexpected postcodes: {found}"
 302     else:
 303         assert set(found) == set(plist), \
 304         f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
 305
 306 @then("place_addressline contains")
 307 def check_place_addressline(context):
 308     """ Check the contents of the place_addressline table. Each row represents
 309         a table row and all data must match. Data not present in the expected
 310         table, may be arbitry. The rows are identified via the 'object' column,
 311         representing the addressee and the 'address' column, representing the
 312         address item.
 313     """
 314     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 315         for row in context.table:
 316             nid = NominatimID(row['object'])
 317             pid = nid.get_place_id(cur)
 318             apid = NominatimID(row['address']).get_place_id(cur)
 319             cur.execute(""" SELECT * FROM place_addressline
 320                             WHERE place_id = %s AND address_place_id = %s""",
 321                         (pid, apid))
 322             assert cur.rowcount > 0, \
 323                         "No rows found for place %s and address %s" % (row['object'], row['address'])
 324
 325             for res in cur:
 326                 DBRow(nid, res, context).assert_row(row, ('address', 'object'))
 327
 328 @then("place_addressline doesn't contain")
 329 def check_place_addressline_exclude(context):
 330     """ Check that the place_addressline doesn't contain any entries for the
 331         given addressee/address item pairs.
 332     """
 333     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 334         for row in context.table:
 335             pid = NominatimID(row['object']).get_place_id(cur)
 336             apid = NominatimID(row['address']).get_place_id(cur)
 337             cur.execute(""" SELECT * FROM place_addressline
 338                             WHERE place_id = %s AND address_place_id = %s""",
 339                         (pid, apid))
 340             assert cur.rowcount == 0, \
 341                 "Row found for place %s and address %s" % (row['object'], row['address'])
 342
 343 @then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
 344 def check_location_property_osmline(context, oid, neg):
 345     """ Check that the given way is present in the interpolation table.
 346     """
 347     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
 348         cur.execute("""SELECT *, ST_AsText(linegeo) as geomtxt
 349                        FROM location_property_osmline
 350                        WHERE osm_id = %s AND startnumber IS NOT NULL""",
 351                     (oid, ))
 352
 353         if neg:
 354             assert cur.rowcount == 0, "Interpolation found for way {}.".format(oid)
 355             return
 356
 357         todo = list(range(len(list(context.table))))
 358         for res in cur:
 359             for i in todo:
 360                 row = context.table[i]
 361                 if (int(row['start']) == res['startnumber']
 362                     and int(row['end']) == res['endnumber']):
 363                     todo.remove(i)
 364                     break
 365             else:
 366                 assert False, "Unexpected row " + str(res)
 367
 368             DBRow(oid, res, context).assert_row(row, ('start', 'end'))
 369
 370         assert not todo
 371
 372