@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
def check_word_table_for_postcodes(context, exclude, postcodes):
    """ Check that the tokenizer produces postcode tokens for the given
        postcodes. The postcodes are a comma-separated list of postcodes.
        Whitespace matters.

        `exclude` is filled from the optional " no" group of the step
        pattern: when it is set, none of the postcodes may have a token in
        the word table; otherwise every one of them must have a token.
        In both cases no postcode may appear more than once.

        Raises an AssertionError when one of the checks fails.
    """
    nctx = context.nominatim
    tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())

    # The list is split on ',' only and deliberately not stripped, so that
    # the analyzer sees the postcodes exactly as written in the feature file.
    with tokenizer.name_analyzer() as ana:
        plist = sorted(ana.normalize_postcode(p) for p in postcodes.split(','))

    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
        # The word table is queried differently depending on the tokenizer
        # in use: 'legacy_icu' reads the postcode from the `info` column,
        # everything else matches on word/class/type.
        if nctx.tokenizer == 'legacy_icu':
            cur.execute("""SELECT info->>'postcode' FROM word
                           WHERE type = 'P' and info->>'postcode' = any(%s)""",
                        (plist,))
        else:
            cur.execute("""SELECT word FROM word WHERE word = any(%s)
                           and class = 'place' and type = 'postcode'""",
                        (plist,))

        # Rows have to be fetched while the cursor is still open.
        found = [row[0] for row in cur]

    assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"

    if exclude:
        assert not found, f"Unexpected postcodes: {found}"
    else:
        assert set(found) == set(plist), \
               f"Missing postcodes {set(plist) - set(found)}. Found: {found}"