X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/40cb17d2995baaba6bec299d4e6dc65b25dbdc28..324b1b5575ce1793d90cdb9837230f76acd8169e:/test/bdd/steps/steps_db_ops.py

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 6d7bc188..be2789f3 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -214,7 +214,7 @@ def check_search_name_contents(context, exclude):
                     for name, value in zip(row.headings, row.cells):
                         if name in ('name_vector', 'nameaddress_vector'):
                             items = [x.strip() for x in value.split(',')]
-                            tokens = analyzer.get_word_token_info(context.db, items)
+                            tokens = analyzer.get_word_token_info(items)
 
                             if not exclude:
                                 assert len(tokens) >= len(items), \
@@ -251,7 +251,7 @@ def check_location_postcode(context):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
         assert cur.rowcount == len(list(context.table)), \
-            "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table)))
+            "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 
         results = {}
         for row in cur:
@@ -281,6 +281,39 @@ def check_word_table(context, exclude):
             else:
                 assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
 
+
+@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
+def check_word_table_for_postcodes(context, exclude, postcodes):
+    """ Check that the tokenizer produces postcode tokens for the given
+        postcodes. The postcodes are a comma-separated list of postcodes.
+        Whitespace matters.
+    """
+    nctx = context.nominatim
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
+    with tokenizer.name_analyzer() as ana:
+        plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
+
+    plist.sort()
+
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        if nctx.tokenizer == 'legacy_icu':
+            cur.execute("""SELECT info->>'postcode' FROM word
+                           WHERE type = 'P' and info->>'postcode' = any(%s)""",
+                        (plist,))
+        else:
+            cur.execute("""SELECT word FROM word WHERE word = any(%s)
+                             and class = 'place' and type = 'postcode'""",
+                        (plist,))
+
+        found = [row[0] for row in cur]
+        assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
+
+    if exclude:
+        assert len(found) == 0, f"Unexpected postcodes: {found}"
+    else:
+        assert set(found) == set(plist), \
+        f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
+
 @then("place_addressline contains")
 def check_place_addressline(context):
     """ Check the contents of the place_addressline table. Each row represents