X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a263e54b9463c89addeb0ac613d0586378de22f2..c4f22a42eba499ed1854c5967b2e3932fe9b3896:/test/bdd/steps/steps_db_ops.py

diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index 6d7bc188..4970f6df 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -1,3 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 import logging
 from itertools import chain
 
@@ -214,7 +220,7 @@ def check_search_name_contents(context, exclude):
                     for name, value in zip(row.headings, row.cells):
                         if name in ('name_vector', 'nameaddress_vector'):
                             items = [x.strip() for x in value.split(',')]
-                            tokens = analyzer.get_word_token_info(context.db, items)
+                            tokens = analyzer.get_word_token_info(items)
 
                             if not exclude:
                                 assert len(tokens) >= len(items), \
@@ -251,7 +257,7 @@ def check_location_postcode(context):
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
         cur.execute("SELECT *, ST_AsText(geometry) as geomtxt FROM location_postcode")
         assert cur.rowcount == len(list(context.table)), \
-            "Postcode table has {} rows, expected {}.".foramt(cur.rowcount, len(list(context.table)))
+            "Postcode table has {} rows, expected {}.".format(cur.rowcount, len(list(context.table)))
 
         results = {}
         for row in cur:
@@ -266,20 +272,36 @@ def check_location_postcode(context):
 
             db_row.assert_row(row, ('country', 'postcode'))
 
-@then("word contains(?P<exclude> not)?")
-def check_word_table(context, exclude):
-    """ Check the contents of the word table. Each row represents a table row
-        and all data must match. Data not present in the expected table, may
-        be arbitry. The rows are identified via all given columns.
+@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
+def check_word_table_for_postcodes(context, exclude, postcodes):
+    """ Check that the tokenizer produces postcode tokens for the given
+        postcodes. The postcodes are a comma-separated list of postcodes.
+        Whitespace matters.
     """
+    nctx = context.nominatim
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
+    with tokenizer.name_analyzer() as ana:
+        plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]
+
+    plist.sort()
+
     with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
-        for row in context.table:
-            wheres = ' AND '.join(["{} = %s".format(h) for h in row.headings])
-            cur.execute("SELECT * from word WHERE " + wheres, list(row.cells))
-            if exclude:
-                assert cur.rowcount == 0, "Row still in word table: %s" % '/'.join(values)
-            else:
-                assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)
+        if nctx.tokenizer == 'icu':
+            cur.execute("SELECT word FROM word WHERE type = 'P' and word = any(%s)",
+                        (plist,))
+        else:
+            cur.execute("""SELECT word FROM word WHERE word = any(%s)
+                             and class = 'place' and type = 'postcode'""",
+                        (plist,))
+
+        found = [row[0] for row in cur]
+        assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
+
+    if exclude:
+        assert len(found) == 0, f"Unexpected postcodes: {found}"
+    else:
+        assert set(found) == set(plist), \
+        f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
 
 @then("place_addressline contains")
 def check_place_addressline(context):