 def test_update_statistics(word_table, table_factory, temp_db_cursor,
                            tokenizer_factory, test_config):
     word_table.add_full_word(1000, 'hello')
     word_table.add_full_word(1001, 'bye')
+    word_table.add_full_word(1002, 'town')
     table_factory('search_name',
                   'place_id BIGINT, name_vector INT[], nameaddress_vector INT[]',
-                  [(12, [1000], [1001])])
+                  [(12, [1000], [1001]), (13, [1001], [1002]), (14, [1000, 1001], [1002])])
     tok = tokenizer_factory()
     tok.update_statistics(test_config)
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1000 and
-                                          (info->>'count')::int > 0""") == 1
-    assert temp_db_cursor.scalar("""SELECT count(*) FROM word
-                                    WHERE type = 'W' and word_id = 1001 and
-                                          (info->>'addr_count')::int > 0""") == 1
+    assert temp_db_cursor.row_set("""SELECT word_id,
+                                            (info->>'count')::int,
+                                            (info->>'addr_count')::int
+                                     FROM word
+                                     WHERE type = 'W'""") == \
+        {(1000, 2, None), (1001, 2, None), (1002, None, 2)}
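
The rewritten assertion depends on a row_set helper on the cursor fixture, which this
excerpt does not show. A minimal sketch, assuming row_set simply executes the query and
collects every result row into a set of tuples (the implementation below is an
assumption, not code from this change):

    # Assumed shape of the row_set helper used above: fetch all rows of
    # a query as a set of tuples, so comparisons are order-independent.
    def row_set(cursor, sql):
        cursor.execute(sql)
        return {tuple(row) for row in cursor.fetchall()}

Because the result is a set, a single assertion can check the count and addr_count
statistics of all three seeded words at once, regardless of the order in which the
rows come back.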
     def variants(self, name):
         norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
-        return set(self.analysis.compute_variants(norm.transliterate(name).strip()))
+        return set(self.analysis.compute_variants(norm.transliterate(name).strip())[0])

     @pytest.mark.parametrize('pattern', ('(capture)', ['a list']))
     def test_bad_pattern(self, pattern):
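
The added [0] implies that compute_variants no longer returns a plain sequence of
variants but a tuple whose first element holds the variant spellings. A minimal
consumption sketch under that assumption (variant_spellings and its parameters are
illustrative names, not from the source):

    # Assumption: compute_variants returns a tuple; element 0 is the
    # list of variant spellings, and any later elements carry
    # per-variant metadata that the test helper does not inspect.
    def variant_spellings(analysis, normalized_name):
        result = analysis.compute_variants(normalized_name)
        return set(result[0])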