git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/test_tokenizer_legacy_icu.py
only consider partials in multi-words for initial count
[nominatim.git] / test / python / test_tokenizer_legacy_icu.py
index b86925ee5dd22b134bd66b7419db8b7c7b45f9d1..39fc9fb4c5a7f348c29ffe8c3b490caf458063f4 100644 (file)
@@ -60,13 +60,12 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
     monkeypatch.undo()
 
     def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',),
-                     suffixes=('gasse', ), abbr=('street => st', )):
+                     variants=('~gasse -> gasse', 'street => st', )):
         cfgfile = tmp_path / 'analyser_test_config.yaml'
         with cfgfile.open('w') as stream:
             cfgstr = {'normalization' : list(norm),
                        'transliteration' : list(trans),
-                       'compound_suffixes' : list(suffixes),
-                       'abbreviations' : list(abbr)}
+                       'variants' : [ {'words': list(variants)}]}
             yaml.dump(cfgstr, stream)
         tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgfile))
 
@@ -151,9 +150,8 @@ def test_init_word_table(tokenizer_factory, test_config, place_row, word_table):
     tok = tokenizer_factory()
     tok.init_new_db(test_config)
 
-    assert word_table.get_partial_words() == {('test', 1), ('52', 1),
+    assert word_table.get_partial_words() == {('test', 1),
                                               ('no', 1), ('area', 2),
-                                              ('holzstrasse', 1), ('holzstr', 1),
                                               ('holz', 1), ('strasse', 1),
                                               ('str', 1)}