git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/test_tokenizer_icu_rule_loader.py
correctly quote strings when copying in data
[nominatim.git] / test / python / test_tokenizer_icu_rule_loader.py
index 20b127f39c7c622e2424e3b50fc0c38c7f3bb934..abbc92423f4d9b1f44941b90f16534491c9dd2b6 100644 (file)
@@ -21,6 +21,7 @@ def cfgfile(tmp_path, suffix='.yaml'):
             - ":: NFC ()"
         transliteration:
             - "::  Latin ()"
+            - "[[:Punctuation:][:Space:]]+ > ' '"
         """)
         content += "compound_suffixes:\n"
         content += '\n'.join(("    - " + s for s in suffixes)) + '\n'
@@ -32,13 +33,33 @@ def cfgfile(tmp_path, suffix='.yaml'):
 
     return _create_config
 
-def test_missing_normalization(tmp_path):
+
+def test_empty_rule_file(tmp_path):
     fpath = tmp_path / ('test_config.yaml')
     fpath.write_text(dedent("""\
-        normalizatio:
-            - ":: NFD ()"
+        normalization:
+        transliteration:
+        compound_suffixes:
+        abbreviations:
         """))
 
+    rules = ICURuleLoader(fpath)
+    assert rules.get_search_rules() == ''
+    assert rules.get_normalization_rules() == ''
+    assert rules.get_transliteration_rules() == ''
+    assert rules.get_replacement_pairs() == []
+
+CONFIG_SECTIONS = ('normalization', 'transliteration',
+                   'compound_suffixes', 'abbreviations')
+
+@pytest.mark.parametrize("section", CONFIG_SECTIONS)
+def test_missing_normalization(tmp_path, section):
+    fpath = tmp_path / ('test_config.yaml')
+    with fpath.open('w') as fd:
+        for name in CONFIG_SECTIONS:
+            if name != section:
+                fd.write(name + ':\n')
+
     with pytest.raises(UsageError):
         ICURuleLoader(fpath)
 
@@ -53,6 +74,7 @@ def test_get_search_rules(cfgfile):
     rules = loader.get_search_rules()
     trans = Transliterator.createFromRules("test", rules)
 
+    assert trans.transliterate(" Baum straße ") == " baum straße "
     assert trans.transliterate(" Baumstraße ") == " baum straße "
     assert trans.transliterate(" Baumstrasse ") == " baum strasse "
     assert trans.transliterate(" Baumstr ") == " baum str "
@@ -61,6 +83,28 @@ def test_get_search_rules(cfgfile):
     assert trans.transliterate(" проспект ") == " prospekt "
 
 
+def test_get_normalization_rules(cfgfile):
+    fpath = cfgfile(['strasse', 'straße', 'weg'],
+                    ['strasse,straße => str'])
+
+    loader = ICURuleLoader(fpath)
+    rules = loader.get_normalization_rules()
+    trans = Transliterator.createFromRules("test", rules)
+
+    assert trans.transliterate(" проспект-Prospekt ") == " проспект prospekt "
+
+
+def test_get_transliteration_rules(cfgfile):
+    fpath = cfgfile(['strasse', 'straße', 'weg'],
+                    ['strasse,straße => str'])
+
+    loader = ICURuleLoader(fpath)
+    rules = loader.get_transliteration_rules()
+    trans = Transliterator.createFromRules("test", rules)
+
+    assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt "
+
+
 def test_get_synonym_pairs(cfgfile):
     fpath = cfgfile(['Weg', 'Strasse'],
                     ['Strasse => str,st'])