- ":: NFC ()"
transliteration:
- ":: Latin ()"
+ - "[[:Punctuation:][:Space:]]+ > ' '"
""")
content += "compound_suffixes:\n"
content += '\n'.join((" - " + s for s in suffixes)) + '\n'
return _create_config
-def test_missing_normalization(tmp_path):
+
+def test_empty_rule_file(tmp_path):
+ """All four sections are present but empty: each rule getter returns '' and there are no replacement pairs."""
fpath = tmp_path / ('test_config.yaml')
fpath.write_text(dedent("""\
- normalizatio:
- - ":: NFD ()"
+ normalization:
+ transliteration:
+ compound_suffixes:
+ abbreviations:
"""))
+ rules = ICURuleLoader(fpath)
+ assert rules.get_search_rules() == ''
+ assert rules.get_normalization_rules() == ''
+ assert rules.get_transliteration_rules() == ''
+ assert rules.get_replacement_pairs() == []
+
+# Section names written to the test rule file; test_missing_normalization leaves out one at a time.
+CONFIG_SECTIONS = ('normalization', 'transliteration',
+ 'compound_suffixes', 'abbreviations')
+
+@pytest.mark.parametrize("section", CONFIG_SECTIONS)
+def test_missing_normalization(tmp_path, section):
+ """Write every section name except `section` and expect ICURuleLoader to raise UsageError."""
+ fpath = tmp_path / ('test_config.yaml')
+ with fpath.open('w') as fd:
+ for name in CONFIG_SECTIONS:
+ if name != section:
+ fd.write(name + ':\n')
+
with pytest.raises(UsageError):
ICURuleLoader(fpath)
+@pytest.mark.parametrize("abbr", ["simple",
+ "double => arrow => bad",
+ "bad = > arrow"])
+def test_bad_abbreviation_syntax(tmp_path, abbr):
+ """Each parametrized abbreviation entry must make ICURuleLoader raise UsageError."""
+ fpath = tmp_path / ('test_config.yaml')
+ fpath.write_text(dedent("""\
+ normalization:
+ transliteration:
+ compound_suffixes:
+ abbreviations:
+ - {}
+ """.format(abbr)))
+
+ # The constructor is expected to raise, so its result is deliberately not bound.
+ with pytest.raises(UsageError):
+ ICURuleLoader(fpath)
+
+
def test_get_search_rules(cfgfile):
fpath = cfgfile(['strasse', 'straße', 'weg'],
rules = loader.get_search_rules()
trans = Transliterator.createFromRules("test", rules)
+ assert trans.transliterate(" Baum straße ") == " baum straße "
assert trans.transliterate(" Baumstraße ") == " baum straße "
assert trans.transliterate(" Baumstrasse ") == " baum strasse "
assert trans.transliterate(" Baumstr ") == " baum str "
assert trans.transliterate(" проспект ") == " prospekt "
-def test_get_synonym_pairs(cfgfile):
- fpath = cfgfile(['Weg', 'Strasse'],
- ['Strasse => str,st'])
+def test_get_normalization_rules(cfgfile):
+ """Build a transliterator from the normalization rules only and check ' проспект-Prospekt ' becomes ' проспект prospekt '."""
+ fpath = cfgfile(['strasse', 'straße', 'weg'],
+ ['strasse,straße => str'])
loader = ICURuleLoader(fpath)
+ rules = loader.get_normalization_rules()
+ trans = Transliterator.createFromRules("test", rules)
+
+ assert trans.transliterate(" проспект-Prospekt ") == " проспект prospekt "
+
+
+def test_get_transliteration_rules(cfgfile):
+ """Build a transliterator from the transliteration rules only and check ' проспект-Prospekt ' becomes ' prospekt Prospekt '."""
+ fpath = cfgfile(['strasse', 'straße', 'weg'],
+ ['strasse,straße => str'])
- repl = loader.get_replacement_pairs()
+ loader = ICURuleLoader(fpath)
+ rules = loader.get_transliteration_rules()
+ trans = Transliterator.createFromRules("test", rules)
+
+ assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt "
+
+
+def test_get_replacement_pairs_multi_to(cfgfile):
+ """One abbreviation source with two targets ('Strasse => str,st'), with Pfad and Strasse as compound suffixes."""
+ config_path = cfgfile(['Pfad', 'Strasse'],
+ ['Strasse => str,st'])
+ pairs = ICURuleLoader(config_path).get_replacement_pairs()
+
+ # Sort each target collection so the comparison does not depend on its iteration order.
+ normalized = [(source, sorted(targets)) for source, targets in pairs]
+
+ assert normalized == [
+ (' strasse ', [' st ', ' str ', ' strasse ']),
+ ('strasse ', [' st ', ' str ', ' strasse ']),
+ ('pfad ', [' pfad ']),
+ ('str ', [' str ']),
+ ('st ', [' st ']),
+ ]
+
+
+def test_get_replacement_pairs_multi_from(cfgfile):
+ """Two abbreviation sources sharing one target ('saint,Sainte => st'), no compound suffixes."""
+ config_path = cfgfile([], ['saint,Sainte => st'])
+ pairs = ICURuleLoader(config_path).get_replacement_pairs()
+
+ # Sort each target collection so the comparison does not depend on its iteration order.
+ normalized = [(source, sorted(targets)) for source, targets in pairs]
+
+ assert normalized == [
+ (' sainte ', [' sainte ', ' st ']),
+ (' saint ', [' saint ', ' st ']),
+ ]
+
+
+def test_get_replacement_pairs_cross_abbreviations(cfgfile):
+ """'sainte' appears as a source in two abbreviation entries; its targets from both are expected together."""
+ abbreviations = ['saint,Sainte => st',
+ 'sainte => ste']
+ config_path = cfgfile([], abbreviations)
+ pairs = ICURuleLoader(config_path).get_replacement_pairs()
+
+ # Sort each target collection so the comparison does not depend on its iteration order.
+ normalized = [(source, sorted(targets)) for source, targets in pairs]
+
+ assert normalized == [
+ (' sainte ', [' sainte ', ' st ', ' ste ']),
+ (' saint ', [' saint ', ' st ']),
+ ]
+
+
+@pytest.mark.parametrize("abbr", ["missing to =>",
+ " => missing from",
+ "=>"])
+def test_abbreviation_with_empty_side(tmp_path, abbr):
+ """An abbreviation entry with an empty side of '=>' loads without error and yields no replacement pairs.
+
+ Named differently from test_bad_abbreviation_syntax on purpose: a second
+ function with that name would rebind it in the module, and pytest would
+ then never collect the UsageError cases.
+ """
+ fpath = tmp_path / ('test_config.yaml')
+ fpath.write_text(dedent("""\
+ normalization:
+ transliteration:
+ compound_suffixes:
+ abbreviations:
+ - {}
+ """.format(abbr)))
- assert repl == [(' strasse ', {' strasse ', ' str ', ' st '}),
- ('strasse ', {' strasse ', ' str ', ' st '}),
- ('weg ', {' weg '})]
+ repl = ICURuleLoader(fpath).get_replacement_pairs()
+ assert repl == []