2 Tests for converting a config file to ICU rules.
5 from textwrap import dedent
7 from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
8 from nominatim.errors import UsageError
10 from icu import Transliterator
# Helper for building a rule configuration file inside `tmp_path`.
# `_create_config(suffixes, abbr)` appends a `compound_suffixes:` section with
# one list entry per item in `suffixes` and an `abbreviations:` section with
# one list entry per item in `abbr`, then writes the text to
# `tmp_path / ('test_config' + suffix)`.
# NOTE(review): the start of `content` (the normalization rules quoted below)
# and whatever this helper returns are not visible in this listing - confirm
# against the full file.
13 def cfgfile(tmp_path, suffix='.yaml'):
14 def _create_config(suffixes, abbr):
18 - "[[:Nonspacing Mark:] [:Cf:]] >"
20 - "[[:Punctuation:][:Space:]]+ > ' '"
25 content += "compound_suffixes:\n"
26 content += '\n'.join((" - " + s for s in suffixes)) + '\n'
27 content += "abbreviations:\n"
28 content += '\n'.join((" - " + s for s in abbr)) + '\n'
# The file name always starts with 'test_config'; only the extension varies.
29 fpath = tmp_path / ('test_config' + suffix)
30 fpath.write_text(dedent(content))
# Writes a hand-made configuration to `tmp_path / 'test_config.yaml'` and
# expects a UsageError to be raised.
# NOTE(review): presumably the written config lacks the normalization section
# and the error comes from loading it - the config text and the statement
# inside the `pytest.raises` block are not visible here; confirm.
35 def test_missing_normalization(tmp_path):
36 fpath = tmp_path / ('test_config.yaml')
37 fpath.write_text(dedent("""\
42 with pytest.raises(UsageError):
# Builds a config with the compound suffixes 'strasse', 'straße' and 'weg',
# loads it with ICURuleLoader and compiles the rules returned by
# `get_search_rules()` into an ICU Transliterator, then checks the
# transliteration of several sample names.
46 def test_get_search_rules(cfgfile):
47 fpath = cfgfile(['strasse', 'straße', 'weg'],
48 ['strasse,straße => str',
51 loader = ICURuleLoader(fpath)
53 rules = loader.get_search_rules()
54 trans = Transliterator.createFromRules("test", rules)
# Expected results are lower-cased, and a trailing street suffix
# (straße/strasse/str) is split off as its own word; in " Baumwegstr " only
# the final "str" is split off, "weg" stays attached.
56 assert trans.transliterate(" Baumstraße ") == " baum straße "
57 assert trans.transliterate(" Baumstrasse ") == " baum strasse "
58 assert trans.transliterate(" Baumstr ") == " baum str "
59 assert trans.transliterate(" Baumwegstr ") == " baumweg str "
# Greek and Cyrillic input is expected to come out in Latin script.
60 assert trans.transliterate(" Αθήνα ") == " athēna "
61 assert trans.transliterate(" проспект ") == " prospekt "
64 def test_get_synonym_pairs(cfgfile):
65 fpath = cfgfile(['Weg', 'Strasse'],
66 ['Strasse => str,st'])
68 loader = ICURuleLoader(fpath)
70 repl = loader.get_replacement_pairs()
72 assert repl == [(' strasse ', {' strasse ', ' str ', ' st '}),
73 ('strasse ', {' strasse ', ' str ', ' st '}),