import pytest
from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
-from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules
from nominatim.errors import UsageError
@pytest.fixture
-def cfgfile(tmp_path, suffix='.yaml'):
+def cfgfile(def_config, tmp_path):
+ project_dir = tmp_path / 'project_dir'
+ project_dir.mkdir()
+ def_config.project_dir = project_dir
+
def _create_config(*variants, **kwargs):
content = dedent("""\
normalization:
content += '\n'.join((" - " + s for s in variants)) + '\n'
for k, v in kwargs:
content += " {}: {}\n".format(k, v)
- fpath = tmp_path / ('test_config' + suffix)
- fpath.write_text(dedent(content))
- return fpath
+ (project_dir / 'icu_tokenizer.yaml').write_text(content)
+
+ return def_config
return _create_config
def test_variants_empty(cfgfile):
- fpath = cfgfile('saint -> 🜵', 'street -> st')
+ config = cfgfile('saint -> 🜵', 'street -> st')
- rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath))
- proc = ICUNameProcessor(rules)
+ proc = ICURuleLoader(config).make_token_analysis()
assert get_normalized_variants(proc, '🜵') == []
assert get_normalized_variants(proc, '🜳') == []
@pytest.mark.parametrize("rules,name,variants", VARIANT_TESTS)
def test_variants(cfgfile, rules, name, variants):
- fpath = cfgfile(*rules)
- proc = ICUNameProcessor(ICUNameProcessorRules(loader=ICURuleLoader(fpath)))
+ config = cfgfile(*rules)
+ proc = ICURuleLoader(config).make_token_analysis()
result = get_normalized_variants(proc, name)
def test_search_normalized(cfgfile):
- fpath = cfgfile('~street => s,st', 'master => mstr')
-
- rules = ICUNameProcessorRules(loader=ICURuleLoader(fpath))
- proc = ICUNameProcessor(rules)
+ config = cfgfile('~street => s,st', 'master => mstr')
+ proc = ICURuleLoader(config).make_token_analysis()
assert proc.get_search_normalized('Master Street') == 'master street'
assert proc.get_search_normalized('Earnes St') == 'earnes st'