X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a8b037669ac8a9f52ad0091b83ae4f7f9b78b28e..7c869701f02369873c5573236398ec885c4a951f:/nominatim/tokenizer/icu_rule_loader.py diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index f461a1f1..4c36282c 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -12,13 +12,15 @@ import io import json import logging +from icu import Transliterator + from nominatim.config import flatten_config_list, Configuration from nominatim.db.properties import set_property, get_property from nominatim.db.connection import Connection from nominatim.errors import UsageError from nominatim.tokenizer.place_sanitizer import PlaceSanitizer from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis -from nominatim.tokenizer.token_analysis.base import AnalysisModule, Analyser +from nominatim.tokenizer.token_analysis.base import AnalysisModule, Analyzer import nominatim.data.country_info LOG = logging.getLogger() @@ -135,6 +137,11 @@ class ICURuleLoader: if not isinstance(self.analysis_rules, list): raise UsageError("Configuration section 'token-analysis' must be a list.") + norm = Transliterator.createFromRules("rule_loader_normalization", + self.normalization_rules) + trans = Transliterator.createFromRules("rule_loader_transliteration", + self.transliteration_rules) + for section in self.analysis_rules: name = section.get('id', None) if name in self.analysis: @@ -144,8 +151,7 @@ class ICURuleLoader: LOG.fatal("ICU tokenizer configuration has two token " "analyzers with id '%s'.", name) raise UsageError("Syntax error in ICU tokenizer config.") - self.analysis[name] = TokenAnalyzerRule(section, - self.normalization_rules, + self.analysis[name] = TokenAnalyzerRule(section, norm, trans, self.config) @@ -170,7 +176,8 @@ class TokenAnalyzerRule: and creates a new token analyzer on request. """ - def __init__(self, rules: Mapping[str, Any], normalization_rules: str, + def __init__(self, rules: Mapping[str, Any], + normalizer: Any, transliterator: Any, config: Configuration) -> None: analyzer_name = _get_section(rules, 'analyzer') if not analyzer_name or not isinstance(analyzer_name, str): @@ -179,10 +186,11 @@ class TokenAnalyzerRule: self._analysis_mod: AnalysisModule = \ config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis') - self.config = self._analysis_mod.configure(rules, normalization_rules) + self.config = self._analysis_mod.configure(rules, normalizer, + transliterator) - def create(self, normalizer: Any, transliterator: Any) -> Analyser: + def create(self, normalizer: Any, transliterator: Any) -> Analyzer: """ Create a new analyser instance for the given rule. """ return self._analysis_mod.create(normalizer, transliterator, self.config)