From: Sarah Hoffmann Date: Mon, 25 Jul 2022 14:27:22 +0000 (+0200) Subject: add support for external token analysis modules X-Git-Tag: v4.1.0~5^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/d819036daab743e8fd093e1e5fff693c936dc2a8 add support for external token analysis modules --- diff --git a/nominatim/config.py b/nominatim/config.py index 7f57a3be..7502aff7 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -222,7 +222,7 @@ class Configuration: return result - def load_plugin_module(self, module_name: str, internal_path: str) -> object: + def load_plugin_module(self, module_name: str, internal_path: str) -> Any: """ Load a Python module as a plugin. The module_name may have three variants: diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index cf9fdb88..f461a1f1 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -8,7 +8,6 @@ Helper class to create ICU rules from a configuration file. """ from typing import Mapping, Any, Dict, Optional -import importlib import io import json import logging @@ -145,7 +144,9 @@ class ICURuleLoader: LOG.fatal("ICU tokenizer configuration has two token " "analyzers with id '%s'.", name) raise UsageError("Syntax error in ICU tokenizer config.") - self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules) + self.analysis[name] = TokenAnalyzerRule(section, + self.normalization_rules, + self.config) @staticmethod @@ -169,15 +170,18 @@ class TokenAnalyzerRule: and creates a new token analyzer on request. """ - def __init__(self, rules: Mapping[str, Any], normalization_rules: str) -> None: - # Find the analysis module - module_name = 'nominatim.tokenizer.token_analysis.' \ - + _get_section(rules, 'analyzer').replace('-', '_') - self._analysis_mod: AnalysisModule = importlib.import_module(module_name) + def __init__(self, rules: Mapping[str, Any], normalization_rules: str, + config: Configuration) -> None: + analyzer_name = _get_section(rules, 'analyzer') + if not analyzer_name or not isinstance(analyzer_name, str): + raise UsageError("'analyzer' parameter needs to be simple string") + + self._analysis_mod: AnalysisModule = \ + config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis') - # Load the configuration. self.config = self._analysis_mod.configure(rules, normalization_rules) + def create(self, normalizer: Any, transliterator: Any) -> Analyser: """ Create a new analyser instance for the given rule. """