X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d35400a7d7655e777224db9beddb44a68f4f2949..c74904d075f8fb622350b73b3dbb03e4c0be4924:/nominatim/tokenizer/icu_token_analysis.py?ds=inline diff --git a/nominatim/tokenizer/icu_token_analysis.py b/nominatim/tokenizer/icu_token_analysis.py index f27a2fbe..7ea31e8e 100644 --- a/nominatim/tokenizer/icu_token_analysis.py +++ b/nominatim/tokenizer/icu_token_analysis.py @@ -1,16 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Container class collecting all components required to transform an OSM name into a Nominatim token. """ - +from typing import Mapping, Optional, TYPE_CHECKING from icu import Transliterator +from nominatim.tokenizer.token_analysis.base import Analyzer + +if TYPE_CHECKING: + from typing import Any + from nominatim.tokenizer.icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import + class ICUTokenAnalysis: """ Container class collecting the transliterators and token analysis - modules for a single NameAnalyser instance. + modules for a single Analyser instance. """ - def __init__(self, norm_rules, trans_rules, analysis_rules): + def __init__(self, norm_rules: str, trans_rules: str, + analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']): self.normalizer = Transliterator.createFromRules("icu_normalization", norm_rules) trans_rules += ";[:Space:]+ > ' '" @@ -19,5 +32,12 @@ class ICUTokenAnalysis: self.search = Transliterator.createFromRules("icu_search", norm_rules + trans_rules) - self.analysis = {name: arules.create(self.to_ascii, arules.config) + self.analysis = {name: arules.create(self.normalizer, self.to_ascii) for name, arules in analysis_rules.items()} + + + def get_analyzer(self, name: Optional[str]) -> Analyzer: + """ Return the given named analyzer. If no analyzer with that + name exists, return the default analyzer. + """ + return self.analysis.get(name) or self.analysis[None]