1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Container class collecting all components required to transform an OSM name
into a Nominatim token.
"""
11 from typing import Mapping, Optional, TYPE_CHECKING
12 from icu import Transliterator
14 from nominatim.tokenizer.token_analysis.base import Analyser
17 from typing import Any
18 from nominatim.tokenizer.icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
class ICUTokenAnalysis:
    """ Container class collecting the transliterators and token analysis
        modules for a single NameAnalyser instance.

        Attributes set by the constructor:
          normalizer - transliterator compiled from the normalization rules.
          to_ascii   - transliterator compiled from the transliteration rules
                       (with whitespace collapsing appended).
          search     - transliterator compiled from the concatenation of
                       normalization and transliteration rules.
          analysis   - dictionary mapping an analyzer name (or None for the
                       default) to the analyzer created from its rule.
    """

    def __init__(self, norm_rules: str, trans_rules: str,
                 analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule[Any]']):
        """ Compile the ICU rule sets and instantiate all token analyzers.

            `norm_rules` and `trans_rules` are ICU transliteration rule
            strings. `analysis_rules` maps analyzer names to rule objects;
            each one is asked to create its analyzer with the normalizer
            and the to-ASCII transliterator built here.
        """
        self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                         norm_rules)
        # Append a final rule that squeezes every run of whitespace
        # characters into a single blank. The extended rules are used for
        # both the to-ASCII and the search transliterator below.
        trans_rules += ";[:Space:]+ > ' '"
        self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
                                                       trans_rules)
        self.search = Transliterator.createFromRules("icu_search",
                                                     norm_rules + trans_rules)

        self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
                         for name, arules in analysis_rules.items()}


    def get_analyzer(self, name: Optional[str]) -> Analyser:
        """ Return the given named analyzer. If no analyzer with that
            name exists, return the default analyzer.

            The default analyzer is the one registered under the key None.
            A KeyError is raised when the fallback is needed but no default
            analyzer has been registered.
        """
        # Note: the `or` also falls back to the default when the entry
        # stored under `name` is falsy, not only when it is missing.
        return self.analysis.get(name) or self.analysis[None]