1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Container class collecting all components required to transform an OSM name
9 into a Nominatim token.
11 from typing import Mapping, Optional, TYPE_CHECKING
12 from icu import Transliterator
14 from .token_analysis.base import Analyzer
17 from typing import Any # noqa
18 from .icu_rule_loader import TokenAnalyzerRule
class ICUTokenAnalysis:
    """ Container class collecting the transliterators and token analysis
        modules for a single analyzer instance.

        Attributes:
          normalizer: Transliterator created from the normalization rules.
          to_ascii: Transliterator created from the transliteration rules.
          search: Transliterator created from the normalization rules
                  followed by the transliteration rules.
          analysis: Analyzers created from the analysis rules, keyed by
                    name. The key None holds the default analyzer.
    """

    def __init__(self, norm_rules: str, trans_rules: str,
                 analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
        """ Create the transliterators from the given rule strings and
            instantiate one analyzer for each entry in `analysis_rules`.

            Arguments:
              norm_rules: ICU rules used for normalization.
              trans_rules: ICU rules used for transliteration.
              analysis_rules: Rule objects by analyzer name. The `create()`
                              function of each rule object is called with
                              the normalizer and the transliterator.
        """
        # additional break signs are not relevant during name analysis
        norm_rules += ";[[:Space:][-:]]+ > ' ';"
        self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                         norm_rules)
        # reduce any run of space characters to a single blank
        trans_rules += ";[:Space:]+ > ' '"
        self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
                                                       trans_rules)
        # The search transliterator applies both (extended) rule sets in one go.
        self.search = Transliterator.createFromRules("icu_search",
                                                     norm_rules + trans_rules)

        self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
                         for name, arules in analysis_rules.items()}

    def get_analyzer(self, name: Optional[str]) -> Analyzer:
        """ Return the given named analyzer. If no analyzer with that
            name exists, return the default analyzer.

            Raises a KeyError when the name is unknown and no default
            analyzer (registered under the key None) is available.
        """
        analyzer = self.analysis.get(name)
        # Compare against None explicitly, so that a registered analyzer
        # is returned even when the object itself evaluates as false.
        if analyzer is None:
            return self.analysis[None]

        return analyzer