]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tokenizer/icu_token_analysis.py
fe6704d4249158526ababae3b4910da1f013cd30
[nominatim.git] / src / nominatim_db / tokenizer / icu_token_analysis.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Container class collecting all components required to transform an OSM name
9 into a Nominatim token.
10 """
11 from typing import Mapping, Optional, TYPE_CHECKING
12 from icu import Transliterator
13
14 from .token_analysis.base import Analyzer
15
16 if TYPE_CHECKING:
17     from typing import Any
18     from .icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
19
class ICUTokenAnalysis:
    """ Container class collecting the transliterators and token analysis
        modules for a single Analyzer instance.

        Attributes:
          normalizer: ICU transliterator created from the normalization
                      rules.
          to_ascii: ICU transliterator created from the transliteration
                    rules, extended by a rule that replaces runs of
                    space characters with a single blank.
          search: ICU transliterator created from the normalization rules
                  concatenated with the extended transliteration rules.
          analysis: Analyzers created from the given analysis rules, keyed
                    by analyzer name. The key `None` is looked up as the
                    default analyzer.
    """

    def __init__(self, norm_rules: str, trans_rules: str,
                 analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
        self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                         norm_rules)
        # The extra rule is appended before any transliterator using the
        # transliteration rules is built, so that 'to_ascii' and 'search'
        # both include it.
        trans_rules += ";[:Space:]+ > ' '"
        self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
                                                       trans_rules)
        self.search = Transliterator.createFromRules("icu_search",
                                                     norm_rules + trans_rules)

        # Every analyzer shares the same normalizer and to_ascii instances.
        self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
                         for name, arules in analysis_rules.items()}

    def get_analyzer(self, name: Optional[str]) -> Analyzer:
        """ Return the given named analyzer. If no analyzer with that
            name exists, return the default analyzer.

            Raises a KeyError when the default analyzer is needed but
            none has been registered under the key `None`.
        """
        analyzer = self.analysis.get(name)
        # Compare against None explicitly: an analyzer object that happens
        # to evaluate as false must not be replaced by the default.
        if analyzer is None:
            analyzer = self.analysis[None]
        return analyzer