]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/icu_token_analysis.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / tokenizer / icu_token_analysis.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Container class collecting all components required to transform an OSM name
9 into a Nominatim token.
10 """
11
12 from icu import Transliterator
13
class ICUTokenAnalysis:
    """ Bundle of the ICU transliterators and the per-name token analysis
        modules that are used together by one NameAnalyser instance.
    """

    def __init__(self, norm_rules, trans_rules, analysis_rules):
        """ Build the transliterators from the given ICU rule strings and
            instantiate one analyzer per entry of `analysis_rules`.

            `analysis_rules` is a mapping from analyzer name to an object
            providing a `create()` factory and a `config` attribute.
        """
        self.normalizer = Transliterator.createFromRules(
            "icu_normalization", norm_rules)

        # The transliteration rules get an extra trailing rule for
        # [:Space:] runs; the extended rule set is used for both the
        # ASCII and the search transliterator.
        ascii_rules = trans_rules + ";[:Space:]+ > ' '"
        self.to_ascii = Transliterator.createFromRules(
            "icu_to_ascii", ascii_rules)

        # Searching applies normalization and transliteration in one go.
        search_rules = norm_rules + ascii_rules
        self.search = Transliterator.createFromRules(
            "icu_search", search_rules)

        analyzers = {}
        for key, module in analysis_rules.items():
            analyzers[key] = module.create(self.normalizer, self.to_ascii,
                                           module.config)
        self.analysis = analyzers


    def get_analyzer(self, name):
        """ Look up the analyzer registered under `name`. When there is
            no such analyzer (or the entry is empty), the default analyzer
            stored under the key `None` is returned instead.
        """
        analyzer = self.analysis.get(name)
        if analyzer:
            return analyzer

        return self.analysis[None]