git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/icu_token_analysis.py
reintroduce cutoffs when searching for very frequent words
[nominatim.git] / nominatim / tokenizer / icu_token_analysis.py
index 68fc82e333b6a44de6eb9d42ed06a2d4ae17da58..7ea31e8ea1eb21b20f3e8dd8b3c3f12cdc7cee83 100644 (file)
@@ -8,15 +8,22 @@
 Container class collecting all components required to transform an OSM name
 into a Nominatim token.
 """
 Container class collecting all components required to transform an OSM name
 into a Nominatim token.
 """
-
+from typing import Mapping, Optional, TYPE_CHECKING
 from icu import Transliterator
 
 from icu import Transliterator
 
+from nominatim.tokenizer.token_analysis.base import Analyzer
+
+if TYPE_CHECKING:
+    from typing import Any
+    from nominatim.tokenizer.icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
+
 class ICUTokenAnalysis:
     """ Container class collecting the transliterators and token analysis
 class ICUTokenAnalysis:
     """ Container class collecting the transliterators and token analysis
-        modules for a single NameAnalyser instance.
+        modules for a single Analyser instance.
     """
 
     """
 
-    def __init__(self, norm_rules, trans_rules, analysis_rules):
+    def __init__(self, norm_rules: str, trans_rules: str,
+                 analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
         self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                          norm_rules)
         trans_rules += ";[:Space:]+ > ' '"
         self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                          norm_rules)
         trans_rules += ";[:Space:]+ > ' '"
@@ -25,11 +32,11 @@ class ICUTokenAnalysis:
         self.search = Transliterator.createFromRules("icu_search",
                                                      norm_rules + trans_rules)
 
         self.search = Transliterator.createFromRules("icu_search",
                                                      norm_rules + trans_rules)
 
-        self.analysis = {name: arules.create(self.normalizer, self.to_ascii, arules.config)
+        self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
                          for name, arules in analysis_rules.items()}
 
 
                          for name, arules in analysis_rules.items()}
 
 
-    def get_analyzer(self, name):
+    def get_analyzer(self, name: Optional[str]) -> Analyzer:
         """ Return the given named analyzer. If no analyzer with that
             name exists, return the default analyzer.
         """
         """ Return the given named analyzer. If no analyzer with that
             name exists, return the default analyzer.
         """