from .token_analysis.base import Analyzer
if TYPE_CHECKING:
- from typing import Any
- from .icu_rule_loader import TokenAnalyzerRule # pylint: disable=cyclic-import
+ from typing import Any # noqa
+ from .icu_rule_loader import TokenAnalyzerRule
+
class ICUTokenAnalysis:
""" Container class collecting the transliterators and token analysis
def __init__(self, norm_rules: str, trans_rules: str,
analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
+ # additional break signs are not relevant during name analysis
+ norm_rules += ";[[:Space:][-:]]+ > ' ';"
self.normalizer = Transliterator.createFromRules("icu_normalization",
norm_rules)
trans_rules += ";[:Space:]+ > ' '"
self.analysis = {name: arules.create(self.normalizer, self.to_ascii)
for name, arules in analysis_rules.items()}
-
def get_analyzer(self, name: Optional[str]) -> Analyzer:
""" Return the given named analyzer. If no analyzer with that
name exists, return the default analyzer.