+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Helper class to create ICU rules from a configuration file.
"""
from nominatim.db.properties import set_property, get_property
from nominatim.errors import UsageError
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
+from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis
+import nominatim.data.country_info
LOG = logging.getLogger()
rules = config.load_sub_configuration('icu_tokenizer.yaml',
config='TOKENIZER_CONFIG')
+ # Make sure country information is available to analyzers and sanitizers.
+ nominatim.data.country_info.setup_country_config(config)
+
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
self.analysis_rules = _get_section(rules, 'token-analysis')
def make_token_analysis(self):
""" Create a token analyser from the previously loaded rules.
"""
- return self.analysis[None].create(self.normalization_rules,
- self.transliteration_rules)
+ return ICUTokenAnalysis(self.normalization_rules,
+ self.transliteration_rules, self.analysis)
def get_search_rules(self):
else:
LOG.fatal("ICU tokenizer configuration has two token "
"analyzers with id '%s'.", name)
- UsageError("Syntax error in ICU tokenizer config.")
+ raise UsageError("Syntax error in ICU tokenizer config.")
self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)
module_name = 'nominatim.tokenizer.token_analysis.' \
+ _get_section(rules, 'analyzer').replace('-', '_')
analysis_mod = importlib.import_module(module_name)
- self._mod_create = analysis_mod.create
+ self.create = analysis_mod.create
# Load the configuration.
self.config = analysis_mod.configure(rules, normalization_rules)
-
-
- def create(self, normalization_rules, transliteration_rules):
- """ Create an analyzer from the given rules.
- """
- return self._mod_create(normalization_rules,
- transliteration_rules,
- self.config)