+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Helper class to create ICU rules from a configuration file.
"""
from nominatim.errors import UsageError
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.tokenizer.icu_token_analysis import ICUTokenAnalysis
+import nominatim.data.country_info
LOG = logging.getLogger()
rules = config.load_sub_configuration('icu_tokenizer.yaml',
config='TOKENIZER_CONFIG')
+ # Make sure country information is available to analyzers and sanitizers.
+ nominatim.data.country_info.setup_country_config(config)
+
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
self.analysis_rules = _get_section(rules, 'token-analysis')
else:
LOG.fatal("ICU tokenizer configuration has two token "
"analyzers with id '%s'.", name)
- UsageError("Syntax error in ICU tokenizer config.")
+ raise UsageError("Syntax error in ICU tokenizer config.")
self.analysis[name] = TokenAnalyzerRule(section, self.normalization_rules)