The configure() function now receives Transliterator objects instead
of the raw ICU rule strings. This harmonizes its parameters with those
of the create() function.
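
At the call sites the change looks like this (a minimal before/after
sketch; rules, norm and trans stand for values the caller already has
in scope):

    # before: modules received the raw ICU rule string
    config = module.configure(rules, normalization_rules)

    # after: the caller passes pre-compiled Transliterator objects
    config = module.configure(rules, norm, trans)
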
import json
import logging
+from icu import Transliterator
+
from nominatim.config import flatten_config_list, Configuration
from nominatim.db.properties import set_property, get_property
from nominatim.db.connection import Connection
        if not isinstance(self.analysis_rules, list):
            raise UsageError("Configuration section 'token-analysis' must be a list.")
+        norm = Transliterator.createFromRules("rule_loader_normalization",
+                                              self.normalization_rules)
+        trans = Transliterator.createFromRules("rule_loader_transliteration",
+                                               self.transliteration_rules)
+
        for section in self.analysis_rules:
            name = section.get('id', None)
            if name in self.analysis:
                LOG.fatal("ICU tokenizer configuration has two token "
                          "analyzers with id '%s'.", name)
                raise UsageError("Syntax error in ICU tokenizer config.")
-            self.analysis[name] = TokenAnalyzerRule(section,
-                                                    self.normalization_rules,
+            self.analysis[name] = TokenAnalyzerRule(section, norm, trans,
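
Compiling the two Transliterator objects once in the loader and sharing
them avoids repeating the rule compilation in every analysis module. A
small PyICU sketch of that pattern, with an illustrative rule string
rather than Nominatim's real normalization rules:

    from icu import Transliterator

    # compile the rule set once ...
    norm = Transliterator.createFromRules("example_norm", ":: Lower (); :: NFC ;")

    # ... then reuse the compiled object for any number of inputs
    assert norm.transliterate("Strasse") == "strasse"
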
        and creates a new token analyzer on request.
    """
-    def __init__(self, rules: Mapping[str, Any], normalization_rules: str,
+    def __init__(self, rules: Mapping[str, Any],
+                 normalizer: Any, transliterator: Any,
                 config: Configuration) -> None:
        analyzer_name = _get_section(rules, 'analyzer')
        if not analyzer_name or not isinstance(analyzer_name, str):
        self._analysis_mod: AnalysisModule = \
            config.load_plugin_module(analyzer_name, 'nominatim.tokenizer.token_analysis')
-        self.config = self._analysis_mod.configure(rules, normalization_rules)
+        self.config = self._analysis_mod.configure(rules, normalizer,
+                                                   transliterator)
    def create(self, normalizer: Any, transliterator: Any) -> Analyser:
    """ Protocol for analysis modules.
    """
-    def configure(self, rules: Mapping[str, Any], normalization_rules: str) -> Any:
+    def configure(self, rules: Mapping[str, Any],
+                  normalizer: Any, transliterator: Any) -> Any:
        """ Prepare the configuration of the analysis module.
            This function should prepare all data that can be shared
            between instances of this analyser.
            Arguments:
                rules: A dictionary with the additional configuration options
                       as specified in the tokenizer configuration.
-                normalization_rules: ICU rules for normalization as a string
-                    that can be used with createFromRules().
+                normalizer: an ICU Transliterator with the compiled normalization
+                            rules.
+                transliterator: an ICU Transliterator with the compiled
+                                transliteration rules.
            Returns:
                A data object with the configuration that was set up. May be
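
Seen from a module author's perspective, configure() now gets objects it
can call directly. A minimal sketch of a conforming implementation (the
module and its 'names' option are hypothetical; only the signature
follows the protocol):

    from typing import Mapping, Any

    def configure(rules: Mapping[str, Any],
                  normalizer: Any, transliterator: Any) -> Any:
        # No createFromRules() call is needed here any more: the
        # Transliterator objects arrive pre-compiled and ready to use.
        return {'norm_names': [normalizer.transliterate(n).strip()
                               for n in rules.get('names', [])]}
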
import itertools
import re
-from icu import Transliterator
-
from nominatim.config import flatten_config_list
from nominatim.errors import UsageError
def get_variant_config(in_rules: Any,
-                       normalization_rules: str) -> Tuple[List[Tuple[str, List[str]]], str]:
+                       normalizer: Any) -> Tuple[List[Tuple[str, List[str]]], str]:
    """ Convert the variant definition from the configuration into
        replacement sets.
    vset: Set[ICUVariant] = set()
    rules = flatten_config_list(in_rules, 'variants')
-    vmaker = _VariantMaker(normalization_rules)
+    vmaker = _VariantMaker(normalizer)
    for section in rules:
        for rule in (section.get('words') or []):
        All text in rules is normalized to make sure the variants match later.
    """
-    def __init__(self, norm_rules: Any) -> None:
-        self.norm = Transliterator.createFromRules("rule_loader_normalization",
-                                                   norm_rules)
+    def __init__(self, normalizer: Any) -> None:
+        self.norm = normalizer
    def compute(self, rule: Any) -> Iterator[ICUVariant]:
### Configuration section
-def configure(rules: Mapping[str, Any], normalization_rules: str) -> Dict[str, Any]:
+def configure(rules: Mapping[str, Any], normalizer: Any, _: Any) -> Dict[str, Any]:
    """ Extract and preprocess the configuration for this module.
    """
    config: Dict[str, Any] = {}
    config['replacements'], config['chars'] = get_variant_config(rules.get('variants'),
    config['variant_only'] = rules.get('mode', '') == 'variant-only'

    # parse mutation rules
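
For the generic analyser this removes the last createFromRules() call
from the module: _VariantMaker simply keeps the normalizer it is given
and runs all rule text through it. A condensed sketch of the resulting
pattern (the helper name _normalized is illustrative):

    from typing import Any

    class _VariantMaker:
        def __init__(self, normalizer: Any) -> None:
            self.norm = normalizer  # shared, pre-compiled object

        def _normalized(self, text: str) -> str:
            # rule text is normalized so the variants match later
            return self.norm.transliterate(text).strip()
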
Specialized processor for housenumbers. Analyses common housenumber patterns
and creates variants for them.
"""
-from typing import Mapping, Any, List, cast
+from typing import Any, List, cast
import re
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
### Configuration section
-def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613
+def configure(*_: Any) -> None:
    """ All behaviour is currently hard-coded.
    """
    return None
Specialized processor for postcodes. Supports a 'lookup' variant of the
token, which produces variants with optional spaces.
"""
-from typing import Mapping, Any, List
+from typing import Any, List
from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
### Configuration section
-def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613
+def configure(*_: Any) -> None:
    """ All behaviour is currently hard-coded.
    """
    return None
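
The *_: Any signature accepts and silently discards any positional
arguments. That keeps the two hard-coded modules compatible with the
new three-argument protocol without declaring parameters they never
use, and it makes the former "# pylint: disable=W0613" unused-argument
annotation unnecessary. For illustration (with norm and trans compiled
as in the tests below):

    config = configure({'analyzer': 'postcodes'}, norm, trans)
    assert config is None
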
    rules = { 'analyzer': 'generic', 'variants': [{'words': variants}]}
    if variant_only:
        rules['mode'] = 'variant-only'
-    config = module.configure(rules, DEFAULT_NORMALIZATION)
    trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
    norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
+    config = module.configure(rules, norm, trans)
    return module.create(norm, trans, config)
def test_no_variants():
    rules = { 'analyzer': 'generic' }
-    config = module.configure(rules, DEFAULT_NORMALIZATION)
    trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
    norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
+    config = module.configure(rules, norm, trans)
    proc = module.create(norm, trans, config)
    @staticmethod
    def configure_rules(*variants):
        rules = { 'analyzer': 'generic', 'variants': [{'words': variants}]}
-        return module.configure(rules, DEFAULT_NORMALIZATION)
+        trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
+        norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
+        return module.configure(rules, norm, trans)
    def get_replacements(self, *variants):
        'mutations': [ {'pattern': m[0], 'replacements': m[1]}
                       for m in mutations]
        }
-        config = module.configure(rules, DEFAULT_NORMALIZATION)
        trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
        norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
+        config = module.configure(rules, norm, trans)
        self.analysis = module.create(norm, trans, config)
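
The tests mirror the production flow: compile the two Transliterator
objects first, hand them to configure(), then reuse the same objects
for create(). Condensed from the test code above (DEFAULT_NORMALIZATION
and DEFAULT_TRANSLITERATION are the test fixtures, module is the
analysis module under test):

    norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
    trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)

    config = module.configure(rules, norm, trans)
    analyser = module.create(norm, trans, config)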