+++ /dev/null
-"""
-Data structures for saving variant expansions for ICU tokenizer.
-"""
-from collections import namedtuple
-
-_ICU_VARIANT_PORPERTY_FIELDS = ['lang']
-
-
-class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
- """ Data container for saving properties that describe when a variant
- should be applied.
-
- Property instances are hashable.
- """
- @classmethod
- def from_rules(cls, _):
- """ Create a new property type from a generic dictionary.
-
- The function only takes into account the properties that are
- understood presently and ignores all others.
- """
- return cls(lang=None)
-
-
-ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])
"""
Generic processor for names that creates abbreviation variants.
"""
-from collections import defaultdict
+from collections import defaultdict, namedtuple
import itertools
import re
from nominatim.config import flatten_config_list
from nominatim.errors import UsageError
-import nominatim.tokenizer.icu_variants as variants
### Configuration section
+ICUVariant = namedtuple('ICUVariant', ['source', 'replacement'])
+
def configure(rules, normalization_rules):
""" Extract and preprocess the configuration for this module.
"""
vmaker = _VariantMaker(normalization_rules)
- properties = []
for section in rules:
- # Create the property field and deduplicate against existing
- # instances.
- props = variants.ICUVariantProperties.from_rules(section)
- for existing in properties:
- if existing == props:
- props = existing
- break
- else:
- properties.append(props)
-
for rule in (section.get('words') or []):
- vset.update(vmaker.compute(rule, props))
+ vset.update(vmaker.compute(rule))
# Intermediate reorder by source. Also compute required character set.
for variant in vset:
norm_rules)
- def compute(self, rule, props):
+ def compute(self, rule):
""" Generator for all ICUVariant tuples from a single variant rule.
"""
parts = re.split(r'(\|)?([=-])>', rule)
for src in src_terms:
if src:
for froms, tos in _create_variants(*src, src[0], decompose):
- yield variants.ICUVariant(froms, tos, props)
+ yield ICUVariant(froms, tos)
for src, repl in itertools.product(src_terms, repl_terms):
if src and repl:
for froms, tos in _create_variants(*src, repl, decompose):
- yield variants.ICUVariant(froms, tos, props)
+ yield ICUVariant(froms, tos)
def _parse_variant_word(self, name):