From 92f6ec2328606666032f7e9adc0c3cdcd7c76804 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Tue, 5 Oct 2021 10:29:36 +0200
Subject: [PATCH] remove support for properties on variants

Those are not going to be used in the near future, so no need to carry
that code around just now.
---
 nominatim/tokenizer/icu_variants.py           | 25 -------------------
 nominatim/tokenizer/token_analysis/generic.py | 24 ++++++------------
 2 files changed, 7 insertions(+), 42 deletions(-)
 delete mode 100644 nominatim/tokenizer/icu_variants.py

diff --git a/nominatim/tokenizer/icu_variants.py b/nominatim/tokenizer/icu_variants.py
deleted file mode 100644
index 93272f58..00000000
--- a/nominatim/tokenizer/icu_variants.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-Data structures for saving variant expansions for ICU tokenizer.
-"""
-from collections import namedtuple
-
-_ICU_VARIANT_PORPERTY_FIELDS = ['lang']
-
-
-class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
-    """ Data container for saving properties that describe when a variant
-        should be applied.
-
-        Property instances are hashable.
-    """
-    @classmethod
-    def from_rules(cls, _):
-        """ Create a new property type from a generic dictionary.
-
-            The function only takes into account the properties that are
-            understood presently and ignores all others.
-        """
-        return cls(lang=None)
-
-
-ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])
diff --git a/nominatim/tokenizer/token_analysis/generic.py b/nominatim/tokenizer/token_analysis/generic.py
index 343534ee..18dd5dfe 100644
--- a/nominatim/tokenizer/token_analysis/generic.py
+++ b/nominatim/tokenizer/token_analysis/generic.py
@@ -1,7 +1,7 @@
 """
 Generic processor for names that creates abbreviation variants.
 """
-from collections import defaultdict
+from collections import defaultdict, namedtuple
 import itertools
 import re
 
@@ -10,10 +10,11 @@ import datrie
 
 from nominatim.config import flatten_config_list
 from nominatim.errors import UsageError
-import nominatim.tokenizer.icu_variants as variants
 
 ### Configuration section
 
+ICUVariant = namedtuple('ICUVariant', ['source', 'replacement'])
+
 def configure(rules, normalization_rules):
     """ Extract and preprocess the configuration for this module.
     """
@@ -27,20 +28,9 @@ def configure(rules, normalization_rules):
 
         vmaker = _VariantMaker(normalization_rules)
 
-        properties = []
         for section in rules:
-            # Create the property field and deduplicate against existing
-            # instances.
-            props = variants.ICUVariantProperties.from_rules(section)
-            for existing in properties:
-                if existing == props:
-                    props = existing
-                    break
-            else:
-                properties.append(props)
-
             for rule in (section.get('words') or []):
-                vset.update(vmaker.compute(rule, props))
+                vset.update(vmaker.compute(rule))
 
         # Intermediate reorder by source. Also compute required character set.
         for variant in vset:
@@ -66,7 +56,7 @@ class _VariantMaker:
                                                    norm_rules)
 
 
-    def compute(self, rule, props):
+    def compute(self, rule):
         """ Generator for all ICUVariant tuples from a single variant rule.
         """
         parts = re.split(r'(\|)?([=-])>', rule)
@@ -82,12 +72,12 @@ class _VariantMaker:
             for src in src_terms:
                 if src:
                     for froms, tos in _create_variants(*src, src[0], decompose):
-                        yield variants.ICUVariant(froms, tos, props)
+                        yield ICUVariant(froms, tos)
 
         for src, repl in itertools.product(src_terms, repl_terms):
             if src and repl:
                 for froms, tos in _create_variants(*src, repl, decompose):
-                    yield variants.ICUVariant(froms, tos, props)
+                    yield ICUVariant(froms, tos)
 
 
     def _parse_variant_word(self, name):
-- 
2.39.5