remove support for properties on variants

author Sarah Hoffmann <lonvia@denofr.de>

Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)

committer Sarah Hoffmann <lonvia@denofr.de>

Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)
diff --git a/nominatim/tokenizer/icu_variants.py b/nominatim/tokenizer/icu_variants.py

deleted file mode 100644 (file)

index 93272f5..0000000
--- a/nominatim/tokenizer/icu_variants.py
+++ /dev/null
@@ -1,25 +0,0 @@
-"""
-Data structures for saving variant expansions for ICU tokenizer.
-"""
-from collections import namedtuple
-
-_ICU_VARIANT_PORPERTY_FIELDS = ['lang']
-
-
-class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
-    """ Data container for saving properties that describe when a variant
-        should be applied.
-
-        Property instances are hashable.
-    """
-    @classmethod
-    def from_rules(cls, _):
-        """ Create a new property type from a generic dictionary.
-
-            The function only takes into account the properties that are
-            understood presently and ignores all others.
-        """
-        return cls(lang=None)
-
-
-ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])
diff --git a/nominatim/tokenizer/token_analysis/generic.py b/nominatim/tokenizer/token_analysis/generic.py

index 343534eec8a8778133a512982c38fd0360147959..18dd5dfea1058e0d06113f00d2fd91115bff21c3 100644 (file)
--- a/nominatim/tokenizer/token_analysis/generic.py
+++ b/nominatim/tokenizer/token_analysis/generic.py
@@ -1,7 +1,7 @@
  """
  Generic processor for names that creates abbreviation variants.
  """
-from collections import defaultdict
+from collections import defaultdict, namedtuple
  import itertools
  import re
  
@@ -10,10 +10,11 @@ import datrie
  
  from nominatim.config import flatten_config_list
  from nominatim.errors import UsageError
-import nominatim.tokenizer.icu_variants as variants
  
  ### Configuration section
  
+ICUVariant = namedtuple('ICUVariant', ['source', 'replacement'])
+
  def configure(rules, normalization_rules):
      """ Extract and preprocess the configuration for this module.
      """
@@ -27,20 +28,9 @@ def configure(rules, normalization_rules):
  
          vmaker = _VariantMaker(normalization_rules)
  
-        properties = []
          for section in rules:
-            # Create the property field and deduplicate against existing
-            # instances.
-            props = variants.ICUVariantProperties.from_rules(section)
-            for existing in properties:
-                if existing == props:
-                    props = existing
-                    break
-            else:
-                properties.append(props)
-
              for rule in (section.get('words') or []):
-                vset.update(vmaker.compute(rule, props))
+                vset.update(vmaker.compute(rule))
  
          # Intermediate reorder by source. Also compute required character set.
          for variant in vset:
@@ -66,7 +56,7 @@ class _VariantMaker:
                                                     norm_rules)
  
  
-    def compute(self, rule, props):
+    def compute(self, rule):
          """ Generator for all ICUVariant tuples from a single variant rule.
          """
          parts = re.split(r'(\|)?([=-])>', rule)
@@ -82,12 +72,12 @@ class _VariantMaker:
              for src in src_terms:
                  if src:
                      for froms, tos in _create_variants(*src, src[0], decompose):
-                        yield variants.ICUVariant(froms, tos, props)
+                        yield ICUVariant(froms, tos)
  
          for src, repl in itertools.product(src_terms, repl_terms):
              if src and repl:
                  for froms, tos in _create_variants(*src, repl, decompose):
-                    yield variants.ICUVariant(froms, tos, props)
+                    yield ICUVariant(froms, tos)
  
  
      def _parse_variant_word(self, name):
author	Sarah Hoffmann <lonvia@denofr.de>
	Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Tue, 5 Oct 2021 08:29:36 +0000 (10:29 +0200)
nominatim/tokenizer/icu_variants.py	[deleted file]	patch | blob | history
nominatim/tokenizer/token_analysis/generic.py		patch | blob | history