"""
Data structures for saving variant expansions for the ICU tokenizer.
"""
import json
from collections import namedtuple

from nominatim.errors import UsageError
# Names of the fields of ICUVariantProperties. Every field defaults to None.
# NOTE(review): the identifier is misspelt ("PORPERTY"); it is kept as is
# because the class definition below refers to it by this name.
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']
11 def _get_strtuple_prop(rules, field):
12 """ Return the given field of the rules dictionary as a list.
14 If the field is not defined or empty, returns None. If the field is
15 a singe string, it is converted into a tuple with a single element.
16 If the field is a list of strings, return as a string tuple.
17 Raise a usage error in all other cases.
19 value = rules.get(field)
24 if isinstance(value, str):
27 if not isinstance(value, list) or any(not isinstance(x, str) for x in value):
28 raise UsageError("YAML variant property '{}' should be a list.".format(field))
class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS,
                                      defaults=(None, )*len(_ICU_VARIANT_PORPERTY_FIELDS))):
    """ Data container for the properties that describe when a variant
        is applicable.

        All fields default to None. Property instances are hashable as long
        as their field values are hashable (from_rules() stores tuples).
    """

    @classmethod
    def from_rules(cls, rules):
        """ Create a new property object from a generic dictionary.

            The function only takes into account the properties that are
            understood presently ('lang') and ignores all others.
        """
        return cls(lang=_get_strtuple_prop(rules, 'lang'))
# A single variant rule: the source string, its replacement and the
# properties object that is attached to the rule.
ICUVariant = namedtuple(
    'ICUVariant',
    ('source', 'replacement', 'properties'),
)
def pickle_variant_set(variants):
    """ Serialize an iterable of variant rules to a JSON string.

        Each variant must provide the attributes `source`, `replacement`
        and `properties`; the properties object must be hashable and offer
        `_asdict()`. The input is consumed in a single pass, so one-shot
        iterables such as generators are supported.

        The result is a JSON object with two members: 'properties' maps a
        numeric id to the dictionary form of a property object and
        'variants' lists (source, replacement, property id) triples.
    """
    # Property sets are stored once and referred to by id (starting at 1,
    # in order of first appearance) so they need not be duplicated.
    property_ids = {}
    rows = []
    for variant in variants:
        # The default is only inserted when the properties are new; in that
        # case len() is the number of ids handed out so far.
        pid = property_ids.setdefault(variant.properties, len(property_ids) + 1)
        rows.append((variant.source, variant.replacement, pid))

    # Convert everything to JSON.
    return json.dumps({'properties': {pid: props._asdict()
                                      for props, pid in property_ids.items()},
                       'variants': rows})
def unpickle_variant_set(variant_string):
    """ Deserialize a variant string that was previously created with
        pickle_variant_set() into a set of ICUVariants.

        Raises a ValueError (json.JSONDecodeError) when the string is not
        valid JSON and a KeyError when a variant refers to a property id
        that is not part of the serialized data.
    """
    data = json.loads(variant_string)

    # JSON has no tuple type: tuple-valued properties come back as lists.
    # Convert them back to tuples, otherwise the property objects (and the
    # variants holding them) are unhashable and cannot be put into a set.
    properties = {
        int(pid): ICUVariantProperties(**{name: tuple(val) if isinstance(val, list) else val
                                          for name, val in props.items()})
        for pid, props in data['properties'].items()
    }

    return {ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants']}