"""
Data structures for saving variant expansions for the ICU tokenizer.
"""
import json
from collections import namedtuple
# NOTE: the misspelling in the name is kept on purpose, other code may
# already refer to the constant under this name.
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']


class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)):
    """ Data container for saving properties that describe when a variant
        should be applied.

        Property instances are hashable.
    """

    @classmethod
    def from_rules(cls, _):
        """ Create a new property type from a generic dictionary.

            The function only takes into account the properties that are
            understood presently and ignores all others.

            NOTE(review): the argument is named `_`, i.e. it is treated as
            unused, so every field is set to None. Confirm that this is
            intended before relying on values from the dictionary.
        """
        return cls(lang=None)
# Record describing a single variant: its source, its replacement and the
# associated properties (presumably an ICUVariantProperties instance).
ICUVariant = namedtuple('ICUVariant', ['source', 'replacement', 'properties'])
def pickle_variant_set(variants):
    """ Serializes an iterable of variant rules to a string.

        Each distinct property set is saved only once under a numeric id.
        The variants refer to their property set through this id.

        Arguments:
            variants: Iterable of objects with the attributes `source`,
                      `replacement` and `properties`. The properties must
                      be hashable and provide an `_asdict()` function.

        Returns a JSON string with the two keys 'properties' and 'variants'.
    """
    # Create a list of property sets. So they don't need to be duplicated.
    # The variants are converted into a simple list in the same pass, so
    # that one-shot iterables like generators are handled correctly.
    properties = {}
    entries = []
    for variant in variants:
        # Ids are handed out consecutively, starting with 1.
        pid = properties.setdefault(variant.properties, len(properties) + 1)
        entries.append((variant.source, variant.replacement, pid))

    # Convert everything to json. The integer ids become string keys here.
    return json.dumps({'properties': {v: k._asdict() for k, v in properties.items()},
                       'variants': entries})
def unpickle_variant_set(variant_string):
    """ Deserializes a variant string that was previously created with
        pickle_variant_set() into a set of ICUVariants.
    """
    payload = json.loads(variant_string)

    # JSON object keys are always strings. Convert them back to integers,
    # so that the ids from the variant list can be used for the lookup.
    props_by_id = {}
    for key, rules in payload['properties'].items():
        props_by_id[int(key)] = ICUVariantProperties.from_rules(rules)

    return {ICUVariant(source, replacement, props_by_id[prop_id])
            for source, replacement, prop_id in payload['variants']}