2 Data structures for saving variant expansions for ICU tokenizer.
4 from collections import namedtuple
# Names of the fields carried by ICUVariantProperties, in positional order.
# The identifier's spelling is left untouched because other modules may
# refer to it.
_ICU_VARIANT_PORPERTY_FIELDS = ['lang']


class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS,
                                      defaults=(None, )*len(_ICU_VARIANT_PORPERTY_FIELDS))):
    """ Data container for saving properties that describe when a variant
        applies.

        Every field defaults to None, so the class can be instantiated
        without arguments. Property instances are hashable and may be used
        as dictionary keys or set members.
    """
    # Keep instances as lean as the underlying tuple: without empty slots
    # every instance of a namedtuple subclass gets its own __dict__.
    __slots__ = ()

    @classmethod
    def from_rules(cls, _):
        """ Create a new property type from a generic dictionary.

            The function only takes into account the properties that are
            understood presently and ignores all others.
        """
        # The argument is not inspected at the moment (hence its name), so
        # every field keeps its default value.
        return cls()
# A single variant rule: the `source` string, its `replacement` and the
# `properties` describing when the rule applies.
ICUVariant = namedtuple(
    'ICUVariant',
    ('source', 'replacement', 'properties'),
)
def pickle_variant_set(variants):
    """ Serializes an iterable of variant rules to a string.

        `variants` may be any iterable, including a one-shot iterator or
        generator; it is traversed exactly once. Each item must provide
        the attributes `source`, `replacement` and `properties`, where
        `properties` is hashable and offers `_asdict()`.

        Returns a JSON string holding two entries: 'properties', which maps
        an integer id to the dictionary form of each distinct property set,
        and 'variants', a list of (source, replacement, property id)
        triples in input order.
    """
    # Every distinct property set is stored once and referenced by id, so
    # it does not have to be duplicated for each variant. Ids are handed out
    # in order of first appearance, starting at 1; they are only meaningful
    # as lookup keys within the returned string.
    properties = {}
    rows = []
    for variant in variants:
        pid = properties.setdefault(variant.properties, len(properties) + 1)
        rows.append((variant.source, variant.replacement, pid))

    # Convert everything to json.
    return json.dumps({'properties': {pid: props._asdict()
                                      for props, pid in properties.items()},
                       'variants': rows})
def unpickle_variant_set(variant_string):
    """ Rebuilds the set of ICUVariants from a string that was previously
        produced by pickle_variant_set().
    """
    payload = json.loads(variant_string)

    # JSON object keys are always strings, so turn them back into the
    # integer ids that the variant entries refer to.
    props_by_id = {}
    for key, fields in payload['properties'].items():
        props_by_id[int(key)] = ICUVariantProperties(**fields)

    result = set()
    for source, replacement, prop_id in payload['variants']:
        result.add(ICUVariant(source, replacement, props_by_id[prop_id]))

    return result