-    def __init__(self, norm_rules, trans_rules, replacements):
-        self.normalizer = Transliterator.createFromRules("icu_normalization",
-                                                         norm_rules)
-        self.to_ascii = Transliterator.createFromRules("icu_to_ascii",
-                                                       trans_rules +
-                                                       ";[:Space:]+ > ' '")
-        self.search = Transliterator.createFromRules("icu_search",
-                                                     norm_rules + trans_rules)
-
-        # Intermediate grouping by source. Also collect the character set
-        # needed for the trie alphabet.
-        immediate = defaultdict(list)
-        chars = set()
-        for variant in replacements:
-            # If both source and replacement end in a blank, store the
-            # replacement without its trailing blank.
-            if variant.source[-1] == ' ' and variant.replacement[-1] == ' ':
-                replstr = variant.replacement[:-1]
-            else:
-                replstr = variant.replacement
-            immediate[variant.source].append(replstr)
-            chars.update(variant.source)
-        # Then copy to datrie
-        self.replacements = datrie.Trie(''.join(chars))
-        for src, repllist in immediate.items():
-            self.replacements[src] = repllist
-
-
-    def get_normalized(self, name):
-        """ Normalize the given name, i.e. remove all elements not relevant
-            for search.
-        """
-        return self.normalizer.transliterate(name).strip()
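For context, a minimal usage sketch of the removed code follows. It assumes the surrounding class is named `ICUNameProcessor` (the class name is not visible in this hunk), that the module imports `defaultdict` from `collections`, `Transliterator` from PyICU's `icu` package, and `datrie`, and that replacement variants are simple objects exposing `source` and `replacement` attributes. The ICU rules below are illustrative only; real configurations use much larger rule sets.

```python
from collections import namedtuple

# Hypothetical stand-in for the variant objects consumed by __init__;
# only the .source and .replacement attributes are assumed.
Variant = namedtuple('Variant', ['source', 'replacement'])

# Illustrative ICU transform rules, not the project's shipped rule files.
NORM_RULES = ":: lower (); :: NFD (); [[:Nonspacing Mark:]] >; :: NFC ();"
TRANS_RULES = ":: Latin ()"

replacements = [Variant(' street ', ' st '),
                Variant(' road ', ' rd ')]

# ICUNameProcessor is assumed to be the class the removed methods belong to.
proc = ICUNameProcessor(NORM_RULES, TRANS_RULES, replacements)

print(proc.get_normalized('  Bäker Street '))   # -> 'baker street' with these rules
```

Storing the grouped replacements in a `datrie.Trie` keyed by source string presumably lets later processing match replacement sources by prefix lookup inside a normalized name; since datrie requires its alphabet at construction time, the constructor has to collect the full character set of all sources before building the trie.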