Helper class to create ICU rules from a configuration file.
"""
import io
-import yaml
import logging
from collections import defaultdict
import itertools
+import yaml
from icu import Transliterator
from nominatim.errors import UsageError
def __init__(self, configfile):
self.configfile = configfile
+ self.compound_suffixes = set()
+ self.abbreviations = defaultdict()
if configfile.suffix == '.yaml':
self._load_from_yaml()
suffixes.add(suffix)
suffixes.update(self.abbreviations.get(suffix, []))
- for suffix in sorted(suffixes, key=lambda x:len(x), reverse=True):
+ for suffix in sorted(suffixes, key=len, reverse=True):
rules.write("'{0} ' > ' {0} ';".format(suffix))
# Finally add transliteration.
synonyms[abbr + ' '].add(' ' + abbr + ' ')
# sort the resulting list by descending length (longer matches are preferred).
- sorted_keys = sorted(synonyms.keys(), key=lambda x: len(x), reverse=True)
+ sorted_keys = sorted(synonyms.keys(), key=len, reverse=True)
return [(k, list(synonyms[k])) for k in sorted_keys]
def _load_from_yaml(self):
- rules = yaml.load(self.configfile.read_text())
+ rules = yaml.safe_load(self.configfile.read_text())
self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
"""
content = self._get_section(rules, section)
+ if content is None:
+ return ''
+
if isinstance(content, str):
return (self.configfile.parent / content).read_text().replace('\n', ' ')
abbrterms = (norm.transliterate(t.strip()) for t in parts[1].split(','))
for full, abbr in itertools.product(fullterms, abbrterms):
- self.abbreviations[full].append(abbr)
+ if full and abbr:
+ self.abbreviations[full].append(abbr)