X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b4fec57b6d53f8e8a45c46ff11f13cbcbea1006a..8171fe4571a57bf8e5b2a8f676989e973897e2e7:/nominatim/tokenizer/icu_name_processor.py?ds=inline diff --git a/nominatim/tokenizer/icu_name_processor.py b/nominatim/tokenizer/icu_name_processor.py index 93d2b0ff..544f5ebc 100644 --- a/nominatim/tokenizer/icu_name_processor.py +++ b/nominatim/tokenizer/icu_name_processor.py @@ -8,67 +8,25 @@ import itertools from icu import Transliterator import datrie -from nominatim.db.properties import set_property, get_property -from nominatim.tokenizer import icu_variants as variants - -DBCFG_IMPORT_NORM_RULES = "tokenizer_import_normalisation" -DBCFG_IMPORT_TRANS_RULES = "tokenizer_import_transliteration" -DBCFG_IMPORT_REPLACEMENTS = "tokenizer_import_replacements" -DBCFG_SEARCH_STD_RULES = "tokenizer_search_standardization" - - -class ICUNameProcessorRules: - """ Data object that saves the rules needed for the name processor. - - The rules can either be initialised through an ICURuleLoader or - be loaded from a database when a connection is given. - """ - def __init__(self, loader=None, conn=None): - if loader is not None: - self.norm_rules = loader.get_normalization_rules() - self.trans_rules = loader.get_transliteration_rules() - self.replacements = loader.get_replacement_pairs() - self.search_rules = loader.get_search_rules() - elif conn is not None: - self.norm_rules = get_property(conn, DBCFG_IMPORT_NORM_RULES) - self.trans_rules = get_property(conn, DBCFG_IMPORT_TRANS_RULES) - self.replacements = \ - variants.unpickle_variant_set(get_property(conn, DBCFG_IMPORT_REPLACEMENTS)) - self.search_rules = get_property(conn, DBCFG_SEARCH_STD_RULES) - else: - assert False, "Parameter loader or conn required." - - - def save_rules(self, conn): - """ Save the rules in the property table of the given database. - the rules can be loaded again by handing in a connection into - the constructor of the class. - """ - set_property(conn, DBCFG_IMPORT_NORM_RULES, self.norm_rules) - set_property(conn, DBCFG_IMPORT_TRANS_RULES, self.trans_rules) - set_property(conn, DBCFG_IMPORT_REPLACEMENTS, - variants.pickle_variant_set(self.replacements)) - set_property(conn, DBCFG_SEARCH_STD_RULES, self.search_rules) - class ICUNameProcessor: """ Collects the different transformation rules for normalisation of names - and provides the functions to aply the transformations. + and provides the functions to apply the transformations. """ - def __init__(self, rules): + def __init__(self, norm_rules, trans_rules, replacements): self.normalizer = Transliterator.createFromRules("icu_normalization", - rules.norm_rules) + norm_rules) self.to_ascii = Transliterator.createFromRules("icu_to_ascii", - rules.trans_rules + + trans_rules + ";[:Space:]+ > ' '") self.search = Transliterator.createFromRules("icu_search", - rules.search_rules) + norm_rules + trans_rules) # Intermediate reorder by source. Also compute required character set. immediate = defaultdict(list) chars = set() - for variant in rules.replacements: + for variant in replacements: if variant.source[-1] == ' ' and variant.replacement[-1] == ' ': replstr = variant.replacement[:-1] else: