From: Sarah Hoffmann Date: Tue, 11 Jan 2022 16:51:05 +0000 (+0100) Subject: refactor variant production to use generators X-Git-Tag: v4.1.0~95^2~5 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/630ad38a6768f8379328795f8aa0e127d7105c44?ds=sidebyside refactor variant production to use generators --- diff --git a/nominatim/tokenizer/token_analysis/generic.py b/nominatim/tokenizer/token_analysis/generic.py index f790dad2..05ba885b 100644 --- a/nominatim/tokenizer/token_analysis/generic.py +++ b/nominatim/tokenizer/token_analysis/generic.py @@ -176,14 +176,26 @@ class GenericTokenAnalysis: """ Compute the spelling variants for the given normalized name and transliterate the result. """ + results = set() + for variant in self._generate_word_variants(norm_name): + if not self.variant_only or variant.strip() != norm_name: + trans_name = self.to_ascii.transliterate(variant).strip() + if trans_name: + results.add(trans_name) + + return list(results) + + + def _generate_word_variants(self, norm_name): baseform = '^ ' + norm_name + ' ^' + baselen = len(baseform) partials = [''] startpos = 0 if self.replacements is not None: pos = 0 force_space = False - while pos < len(baseform): + while pos < baselen: full, repl = self.replacements.longest_prefix_item(baseform[pos:], (None, None)) if full is not None: @@ -207,24 +219,9 @@ class GenericTokenAnalysis: # No variants detected? Fast return. if startpos == 0: - if self.variant_only: - return [] - - trans_name = self.to_ascii.transliterate(norm_name).strip() - return [trans_name] if trans_name else [] + return (norm_name, ) - return self._compute_result_set(partials, baseform[startpos:], - norm_name if self.variant_only else '') + if startpos < baselen: + return (part[1:] + baseform[startpos:-1] for part in partials) - - def _compute_result_set(self, partials, prefix, exclude): - results = set() - - for variant in partials: - vname = (variant + prefix)[1:-1].strip() - if vname != exclude: - trans_name = self.to_ascii.transliterate(vname).strip() - if trans_name: - results.add(trans_name) - - return list(results) + return (part[1:-1] for part in partials)