X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/e85f7e7aa9b9c297b6b5f266d811c935af8cbb9e..bb18479d5bfd59fe418f21e79136646133e93567:/nominatim/tokenizer/icu_name_processor.py diff --git a/nominatim/tokenizer/icu_name_processor.py b/nominatim/tokenizer/icu_name_processor.py index 6ead712e..93d2b0ff 100644 --- a/nominatim/tokenizer/icu_name_processor.py +++ b/nominatim/tokenizer/icu_name_processor.py @@ -105,6 +105,11 @@ class ICUNameProcessor: partials = [v + done + r for v, r in itertools.product(partials, repl) if not force_space or r.startswith(' ')] + if len(partials) > 128: + # If too many variants are produced, they are unlikely + # to be helpful. Only use the original term. + startpos = 0 + break startpos = pos + len(full) if full[-1] == ' ': startpos -= 1 @@ -114,18 +119,22 @@ class ICUNameProcessor: pos += 1 force_space = False - results = set() - + # No variants detected? Fast return. if startpos == 0: trans_name = self.to_ascii.transliterate(norm_name).strip() + return [trans_name] if trans_name else [] + + return self._compute_result_set(partials, baseform[startpos:]) + + + def _compute_result_set(self, partials, prefix): + results = set() + + for variant in partials: + vname = variant + prefix + trans_name = self.to_ascii.transliterate(vname[1:-1]).strip() if trans_name: results.add(trans_name) - else: - for variant in partials: - name = variant + baseform[startpos:] - trans_name = self.to_ascii.transliterate(name[1:-1]).strip() - if trans_name: - results.add(trans_name) return list(results)