X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3206bf59df0213d24bd3e11df7dd2abaebf89911..87907916ff06741bd4627101d30e6e11b8ea1a1a:/nominatim/tokenizer/legacy_icu_tokenizer.py?ds=sidebyside

diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py
index 156e99ec..2bd22c72 100644
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -423,8 +423,7 @@ class LegacyICUNameAnalyzer:
         names = place.get('name')
 
         if names:
-            full_names = set((self.make_standard_word(name) for name in names.values()))
-            full_names.discard('')
+            full_names = self._compute_full_names(names)
 
             token_info.add_names(self.conn, full_names)
 
@@ -461,6 +460,25 @@ class LegacyICUNameAnalyzer:
         return token_info.data
 
 
+    def _compute_full_names(self, names):
+        """ Return the set of all full name word ids to be used with the
+            given dictionary of names.
+        """
+        full_names = set()
+        for name in (n for ns in names.values() for n in re.split('[;,]', ns)):
+            word = self.make_standard_word(name)
+            if word:
+                full_names.add(word)
+
+            brace_split = name.split('(', 2)
+            if len(brace_split) > 1:
+                word = self.make_standard_word(brace_split[0])
+                if word:
+                    full_names.add(word)
+
+        return full_names
+
+
     def _add_postcode(self, postcode):
         """ Make sure the normalized postcode is present in the word table.
         """
@@ -519,8 +537,6 @@ class _TokenInfo:
         """
         # Start with all partial names
         terms = set((part for ns in names for part in ns.split()))
-        # Add partials for the full terms (TO BE REMOVED)
-        terms.update((n for n in names))
         # Add the full names
         terms.update((' ' + n for n in names))
 