Only consider partial terms from multi-word variants for the initial word count

[nominatim.git] / nominatim / tokenizer / legacy_icu_tokenizer.py
diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py

index 5f83b73dc2c7c7fb6e7c444404549abf11c863cb..6bf409cca3ab3674b41605b06e8dfe49eda40e41 100644 (file)
--- a/nominatim/tokenizer/legacy_icu_tokenizer.py
+++ b/nominatim/tokenizer/legacy_icu_tokenizer.py
@@ -168,7 +168,8 @@ class LegacyICUTokenizer:
                  for name, cnt in cur:
                      terms = set()
                      for word in name_proc.get_variants_ascii(name_proc.get_normalized(name)):
-                        terms.update(word.split())
+                        if ' ' in word:
+                            terms.update(word.split())
                      for term in terms:
                          words[term] += cnt