]> git.openstreetmap.org Git - nominatim.git/commitdiff
be less strict on filtering one-letter queries
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 Dec 2024 09:07:48 +0000 (10:07 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 Dec 2024 09:28:47 +0000 (10:28 +0100)
src/nominatim_api/search/icu_tokenizer.py

index c18dd8be62ed1190284e9c0751464b5e54091a47..ac78d03c1fc1776667d3cb31020cc03bfa0c5a90 100644 (file)
@@ -167,6 +167,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
         normalized = list(filter(lambda p: p.text,
                                  (qmod.Phrase(p.ptype, self.normalize_text(p.text))
                                   for p in phrases)))
+        if len(normalized) == 1 \
+                and normalized[0].text.count(' ') > 3 \
+                and max(len(s) for s in normalized[0].text.split()) < 3:
+            normalized = []
         query = qmod.QueryStruct(normalized)
         log().var_dump('Normalized query', query.source)
         if not query.source:
@@ -202,12 +206,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
-        norm = cast(str, self.normalizer.transliterate(text))
-        numspaces = norm.count(' ')
-        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
-            return ''
-
-        return norm
+        return cast(str, self.normalizer.transliterate(text))
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
         """ Transliterate the phrases and split them into tokens.