]> git.openstreetmap.org Git - nominatim.git/commitdiff
be less strict on filtering one-letter queries
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 Dec 2024 09:07:48 +0000 (10:07 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 Dec 2024 09:28:47 +0000 (10:28 +0100)
src/nominatim_api/search/icu_tokenizer.py

index c18dd8be62ed1190284e9c0751464b5e54091a47..ac78d03c1fc1776667d3cb31020cc03bfa0c5a90 100644 (file)
@@ -167,6 +167,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
         normalized = list(filter(lambda p: p.text,
                                  (qmod.Phrase(p.ptype, self.normalize_text(p.text))
                                   for p in phrases)))
+        if len(normalized) == 1 \
+                and normalized[0].text.count(' ') > 3 \
+                and max(len(s) for s in normalized[0].text.split()) < 3:
+            normalized = []
         query = qmod.QueryStruct(normalized)
         log().var_dump('Normalized query', query.source)
         if not query.source:
@@ -202,12 +206,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
-        norm = cast(str, self.normalizer.transliterate(text))
-        numspaces = norm.count(' ')
-        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
-            return ''
-
-        return norm
+        return cast(str, self.normalizer.transliterate(text))
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
         """ Transliterate the phrases and split them into tokens.