git.openstreetmap.org Git - nominatim.git/commitdiff
block queries with lots of one and two letter terms
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 8 Aug 2023 12:59:33 +0000 (14:59 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 8 Aug 2023 12:59:33 +0000 (14:59 +0200)
nominatim/api/search/icu_tokenizer.py

index f259995db112bbbe537aaa3855f2d4d78e36f5e2..ad08294e00f8409c04043d9187a96198aebc8e16 100644 (file)
@@ -192,7 +192,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
-        return cast(str, self.normalizer.transliterate(text))
+        norm = cast(str, self.normalizer.transliterate(text))
+        numspaces = norm.count(' ')
+        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
+            return ''
+
+        return norm
 
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: