From: Sarah Hoffmann Date: Tue, 8 Aug 2023 12:59:33 +0000 (+0200) Subject: block queries with lots of one and two letter terms X-Git-Tag: deploy~59 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/0a19cc18e5060e02eb76850cf428a4d9c48ec0c1?ds=inline block queries with lots of one and two letter terms --- diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index f259995d..ad08294e 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -192,7 +192,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: