From 061c52b3979811113989f4430cb4c57f149eb643 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 10 Dec 2024 10:07:48 +0100 Subject: [PATCH] be less strict on filtering one-letter queries --- src/nominatim_api/search/icu_tokenizer.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py index c18dd8be..ac78d03c 100644 --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@ -167,6 +167,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): normalized = list(filter(lambda p: p.text, (qmod.Phrase(p.ptype, self.normalize_text(p.text)) for p in phrases))) + if len(normalized) == 1 \ + and normalized[0].text.count(' ') > 3 \ + and max(len(s) for s in normalized[0].text.split()) < 3: + normalized = [] query = qmod.QueryStruct(normalized) log().var_dump('Normalized query', query.source) if not query.source: @@ -202,12 +206,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): standardized form search will work with. All information removed at this stage is inevitably lost. """ - norm = cast(str, self.normalizer.transliterate(text)) - numspaces = norm.count(' ') - if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: - return '' - - return norm + return cast(str, self.normalizer.transliterate(text)) def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: """ Transliterate the phrases and split them into tokens. -- 2.39.5