From: Sarah Hoffmann Date: Wed, 19 Feb 2025 19:51:20 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/19dfe0fdd45e0541b7171a96c16ecc9a84dd4c36?ds=inline;hp=-c Merge remote-tracking branch 'upstream/master' --- 19dfe0fdd45e0541b7171a96c16ecc9a84dd4c36 diff --combined src/nominatim_api/search/icu_tokenizer.py index 8f2069c1,487dd171..44afce9f --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@@ -208,12 -208,6 +208,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna log().section('Analyze query (using ICU tokenizer)') for func in self.preprocessors: phrases = func(phrases) + + if len(phrases) == 1 \ + and phrases[0].text.count(' ') > 3 \ + and max(len(s) for s in phrases[0].text.split()) < 3: + normalized = [] + query = qmod.QueryStruct(phrases) log().var_dump('Normalized query', query.source) @@@ -250,7 -244,7 +250,7 @@@ standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + return cast(str, self.normalizer.transliterate(text)).strip('-: ') def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: """ Transliterate the phrases and split them into tokens.