X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/671af4cff24ed671bed414160a73ea3541907d29..472b1f59f1af4f97430aac8926a1b5faee7fdc71:/src/nominatim_api/search/icu_tokenizer.py diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py index 6f1dcf79..44afce9f 100644 --- a/src/nominatim_api/search/icu_tokenizer.py +++ b/src/nominatim_api/search/icu_tokenizer.py @@ -208,6 +208,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): log().section('Analyze query (using ICU tokenizer)') for func in self.preprocessors: phrases = func(phrases) + + if len(phrases) == 1 \ + and phrases[0].text.count(' ') > 3 \ + and max(len(s) for s in phrases[0].text.split()) < 3: + normalized = [] + query = qmod.QueryStruct(phrases) log().var_dump('Normalized query', query.source) @@ -244,7 +250,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + return cast(str, self.normalizer.transliterate(text)).strip('-: ') def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: """ Transliterate the phrases and split them into tokens.