From: Sarah Hoffmann Date: Mon, 18 Dec 2023 15:00:08 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~29 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/7b27c04b8392d0babe33cf53e2838b05ffbcbacf?hp=-c Merge remote-tracking branch 'upstream/master' --- 7b27c04b8392d0babe33cf53e2838b05ffbcbacf diff --combined nominatim/api/search/icu_tokenizer.py index 06a06f34,eabd329d..ff1c3fee --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@@ -22,6 -22,7 +22,7 @@@ from nominatim.api.connection import Se from nominatim.api.logging import log from nominatim.api.search import query as qmod from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer + from nominatim.db.sqlalchemy_types import Json DB_TO_TOKEN_TYPE = { @@@ -159,7 -160,7 +160,7 @@@ class ICUQueryAnalyzer(AbstractQueryAna sa.Column('word_token', sa.Text, nullable=False), sa.Column('type', sa.Text, nullable=False), sa.Column('word', sa.Text), - sa.Column('info', self.conn.t.types.Json)) + sa.Column('info', Json)) async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct: @@@ -207,12 -208,7 +208,12 @@@ standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: