X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8c7140d92b7a679ae5ef5bb3655c69bd39b7dfeb..cbe22149df0b71620258e439669a8d96ae390717:/nominatim/api/search/icu_tokenizer.py?ds=sidebyside diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index fceec2df..ff1c3fee 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -22,6 +22,7 @@ from nominatim.api.connection import SearchConnection from nominatim.api.logging import log from nominatim.api.search import query as qmod from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer +from nominatim.db.sqlalchemy_types import Json DB_TO_TOKEN_TYPE = { @@ -159,7 +160,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): sa.Column('word_token', sa.Text, nullable=False), sa.Column('type', sa.Text, nullable=False), sa.Column('word', sa.Text), - sa.Column('info', self.conn.t.types.Json)) + sa.Column('info', Json)) async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct: @@ -207,7 +208,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: