From: Sarah Hoffmann Date: Wed, 16 Aug 2023 21:01:25 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~53 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/93f8e28eb146e671849b96f4d5ef1b924a907abc?hp=-c Merge remote-tracking branch 'upstream/master' --- 93f8e28eb146e671849b96f4d5ef1b924a907abc diff --combined nominatim/api/search/icu_tokenizer.py index d3e34537,b68e8d10..d2cdd96e --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@@ -133,10 -133,19 +133,19 @@@ class ICUQueryAnalyzer(AbstractQueryAna async def setup(self) -> None: """ Set up static data structures needed for the analysis. """ - rules = await self.conn.get_property('tokenizer_import_normalisation') - self.normalizer = Transliterator.createFromRules("normalization", rules) - rules = await self.conn.get_property('tokenizer_import_transliteration') - self.transliterator = Transliterator.createFromRules("transliteration", rules) + async def _make_normalizer() -> Any: + rules = await self.conn.get_property('tokenizer_import_normalisation') + return Transliterator.createFromRules("normalization", rules) + + self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer', + _make_normalizer) + + async def _make_transliterator() -> Any: + rules = await self.conn.get_property('tokenizer_import_transliteration') + return Transliterator.createFromRules("transliteration", rules) + + self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator', + _make_transliterator) if 'word' not in self.conn.t.meta.tables: sa.Table('word', self.conn.t.meta, @@@ -192,12 -201,7 +201,12 @@@ standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: