git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 16 Aug 2023 21:01:25 +0000 (23:01 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 16 Aug 2023 21:01:25 +0000 (23:01 +0200)
nominatim/api/search/icu_tokenizer.py

index d3e34537a8d7c03585b16dac4a2136e80e77c8fd,b68e8d10eef70816f6cb772da2d7036e8a31693d..d2cdd96e16432d62f2b02d0fcbe390e060d905e1
@@@ -133,10 -133,19 +133,19 @@@ class ICUQueryAnalyzer(AbstractQueryAna
      async def setup(self) -> None:
          """ Set up static data structures needed for the analysis.
          """
-         rules = await self.conn.get_property('tokenizer_import_normalisation')
-         self.normalizer = Transliterator.createFromRules("normalization", rules)
-         rules = await self.conn.get_property('tokenizer_import_transliteration')
-         self.transliterator = Transliterator.createFromRules("transliteration", rules)
+         async def _make_normalizer() -> Any:
+             rules = await self.conn.get_property('tokenizer_import_normalisation')
+             return Transliterator.createFromRules("normalization", rules)
+         self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
+                                                            _make_normalizer)
+         async def _make_transliterator() -> Any:
+             rules = await self.conn.get_property('tokenizer_import_transliteration')
+             return Transliterator.createFromRules("transliteration", rules)
+         self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
+                                                                _make_transliterator)
  
          if 'word' not in self.conn.t.meta.tables:
              sa.Table('word', self.conn.t.meta,
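
The hunk above wraps the Transliterator.createFromRules() calls in async factory functions and fetches the results through self.conn.get_cached_value(), so the ICU normalizer and transliterator are built once per cache key ('ICUTOK', 'normalizer' / 'transliterator') and reused instead of being recompiled from the stored rules on every setup(). Below is a minimal standalone sketch of that caching pattern; SimpleAsyncCache is a hypothetical stand-in for the connection's cache, not Nominatim's actual implementation.

    from typing import Any, Awaitable, Callable, Dict, Tuple

    class SimpleAsyncCache:
        """Compute a value once per (group, name) key via an async factory."""

        def __init__(self) -> None:
            self._values: Dict[Tuple[str, str], Any] = {}

        async def get_cached_value(self, group: str, name: str,
                                   factory: Callable[[], Awaitable[Any]]) -> Any:
            key = (group, name)
            if key not in self._values:
                # The factory (e.g. _make_normalizer above) runs only on the
                # first request; later calls reuse the stored object instead of
                # rebuilding the ICU rules from the database property.
                self._values[key] = await factory()
            return self._values[key]

With such a cache, await cache.get_cached_value('ICUTOK', 'normalizer', _make_normalizer) returns the same normalizer object on every call after the first.
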
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
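
The second hunk adds a guard to text normalization: after transliteration, the query is discarded (an empty string is returned) when it contains more than four spaces and its total length is at most three characters per token, separator included, i.e. when it consists of many very short tokens that are unlikely to form a searchable query. A small standalone sketch of the check with illustrative inputs, assuming norm is the already-transliterated string:

    def is_degenerate(norm: str) -> bool:
        """True when the normalized query is made up of many very short tokens."""
        numspaces = norm.count(' ')
        # More than four separators and an average of at most three characters
        # per token (separator included): treat the query as unsearchable noise.
        return numspaces > 4 and len(norm) <= (numspaces + 1) * 3

    assert is_degenerate('a b c d e f')            # 5 spaces, length 11 <= 18
    assert not is_degenerate('baker street 221b')  # only 2 spaces, guard not hit
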