git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3155 from lonvia/caching-of-transliterators
author: Sarah Hoffmann <lonvia@denofr.de>
Wed, 16 Aug 2023 20:55:54 +0000 (22:55 +0200)
committer: GitHub <noreply@github.com>
Wed, 16 Aug 2023 20:55:54 +0000 (22:55 +0200)
Cache ICU transliterators between calls

nominatim/api/connection.py
nominatim/api/search/icu_tokenizer.py

index 72cabf7814bd4d5402d4b99dada5ce720fd8c852..bf2173144d72fa7deee39daa010f1e75ef5293dc 100644 (file)
@@ -7,7 +7,8 @@
 """
 Extended SQLAlchemy connection class that also includes access to the schema.
 """
-from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
+from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
+                   Awaitable, Callable, TypeVar
 
 import sqlalchemy as sa
 from sqlalchemy.ext.asyncio import AsyncConnection
@@ -17,6 +18,8 @@ from nominatim.db.sqlalchemy_schema import SearchTables
 from nominatim.db.sqlalchemy_types import Geometry
 from nominatim.api.logging import log
 
+T = TypeVar('T')
+
 class SearchConnection:
     """ An extended SQLAlchemy connection class, that also contains
         then table definitions. The underlying asynchronous SQLAlchemy
@@ -61,11 +64,10 @@ class SearchConnection:
 
             Raises a ValueError if the property does not exist.
         """
-        if name.startswith('DB:'):
-            raise ValueError(f"Illegal property value '{name}'.")
+        lookup_name = f'DBPROP:{name}'
 
-        if cached and name in self._property_cache:
-            return cast(str, self._property_cache[name])
+        if cached and lookup_name in self._property_cache:
+            return cast(str, self._property_cache[lookup_name])
 
         sql = sa.select(self.t.properties.c.value)\
             .where(self.t.properties.c.property == name)
@@ -74,7 +76,7 @@ class SearchConnection:
         if value is None:
             raise ValueError(f"Property '{name}' not found in database.")
 
-        self._property_cache[name] = cast(str, value)
+        self._property_cache[lookup_name] = cast(str, value)
 
         return cast(str, value)
 
@@ -92,6 +94,29 @@ class SearchConnection:
         return self._property_cache['DB:server_version']
 
 
+    async def get_cached_value(self, group: str, name: str,
+                               factory: Callable[[], Awaitable[T]]) -> T:
+        """ Access the cache for this Nominatim instance.
+            Each cache value needs to belong to a group and have a name.
+            This function is for internal API use only.
+
+            `factory` is an async callback function that produces
+            the value if it is not already cached.
+
+            Returns the cached value or the result of factory (also caching
+            the result).
+        """
+        full_name = f'{group}:{name}'
+
+        if full_name in self._property_cache:
+            return cast(T, self._property_cache[full_name])
+
+        value = await factory()
+        self._property_cache[full_name] = value
+
+        return value
+
+
     async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
         """ Lookup up if there is a classtype table for the given category
             and return a SQLAlchemy table for it, if it exists.
index 7bf516e3aa25c12775d0c3ffae7271076f6d0032..b68e8d10eef70816f6cb772da2d7036e8a31693d 100644 (file)
@@ -133,10 +133,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
     async def setup(self) -> None:
         """ Set up static data structures needed for the analysis.
         """
-        rules = await self.conn.get_property('tokenizer_import_normalisation')
-        self.normalizer = Transliterator.createFromRules("normalization", rules)
-        rules = await self.conn.get_property('tokenizer_import_transliteration')
-        self.transliterator = Transliterator.createFromRules("transliteration", rules)
+        async def _make_normalizer() -> Any:
+            rules = await self.conn.get_property('tokenizer_import_normalisation')
+            return Transliterator.createFromRules("normalization", rules)
+
+        self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
+                                                           _make_normalizer)
+
+        async def _make_transliterator() -> Any:
+            rules = await self.conn.get_property('tokenizer_import_transliteration')
+            return Transliterator.createFromRules("transliteration", rules)
+
+        self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
+                                                               _make_transliterator)
 
         if 'word' not in self.conn.t.meta.tables:
             sa.Table('word', self.conn.t.meta,