git.openstreetmap.org Git - nominatim.git/commitdiff
add factory for query analyzer
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 22 May 2023 07:23:19 +0000 (09:23 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 22 May 2023 07:23:19 +0000 (09:23 +0200)
nominatim/api/search/icu_tokenizer.py
nominatim/api/search/query_analyzer_factory.py [new file with mode: 0644]
test/python/api/search/test_query_analyzer_factory.py [new file with mode: 0644]

index 14698a28867ca7ae0fc783f6b6e11385ffe45d8a..17e679057ecb5a4eb63daeb08f38e361f19a3ada 100644 (file)
@@ -21,10 +21,7 @@ from nominatim.typing import SaRow
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.search import query as qmod
-
-# XXX: TODO
-class AbstractQueryAnalyzer:
-    pass
+from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
 
 
 DB_TO_TOKEN_TYPE = {
diff --git a/nominatim/api/search/query_analyzer_factory.py b/nominatim/api/search/query_analyzer_factory.py
new file mode 100644 (file)
index 0000000..9804f3c
--- /dev/null
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Factory for creating a query analyzer for the configured tokenizer.
+"""
+from typing import List, cast
+from abc import ABC, abstractmethod
+from pathlib import Path
+import importlib
+
+from nominatim.api.logging import log
+from nominatim.api.connection import SearchConnection
+from nominatim.api.search.query import Phrase, QueryStruct
+
+class AbstractQueryAnalyzer(ABC):
+    """ Class for analysing incoming queries.
+
+        Query analyzers are tied to the tokenizer used on import.
+    """
+
+    @abstractmethod
+    async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct:
+        """ Analyze the given phrases and return the tokenized query.
+        """
+
+
+async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
+    """ Create a query analyzer for the tokenizer named by the database's 'tokenizer' property.
+    """
+    name = await conn.get_property('tokenizer')
+
+    src_file = Path(__file__).parent / f'{name}_tokenizer.py'  # must sit beside this file
+    if not src_file.is_file():
+        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
+        raise RuntimeError('Tokenizer not found')
+
+    module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
+
+    return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
diff --git a/test/python/api/search/test_query_analyzer_factory.py b/test/python/api/search/test_query_analyzer_factory.py
new file mode 100644 (file)
index 0000000..2d113e3
--- /dev/null
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for query analyzer creation.
+"""
+from pathlib import Path
+
+import pytest
+
+from nominatim.api import NominatimAPIAsync
+from nominatim.api.search.query_analyzer_factory import make_query_analyzer
+from nominatim.api.search.icu_tokenizer import ICUQueryAnalyzer
+
+@pytest.mark.asyncio
+async def test_import_icu_tokenizer(table_factory):
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT',
+                  content=(('tokenizer', 'icu'),  # property the factory looks up
+                           ('tokenizer_import_normalisation', ':: lower();'),
+                           ('tokenizer_import_transliteration', "'1' > '/1/'; 'ä' > 'ä '")))
+
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    async with api.begin() as conn:
+        ana = await make_query_analyzer(conn)
+
+        assert isinstance(ana, ICUQueryAnalyzer)
+    await api.close()
+
+
+@pytest.mark.asyncio
+async def test_import_missing_property(table_factory):
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT')  # no content: presumably an empty table
+
+    async with api.begin() as conn:
+        with pytest.raises(ValueError, match='Property.*not found'):
+            await make_query_analyzer(conn)
+    await api.close()
+
+
+@pytest.mark.asyncio
+async def test_import_missing_module(table_factory):
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT',
+                  content=(('tokenizer', 'missing'),))  # relies on no missing_tokenizer.py existing
+
+    async with api.begin() as conn:
+        with pytest.raises(RuntimeError, match='Tokenizer not found'):
+            await make_query_analyzer(conn)
+    await api.close()
+