git.openstreetmap.org Git - nominatim.git/commitdiff
add factory for query analyzer
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 22 May 2023 07:23:19 +0000 (09:23 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 22 May 2023 07:23:19 +0000 (09:23 +0200)
nominatim/api/search/icu_tokenizer.py
nominatim/api/search/query_analyzer_factory.py [new file with mode: 0644]
test/python/api/search/test_query_analyzer_factory.py [new file with mode: 0644]

diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py
index 14698a28867ca7ae0fc783f6b6e11385ffe45d8a..17e679057ecb5a4eb63daeb08f38e361f19a3ada 100644 (file)
@@ -21,10 +21,7 @@ from nominatim.typing import SaRow
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.search import query as qmod
-
-# XXX: TODO
-class AbstractQueryAnalyzer:
-    pass
+from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
 
 
 DB_TO_TOKEN_TYPE = {
diff --git a/nominatim/api/search/query_analyzer_factory.py b/nominatim/api/search/query_analyzer_factory.py
new file mode 100644 (file)
index 0000000..9804f3c
--- /dev/null
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Factory for creating a query analyzer for the configured tokenizer.
+"""
+from typing import List, cast
+from abc import ABC, abstractmethod
+from pathlib import Path
+import importlib
+
+from nominatim.api.logging import log
+from nominatim.api.connection import SearchConnection
+from nominatim.api.search.query import Phrase, QueryStruct
+
+class AbstractQueryAnalyzer(ABC):
+    """ Class for analysing incoming queries.
+
+        Query analyzers are tied to the tokenizer used on import.
+    """
+
+    @abstractmethod
+    async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct:
+        """ Analyze the given phrases and return the tokenized query.
+        """
+
+
+async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
+    """ Create a query analyzer for the tokenizer used by the database.
+    """
+    name = await conn.get_property('tokenizer')
+
+    src_file = Path(__file__).parent / f'{name}_tokenizer.py'
+    if not src_file.is_file():
+        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
+        raise RuntimeError('Tokenizer not found')
+
+    module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
+
+    return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
diff --git a/test/python/api/search/test_query_analyzer_factory.py b/test/python/api/search/test_query_analyzer_factory.py
new file mode 100644 (file)
index 0000000..2d113e3
--- /dev/null
@@ -0,0 +1,57 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for query analyzer creation.
+"""
+from pathlib import Path
+
+import pytest
+
+from nominatim.api import NominatimAPIAsync
+from nominatim.api.search.query_analyzer_factory import make_query_analyzer
+from nominatim.api.search.icu_tokenizer import ICUQueryAnalyzer
+
+@pytest.mark.asyncio
+async def test_import_icu_tokenizer(table_factory):
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT',
+                  content=(('tokenizer', 'icu'),
+                           ('tokenizer_import_normalisation', ':: lower();'),
+                           ('tokenizer_import_transliteration', "'1' > '/1/'; 'ä' > 'ä '")))
+
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    async with api.begin() as conn:
+        ana = await make_query_analyzer(conn)
+
+        assert isinstance(ana, ICUQueryAnalyzer)
+    await api.close()
+
+
+@pytest.mark.asyncio
+async def test_import_missing_property(table_factory):
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT')
+
+    async with api.begin() as conn:
+        with pytest.raises(ValueError, match='Property.*not found'):
+            await make_query_analyzer(conn)
+    await api.close()
+
+
+@pytest.mark.asyncio
+async def test_import_missing_module(table_factory):
+    api = NominatimAPIAsync(Path('/invalid'), {})
+    table_factory('nominatim_properties',
+                  definition='property TEXT, value TEXT',
+                  content=(('tokenizer', 'missing'),))
+
+    async with api.begin() as conn:
+        with pytest.raises(RuntimeError, match='Tokenizer not found'):
+            await make_query_analyzer(conn)
+    await api.close()
+