From 2448cf2a14b441f98c81913d25ef73ce620a3a48 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 22 May 2023 09:23:19 +0200 Subject: [PATCH 1/1] add factory for query analyzer --- nominatim/api/search/icu_tokenizer.py | 5 +- .../api/search/query_analyzer_factory.py | 43 ++++++++++++++ .../api/search/test_query_analyzer_factory.py | 57 +++++++++++++++++++ 3 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 nominatim/api/search/query_analyzer_factory.py create mode 100644 test/python/api/search/test_query_analyzer_factory.py diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index 14698a28..17e67905 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -21,10 +21,7 @@ from nominatim.typing import SaRow from nominatim.api.connection import SearchConnection from nominatim.api.logging import log from nominatim.api.search import query as qmod - -# XXX: TODO -class AbstractQueryAnalyzer: - pass +from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer DB_TO_TOKEN_TYPE = { diff --git a/nominatim/api/search/query_analyzer_factory.py b/nominatim/api/search/query_analyzer_factory.py new file mode 100644 index 00000000..9804f3ce --- /dev/null +++ b/nominatim/api/search/query_analyzer_factory.py @@ -0,0 +1,43 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Factory for creating a query analyzer for the configured tokenizer. +""" +from typing import List, cast +from abc import ABC, abstractmethod +from pathlib import Path +import importlib + +from nominatim.api.logging import log +from nominatim.api.connection import SearchConnection +from nominatim.api.search.query import Phrase, QueryStruct + +class AbstractQueryAnalyzer(ABC): + """ Class for analysing incoming queries. 
+ + Query analyzers are tied to the tokenizer used on import. + """ + + @abstractmethod + async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct: + """ Analyze the given phrases and return the tokenized query. + """ + + +async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer: + """ Create a query analyzer for the tokenizer used by the database. + """ + name = await conn.get_property('tokenizer') + + src_file = Path(__file__).parent / f'{name}_tokenizer.py' + if not src_file.is_file(): + log().comment(f"No tokenizer named '{name}' available. Database not set up properly.") + raise RuntimeError('Tokenizer not found') + + module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer') + + return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn)) diff --git a/test/python/api/search/test_query_analyzer_factory.py b/test/python/api/search/test_query_analyzer_factory.py new file mode 100644 index 00000000..2d113e3e --- /dev/null +++ b/test/python/api/search/test_query_analyzer_factory.py @@ -0,0 +1,57 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for query analyzer creation. 
+""" +from pathlib import Path + +import pytest + +from nominatim.api import NominatimAPIAsync +from nominatim.api.search.query_analyzer_factory import make_query_analyzer +from nominatim.api.search.icu_tokenizer import ICUQueryAnalyzer + +@pytest.mark.asyncio +async def test_import_icu_tokenizer(table_factory): + table_factory('nominatim_properties', + definition='property TEXT, value TEXT', + content=(('tokenizer', 'icu'), + ('tokenizer_import_normalisation', ':: lower();'), + ('tokenizer_import_transliteration', "'1' > '/1/'; 'ä' > 'ä '"))) + + api = NominatimAPIAsync(Path('/invalid'), {}) + async with api.begin() as conn: + ana = await make_query_analyzer(conn) + + assert isinstance(ana, ICUQueryAnalyzer) + await api.close() + + +@pytest.mark.asyncio +async def test_import_missing_property(table_factory): + api = NominatimAPIAsync(Path('/invalid'), {}) + table_factory('nominatim_properties', + definition='property TEXT, value TEXT') + + async with api.begin() as conn: + with pytest.raises(ValueError, match='Property.*not found'): + await make_query_analyzer(conn) + await api.close() + + +@pytest.mark.asyncio +async def test_import_missing_module(table_factory): + api = NominatimAPIAsync(Path('/invalid'), {}) + table_factory('nominatim_properties', + definition='property TEXT, value TEXT', + content=(('tokenizer', 'missing'),)) + + async with api.begin() as conn: + with pytest.raises(RuntimeError, match='Tokenizer not found'): + await make_query_analyzer(conn) + await api.close() + -- 2.39.5