1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Factory for creating a query analyzer for the configured tokenizer.
"""
from typing import List, cast, TYPE_CHECKING
from abc import ABC, abstractmethod
from pathlib import Path
import importlib
15 from ..logging import log
16 from ..connection import SearchConnection
19 from .query import Phrase, QueryStruct
class AbstractQueryAnalyzer(ABC):
    """ Class for analysing incoming queries.

        Query analyzers are tied to the tokenizer used on import.

        This is an abstract interface: every method below must be
        implemented by a concrete analyzer before it can be instantiated.
    """

    @abstractmethod
    async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
        """ Analyze the given phrases and return the tokenized query.
        """

    @abstractmethod
    def normalize_text(self, text: str) -> str:
        """ Bring the given text into a normalized form. That is the
            standardized form search will work with. All information removed
            at this stage is inevitably lost.
        """
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create a query analyzer for the tokenizer used by the database.

        The tokenizer name is read from the 'tokenizer' property available
        through `conn`. The analyzer itself is built by the
        `create_query_analyzer()` coroutine of the matching
        `<name>_tokenizer` module.

        Raises a RuntimeError when no source file for the named tokenizer
        exists in the directory of this module.
    """
    name = await conn.get_property('tokenizer')

    # Make sure the tokenizer module exists next to this file before
    # attempting the import, so that a missing tokenizer is reported
    # with a clear message.
    src_file = Path(__file__).parent / f'{name}_tokenizer.py'
    if not src_file.is_file():
        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
        raise RuntimeError('Tokenizer not found')

    module = importlib.import_module(f'nominatim_api.search.{name}_tokenizer')

    # The module is loaded dynamically, so the type checker needs the
    # explicit cast to know what the factory function returns.
    return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))