X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/6e89310a9285f1ad15d8002bf68f578eada367a0..7c9002cae7f950a99f3045d3058ed61ef79fe044:/src/nominatim_db/tokenizer/base.py?ds=sidebyside diff --git a/src/nominatim_db/tokenizer/base.py b/src/nominatim_db/tokenizer/base.py index 0ca7e1d1..4b96cb23 100644 --- a/src/nominatim_db/tokenizer/base.py +++ b/src/nominatim_db/tokenizer/base.py @@ -12,11 +12,12 @@ from abc import ABC, abstractmethod from typing import List, Tuple, Dict, Any, Optional, Iterable from pathlib import Path -from nominatim_core.typing import Protocol -from nominatim_core.config import Configuration -from nominatim_core.db.connection import Connection +from ..typing import Protocol +from ..config import Configuration +from ..db.connection import Connection from ..data.place_info import PlaceInfo + class AbstractAnalyzer(ABC): """ The analyzer provides the functions for analysing names and building the token database. @@ -28,17 +29,14 @@ class AbstractAnalyzer(ABC): def __enter__(self) -> 'AbstractAnalyzer': return self - def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None: self.close() - @abstractmethod def close(self) -> None: """ Free all resources used by the analyzer. """ - @abstractmethod def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, int]]: """ Return token information for the given list of words. @@ -57,7 +55,6 @@ class AbstractAnalyzer(ABC): (original word, word token, word id). """ - @abstractmethod def normalize_postcode(self, postcode: str) -> str: """ Convert the postcode to its standardized form. @@ -72,14 +69,12 @@ class AbstractAnalyzer(ABC): The given postcode after normalization. """ - @abstractmethod def update_postcodes_from_db(self) -> None: """ Update the tokenizer's postcode tokens from the current content of the `location_postcode` table. """ - @abstractmethod def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]], @@ -95,7 +90,6 @@ class AbstractAnalyzer(ABC): ones that already exist. """ - @abstractmethod def add_country_names(self, country_code: str, names: Dict[str, str]) -> None: """ Add the given names to the tokenizer's list of country tokens. @@ -106,7 +100,6 @@ class AbstractAnalyzer(ABC): names: Dictionary of name type to name. """ - @abstractmethod def process_place(self, place: PlaceInfo) -> Any: """ Extract tokens for the given place and compute the @@ -122,7 +115,6 @@ class AbstractAnalyzer(ABC): """ - class AbstractTokenizer(ABC): """ The tokenizer instance is the central instance of the tokenizer in the system. There will only be a single instance of the tokenizer @@ -146,7 +138,6 @@ class AbstractTokenizer(ABC): tokenizers. """ - @abstractmethod def init_from_project(self, config: Configuration) -> None: """ Initialise the tokenizer from an existing database setup. @@ -158,7 +149,6 @@ class AbstractTokenizer(ABC): config: Read-only object with configuration options. """ - @abstractmethod def finalize_import(self, config: Configuration) -> None: """ This function is called at the very end of an import when all @@ -170,7 +160,6 @@ class AbstractTokenizer(ABC): config: Read-only object with configuration options. """ - @abstractmethod def update_sql_functions(self, config: Configuration) -> None: """ Update the SQL part of the tokenizer. This function is called @@ -184,7 +173,6 @@ class AbstractTokenizer(ABC): config: Read-only object with configuration options. """ - @abstractmethod def check_database(self, config: Configuration) -> Optional[str]: """ Check that the database is set up correctly and ready for being @@ -199,7 +187,6 @@ class AbstractTokenizer(ABC): how to resolve the issue. If everything is okay, return `None`. """ - @abstractmethod def update_statistics(self, config: Configuration, threads: int = 1) -> None: """ Recompute any tokenizer statistics necessary for efficient lookup. @@ -208,14 +195,12 @@ class AbstractTokenizer(ABC): it to be called in order to work. """ - @abstractmethod def update_word_tokens(self) -> None: """ Do house-keeping on the tokenizers internal data structures. Remove unused word tokens, resort data etc. """ - @abstractmethod def name_analyzer(self) -> AbstractAnalyzer: """ Create a new analyzer for tokenizing names and queries @@ -231,7 +216,6 @@ class AbstractTokenizer(ABC): call the close() function before destructing the analyzer. """ - @abstractmethod def most_frequent_words(self, conn: Connection, num: int) -> List[str]: """ Return a list of the most frequent full words in the database.