from abc import ABC, abstractmethod
from typing import Any, Dict, Iterable, List, Optional, Tuple

from ..config import Configuration
from ..db.connection import Connection
from ..data.place_info import PlaceInfo


class AbstractAnalyzer(ABC):
""" The analyzer provides the functions for analysing names and building
the token database.
def __enter__(self) -> 'AbstractAnalyzer':
return self
def __exit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
self.close()
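    # Usage sketch (hedged: `tokenizer` stands for a concrete implementation
    # of AbstractTokenizer; none is constructed here). The context-manager
    # protocol guarantees that close() runs even if an exception is raised:
    #
    #     with tokenizer.name_analyzer() as analyzer:
    #         analyzer.normalize_postcode('AB1 2CD')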
@abstractmethod
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
@abstractmethod
def get_word_token_info(self, words: List[str]) -> List[Tuple[str, str, int]]:
""" Return token information for the given list of words.
(original word, word token, word id).
"""
@abstractmethod
def normalize_postcode(self, postcode: str) -> str:
""" Convert the postcode to its standardized form.
The given postcode after normalization.
"""
@abstractmethod
def update_postcodes_from_db(self) -> None:
""" Update the tokenizer's postcode tokens from the current content
of the `location_postcode` table.
"""
@abstractmethod
    def update_special_phrases(self,
                               phrases: Iterable[Tuple[str, str, str, str]],
                               should_replace: bool) -> None:
        """ Update the tokenizer's special phrase tokens from the given list
            of special phrases. If `should_replace` is true, replace the
            current set of special phrases; otherwise only add the given
            phrases, keeping the ones that already exist.
        """
@abstractmethod
def add_country_names(self, country_code: str, names: Dict[str, str]) -> None:
""" Add the given names to the tokenizer's list of country tokens.
names: Dictionary of name type to name.
"""
@abstractmethod
def process_place(self, place: PlaceInfo) -> Any:
""" Extract tokens for the given place and compute the
"""
class AbstractTokenizer(ABC):
""" The tokenizer instance is the central instance of the tokenizer in
the system. There will only be a single instance of the tokenizer
tokenizers.
"""
@abstractmethod
def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from an existing database setup.
config: Read-only object with configuration options.
"""
@abstractmethod
def finalize_import(self, config: Configuration) -> None:
""" This function is called at the very end of an import when all
config: Read-only object with configuration options.
"""
@abstractmethod
def update_sql_functions(self, config: Configuration) -> None:
""" Update the SQL part of the tokenizer. This function is called
config: Read-only object with configuration options.
"""
@abstractmethod
def check_database(self, config: Configuration) -> Optional[str]:
""" Check that the database is set up correctly and ready for being
how to resolve the issue. If everything is okay, return `None`.
"""
@abstractmethod
def update_statistics(self, config: Configuration, threads: int = 1) -> None:
""" Recompute any tokenizer statistics necessary for efficient lookup.
it to be called in order to work.
"""
@abstractmethod
def update_word_tokens(self) -> None:
""" Do house-keeping on the tokenizers internal data structures.
Remove unused word tokens, resort data etc.
"""
@abstractmethod
def name_analyzer(self) -> AbstractAnalyzer:
""" Create a new analyzer for tokenizing names and queries
call the close() function before destructing the analyzer.
"""
@abstractmethod
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
""" Return a list of the most frequent full words in the database.