+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Abstract class defintions for tokenizers. These base classes are here
mainly for documentation purposes.
def close(self) -> None:
""" Free all resources used by the analyzer.
"""
- pass
@abstractmethod
found for the given words. Each list entry is a tuple of
(original word, word token, word id).
"""
- pass
@abstractmethod
Returns:
The given postcode after normalization.
"""
- pass
@abstractmethod
""" Update the tokenizer's postcode tokens from the current content
of the `location_postcode` table.
"""
- pass
@abstractmethod
When false, just add the given phrases to the
ones that already exist.
"""
- pass
@abstractmethod
refer to.
names: Dictionary of name type to name.
"""
- pass
@abstractmethod
TODO: can we move the init_db parameter somewhere else?
"""
- pass
@abstractmethod
- def init_from_project(self) -> None:
+ def init_from_project(self, config: Configuration) -> None:
""" Initialise the tokenizer from an existing database setup.
The function should load all previously saved configuration from
the project directory and/or the property table.
+
+ Arguments:
+ config: Read-only object with configuration options.
"""
- pass
@abstractmethod
Arguments:
config: Read-only object with configuration options.
"""
- pass
@abstractmethod
Arguments:
config: Read-only object with configuration options.
"""
- pass
@abstractmethod
- def check_database(self) -> str:
+ def check_database(self, config: Configuration) -> str:
""" Check that the database is set up correctly and ready for being
queried.
+ Arguments:
+ config: Read-only object with configuration options.
+
Returns:
If an issue was found, return an error message with the
description of the issue as well as hints for the user on
- how to resolve the issue.
+ how to resolve the issue. If everything is okay, return `None`.
+ """
- Return `None`, if no issue was found.
+
+ @abstractmethod
+ def update_statistics(self) -> None:
+ """ Recompute any tokenizer statistics necessary for efficient lookup.
+ This function is meant to be called from time to time by the user
+ to improve performance. However, the tokenizer must not depend on
+ it to be called in order to work.
+ """
+
+
+ @abstractmethod
+ def update_word_tokens(self) -> None:
+ """ Do house-keeping on the tokenizers internal data structures.
+ Remove unused word tokens, resort data etc.
"""
- pass
@abstractmethod
When used outside the with construct, the caller must ensure to
call the close() function before destructing the analyzer.
"""
- pass