X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/51b6d16dc6363577795ec6baffb50150f220fc77..aff43fb1a3b193f853ddd74a490cdff529d0af67:/nominatim/tokenizer/token_analysis/base.py diff --git a/nominatim/tokenizer/token_analysis/base.py b/nominatim/tokenizer/token_analysis/base.py index cbd445c8..c7ec61c9 100644 --- a/nominatim/tokenizer/token_analysis/base.py +++ b/nominatim/tokenizer/token_analysis/base.py @@ -28,8 +28,8 @@ class Analyzer(Protocol): Returns: ID string with a canonical form of the name. The string may - be empty, when the analyzer cannot analyze the name at all, - for example because the character set in use does not match. + be empty, when the analyzer cannot analyze the name at all, + for example because the character set in use does not match. """ def compute_variants(self, canonical_id: str) -> List[str]: @@ -42,17 +42,20 @@ class Analyzer(Protocol): Returns: A list of possible spelling variants. All strings must have - been transformed with the global normalizer and - transliterator ICU rules. Otherwise they cannot be matched - against the query later. - The list may be empty, when there are no useful - spelling variants. This may happen, when an analyzer only - produces extra variants to the canonical spelling. + been transformed with the global normalizer and + transliterator ICU rules. Otherwise they cannot be matched + against the input by the query frontend. + The list may be empty, when there are no useful + spelling variants. This may happen when an analyzer only + usually outputs additional variants to the canonical spelling + and there are no such variants. """ class AnalysisModule(Protocol): - """ Protocol for analysis modules. + """ The setup of the token analysis is split into two parts: + configuration and analyser factory. A token analysis module must + therefore implement the two functions here described. """ def configure(self, rules: Mapping[str, Any], @@ -64,14 +67,15 @@ class AnalysisModule(Protocol): Arguments: rules: A dictionary with the additional configuration options as specified in the tokenizer configuration. - normalizer: an ICU Transliterator with the compiled normalization - rules. - transliterator: an ICU transliterator with the compiled - transliteration rules. + normalizer: an ICU Transliterator with the compiled + global normalization rules. + transliterator: an ICU Transliterator with the compiled + global transliteration rules. Returns: - A data object with the configuration that was set up. May be - used freely by the analysis module as needed. + A data object with configuration data. This will be handed + as is into the `create()` function and may be + used freely by the analysis module as needed. """ def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer: @@ -82,11 +86,11 @@ class AnalysisModule(Protocol): Arguments: normalizer: an ICU Transliterator with the compiled normalization rules. - transliterator: an ICU tranliterator with the compiled + transliterator: an ICU Transliterator with the compiled transliteration rules. config: The object that was returned by the call to configure(). Returns: A new analyzer instance. This must be an object that implements - the Analyzer protocol. + the Analyzer protocol. """