1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Common data types and protocols for analysers.
10 from typing import Mapping, List, Any
12 from nominatim.typing import Protocol
class Analyser(Protocol):
    """ Interface of the object that the `create()` function of an
        analysis module has to hand back.
    """

    def normalize(self, name: str) -> str:
        """ Convert the given name into its normalized form, i.e. the
            standard form that serves as the starting point for deriving
            possible variants of the name.
        """

    def get_variants_ascii(self, norm_name: str) -> List[str]:
        """ Determine the spelling variants of a name that has already
            been normalized and return them in transliterated form.
        """
class AnalysisModule(Protocol):
    """ Protocol describing the interface of an analysis module.
    """

    def configure(
        self,
        rules: Mapping[str, Any],
        normalizer: Any,
        transliterator: Any,
    ) -> Any:
        """ Set up the configuration of the analysis module.
            All data that can be shared between the instances of this
            analyser should be prepared here.

            Arguments:
                rules: Dictionary holding the additional configuration
                       options as given in the tokenizer configuration.
                normalizer: ICU Transliterator compiled from the
                            normalization rules.
                transliterator: ICU Transliterator compiled from the
                                transliteration rules.

            Returns:
                A data object containing the configuration that was set up.
                The analysis module is free to use it as needed.
        """

    def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyser:
        """ Instantiate a new analyser.
            In a multi-threading context each thread gets its own,
            separate instance of the analyser.

            Arguments:
                normalizer: ICU Transliterator compiled from the
                            normalization rules.
                transliterator: ICU Transliterator compiled from the
                                transliteration rules.
                config: The object that the call to configure() returned.

            Returns:
                A new analyser instance. Its class must implement the
                Analyser protocol.
        """