Merge pull request #3178 from lonvia/library-documentation

[nominatim.git] / nominatim / tokenizer / token_analysis / base.py
diff --git a/nominatim/tokenizer/token_analysis/base.py b/nominatim/tokenizer/token_analysis/base.py

index cbd445c80fefdd079fe9692f2084ed5a696da8f6..c7ec61c9fd683c7b105b2ccb3c750995dd435e09 100644 (file)
--- a/nominatim/tokenizer/token_analysis/base.py
+++ b/nominatim/tokenizer/token_analysis/base.py
@@ -28,8 +28,8 @@ class Analyzer(Protocol):
  
              Returns:
                  ID string with a canonical form of the name. The string may
-                be empty, when the analyzer cannot analyze the name at all,
-                for example because the character set in use does not match.
+                    be empty, when the analyzer cannot analyze the name at all,
+                    for example because the character set in use does not match.
          """
  
      def compute_variants(self, canonical_id: str) -> List[str]:
@@ -42,17 +42,20 @@ class Analyzer(Protocol):
  
              Returns:
                  A list of possible spelling variants. All strings must have
-                been transformed with the global normalizer and
-                transliterator ICU rules. Otherwise they cannot be matched
-                against the query later.
-                The list may be empty, when there are no useful
-                spelling variants. This may happen, when an analyzer only
-                produces extra variants to the canonical spelling.
+                    been transformed with the global normalizer and
+                    transliterator ICU rules. Otherwise they cannot be matched
+                    against the input by the query frontend.
+                    The list may be empty, when there are no useful
+                    spelling variants. This may happen when an analyzer
+                    usually only outputs additional variants to the canonical
+                    spelling and there are no such variants.
          """
  
  
  class AnalysisModule(Protocol):
-    """ Protocol for analysis modules.
+    """ The setup of the token analysis is split into two parts:
+        configuration and analyzer factory. A token analysis module must
+        therefore implement the two functions described here.
      """
  
      def configure(self, rules: Mapping[str, Any],
@@ -64,14 +67,15 @@ class AnalysisModule(Protocol):
              Arguments:
                  rules: A dictionary with the additional configuration options
                         as specified in the tokenizer configuration.
-                normalizer: an ICU Transliterator with the compiled normalization
-                            rules.
-                transliterator: an ICU transliterator with the compiled
-                                transliteration rules.
+                normalizer: an ICU Transliterator with the compiled
+                            global normalization rules.
+                transliterator: an ICU Transliterator with the compiled
+                                global transliteration rules.
  
              Returns:
-                A data object with the configuration that was set up. May be
-                used freely by the analysis module as needed.
+                A data object with configuration data. This will be handed
+                    as is into the `create()` function and may be
+                    used freely by the analysis module as needed.
          """
  
      def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
@@ -82,11 +86,11 @@ class AnalysisModule(Protocol):
              Arguments:
                  normalizer: an ICU Transliterator with the compiled normalization
                              rules.
-                transliterator: an ICU tranliterator with the compiled
+                transliterator: an ICU Transliterator with the compiled
                                  transliteration rules.
                  config: The object that was returned by the call to configure().
  
              Returns:
                  A new analyzer instance. This must be an object that implements
-                the Analyzer protocol.
+                    the Analyzer protocol.
          """