ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
- COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+ COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)
ADD_CUSTOM_TARGET(serve-doc
- COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
- WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+ COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
)
::: nominatim.tokenizer.sanitizers.split_name_list
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### strip-brace-terms
::: nominatim.tokenizer.sanitizers.strip_brace_terms
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### tag-analyzer-by-language
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### clean-housenumbers
::: nominatim.tokenizer.sanitizers.clean_housenumbers
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### clean-postcodes
::: nominatim.tokenizer.sanitizers.clean_postcodes
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### clean-tiger-tags
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### delete-tags
::: nominatim.tokenizer.sanitizers.delete_tags
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
##### tag-japanese
::: nominatim.tokenizer.sanitizers.tag_japanese
selection:
members: False
- rendering:
+ options:
heading_level: 6
+ docstring_section_style: spacy
#### Token Analysis
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
-* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
-* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
+* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
### Installing prerequisites on Ubuntu/Debian
### Sanitizer configuration
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
- rendering:
- show_source: no
- heading_level: 6
+ options:
+ heading_level: 3
### The main filter function of the sanitizer
The filter function receives a single object of type `ProcessInfo`
which has three members:
- * `place`: read-only information about the place being processed.
+ * `place: PlaceInfo`: read-only information about the place being processed.
See PlaceInfo below.
- * `names`: The current list of names for the place. Each name is a
- PlaceName object.
- * `address`: The current list of address names for the place. Each name
- is a PlaceName object.
+ * `names: List[PlaceName]`: The current list of names for the place.
+ * `address: List[PlaceName]`: The current list of address names for the place.
While the `place` member is provided for information only, the `names` and
`address` lists are meant to be manipulated by the sanitizer. It may add and
remove entries or modify existing ones.
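A minimal sketch of such a sanitizer module (the `SanitizerConfig` import
follows the module documented above; the `ProcessInfo` import path and the
filter logic itself are illustrative assumptions):

```python
from typing import Callable

from nominatim.tokenizer.sanitizers.base import ProcessInfo  # assumed path
from nominatim.tokenizer.sanitizers.config import SanitizerConfig


def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Create a filter function that drops name entries which are
        empty after stripping whitespace.
    """

    def _filter(obj: ProcessInfo) -> None:
        # 'names' and 'address' may be manipulated freely,
        # while 'place' is read-only.
        obj.names = [name for name in obj.names if name.name.strip()]

    return _filter
```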
#### PlaceInfo - information about the place
::: nominatim.data.place_info.PlaceInfo
- rendering:
- show_source: no
- heading_level: 6
+ options:
+ heading_level: 3
#### PlaceName - extended naming information
::: nominatim.data.place_name.PlaceName
- rendering:
- show_source: no
- heading_level: 6
+ options:
+ heading_level: 3
### Example: Filter for US street prefixes
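A hedged sketch of such a filter, expanding abbreviated directional prefixes
on names of places in the US (the prefix table is made up for illustration,
and the check assumes `PlaceInfo` exposes a `country_code` attribute):

```python
from typing import Callable

from nominatim.tokenizer.sanitizers.base import ProcessInfo  # assumed path
from nominatim.tokenizer.sanitizers.config import SanitizerConfig

PREFIXES = {'n': 'North', 's': 'South', 'e': 'East', 'w': 'West'}


def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Create a filter that expands 'N Main St' to 'North Main St'
        for places in the US.
    """

    def _filter(obj: ProcessInfo) -> None:
        if obj.place.country_code != 'us':
            return
        for name in obj.names:
            words = name.name.split()
            if len(words) > 1 and words[0].lower() in PREFIXES:
                name.name = ' '.join([PREFIXES[words[0].lower()]] + words[1:])

    return _filter
```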
## Custom token analysis module
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
- rendering:
- show_source: no
- heading_level: 6
+ options:
+ heading_level: 3
::: nominatim.tokenizer.token_analysis.base.Analyzer
- rendering:
- show_source: no
- heading_level: 6
+ options:
+ heading_level: 3
### Example: Creating acronym variants for long names
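A hedged sketch under the `Analyzer` and `AnalysisModule` protocols documented
above (the acronym heuristic, the `configure()` signature, and the use of the
ICU `transliterate()` method are illustrative assumptions):

```python
from typing import Any, List, Mapping


class AcronymMaker:
    """ Analyzer that, for names of three or more words, adds an
        acronym built from the word initials as a spelling variant.
    """

    def __init__(self, norm: Any, trans: Any) -> None:
        self.norm = norm
        self.trans = trans

    def get_canonical_id(self, name: Any) -> str:
        # The normalized form of the name serves as the canonical ID.
        return self.norm.transliterate(name.name).strip()

    def compute_variants(self, canonical_id: str) -> List[str]:
        # Variants must be fully transliterated so the query
        # frontend can match them against the input.
        trans_name = self.trans.transliterate(canonical_id).strip()
        variants = [trans_name]
        words = trans_name.split()
        if len(words) >= 3:
            variants.append(''.join(w[0] for w in words))
        return [v for v in variants if v]


def configure(rules: Mapping[str, Any], normalizer: Any,
              transliterator: Any) -> Any:
    return None  # this sketch needs no static configuration


def create(normalizer: Any, transliterator: Any, config: Any) -> AcronymMaker:
    return AcronymMaker(normalizer, transliterator)
```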
A custom tokenizer must derive from `nominatim.tokenizer.base.AbstractTokenizer`
and implement the abstract functions defined there.
::: nominatim.tokenizer.base.AbstractTokenizer
- rendering:
- heading_level: 4
+ options:
+ heading_level: 3
### Python Analyzer Class
::: nominatim.tokenizer.base.AbstractAnalyzer
- rendering:
- heading_level: 4
+ options:
+ heading_level: 3
### PL/pgSQL Functions
- search
- mkdocstrings:
handlers:
- python-legacy:
- rendering:
- show_source: false
- show_signature_annotations: false
+ python:
+ paths: ["${PROJECT_SOURCE_DIR}"]
+ options:
+ show_source: False
+ show_bases: False
Returns:
The function returns the list of all tuples that could be
- found for the given words. Each list entry is a tuple of
- (original word, word token, word id).
+ found for the given words. Each list entry is a tuple of
+ (original word, word token, word id).
"""
Returns:
A JSON-serialisable structure that will be handed into
- the database via the `token_info` field.
+ the database via the `token_info` field.
"""
tables should be skipped. This option is only required for
migration purposes and can be safely ignored by custom
tokenizers.
-
- TODO: can we move the init_db parameter somewhere else?
"""
Returns:
If an issue was found, return an error message with the
- description of the issue as well as hints for the user on
- how to resolve the issue. If everything is okay, return `None`.
+ description of the issue as well as hints for the user on
+ how to resolve the issue. If everything is okay, return `None`.
"""
@abstractmethod
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
- """ Return a list of the `num` most frequent full words
- in the database.
+ """ Return a list of the most frequent full words in the database.
+
+ Arguments:
+ conn: Open connection to the database which may be used to
+ retrieve the words.
+ num: Maximum number of words to return.
"""
Returns:
If the parameter value is a simple string, it is returned as a
- one-item list. If the parameter value does not exist, the given
- default is returned. If the parameter value is a list, it is
- checked to contain only strings before being returned.
+ one-item list. If the parameter value does not exist, the given
+ default is returned. If the parameter value is a list, it is
+ checked to contain only strings before being returned.
"""
values = self.data.get(param, None)
Returns:
A regular expression pattern which can be used to
- split a string. The regular expression makes sure that the
- resulting names are stripped and that repeated delimiters
- are ignored. It may still create empty fields on occasion. The
- code needs to filter those.
+ split a string. The regular expression makes sure that the
+ resulting names are stripped and that repeated delimiters
+ are ignored. It may still create empty fields on occasion. The
+ code needs to filter those.
"""
delimiter_set = set(self.data.get('delimiters', default))
if not delimiter_set:
Returns:
A filter function that takes a target string as the argument and
- returns True if it fully matches any of the regular expressions
- otherwise returns False.
+ returns True if it fully matches any of the regular expressions,
+ otherwise returns False.
"""
filters = self.get_string_list(param) or default
Returns:
ID string with a canonical form of the name. The string may
- be empty, when the analyzer cannot analyze the name at all,
- for example because the character set in use does not match.
+ be empty when the analyzer cannot analyze the name at all,
+ for example because the character set in use does not match.
"""
def compute_variants(self, canonical_id: str) -> List[str]:
Returns:
A list of possible spelling variants. All strings must have
- been transformed with the global normalizer and
- transliterator ICU rules. Otherwise they cannot be matched
- against the input by the query frontend.
- The list may be empty, when there are no useful
- spelling variants. This may happen when an analyzer only
- usually outputs additional variants to the canonical spelling
- and there are no such variants.
+ been transformed with the global normalizer and
+ transliterator ICU rules. Otherwise they cannot be matched
+ against the input by the query frontend.
+ The list may be empty when there are no useful
+ spelling variants. This may happen when an analyzer
+ usually only outputs additional variants to the canonical
+ spelling and there are no such variants.
"""
Returns:
A data object with configuration data. This will be handed
- as is into the `create()` function and may be
- used freely by the analysis module as needed.
+ as is into the `create()` function and may be
+ used freely by the analysis module as needed.
"""
def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
Returns:
A new analyzer instance. This must be an object that implements
- the Analyzer protocol.
+ the Analyzer protocol.
"""