From: Sarah Hoffmann
Date: Fri, 18 Aug 2023 15:28:45 +0000 (+0200)
Subject: update to modern mkdocstrings python handler
X-Git-Tag: v4.3.0~13^2~16
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/d3372e69eca75aad7a7decc17b5fd7a5ffc1b1f8?ds=sidebyside;hp=d5b6042118504e6419a557c99685381914161732

update to modern mkdocstrings python handler
---

diff --git a/docs/CMakeLists.txt b/docs/CMakeLists.txt
index edfc8829..562774cf 100644
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -25,10 +25,10 @@ endforeach()
 ADD_CUSTOM_TARGET(doc
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
-   COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+   COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
 )
 
 ADD_CUSTOM_TARGET(serve-doc
-   COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
-   WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+   COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+   WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
 )

diff --git a/docs/customize/Tokenizers.md b/docs/customize/Tokenizers.md
index 6199ea42..4a9d77bd 100644
--- a/docs/customize/Tokenizers.md
+++ b/docs/customize/Tokenizers.md
@@ -178,64 +178,72 @@ The following is a list of sanitizers that are shipped with Nominatim.
 ::: nominatim.tokenizer.sanitizers.split_name_list
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### strip-brace-terms
 
 ::: nominatim.tokenizer.sanitizers.strip_brace_terms
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### tag-analyzer-by-language
 
 ::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-housenumbers
 
 ::: nominatim.tokenizer.sanitizers.clean_housenumbers
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-postcodes
 
 ::: nominatim.tokenizer.sanitizers.clean_postcodes
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-tiger-tags
 
 ::: nominatim.tokenizer.sanitizers.clean_tiger_tags
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 #### delete-tags
 
 ::: nominatim.tokenizer.sanitizers.delete_tags
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 #### tag-japanese
 
 ::: nominatim.tokenizer.sanitizers.tag_japanese
     selection:
         members: False
-    rendering:
+    options:
         heading_level: 6
+        docstring_section_style: spacy
 
 #### Token Analysis

diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md
index d0369ea1..64306454 100644
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -47,8 +47,8 @@ depending on your choice of webserver framework:
 The documentation is built with mkdocs:
 
 * [mkdocs](https://www.mkdocs.org/) >= 1.1.2
-* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
-* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
+* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
 
 ### Installing prerequisites on Ubuntu/Debian
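As a quick sanity check that the right handler generation is installed, the import below must succeed. This is a minimal sketch assuming the `mkdocstrings_handlers` namespace package that mkdocstrings-python ships under; it is not part of this patch:

```python
# Fails with ImportError when only mkdocstrings-python-legacy is installed.
import mkdocstrings_handlers.python  # noqa: F401
```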
diff --git a/docs/develop/ICU-Tokenizer-Modules.md b/docs/develop/ICU-Tokenizer-Modules.md
index 2cf30a56..f1853006 100644
--- a/docs/develop/ICU-Tokenizer-Modules.md
+++ b/docs/develop/ICU-Tokenizer-Modules.md
@@ -53,21 +53,18 @@ the function.
 ### Sanitizer configuration
 
 ::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
-    rendering:
-        show_source: no
-        heading_level: 6
+    options:
+        heading_level: 3
 
 ### The main filter function of the sanitizer
 
 The filter function receives a single object of type `ProcessInfo`
 which has three members:
 
-  * `place`: read-only information about the place being processed.
+  * `place: PlaceInfo`: read-only information about the place being processed.
     See PlaceInfo below.
-  * `names`: The current list of names for the place. Each name is a
-    PlaceName object.
-  * `address`: The current list of address names for the place. Each name
-    is a PlaceName object.
+  * `names: List[PlaceName]`: The current list of names for the place.
+  * `address: List[PlaceName]`: The current list of address names for the place.
 
 While the `place` member is provided for information only, the `names` and
 `address` lists are meant to be manipulated by the sanitizer. It may add and
@@ -77,17 +74,15 @@ adding extra attributes) or completely replace the list with a different one.
 
 #### PlaceInfo - information about the place
 
 ::: nominatim.data.place_info.PlaceInfo
-    rendering:
-        show_source: no
-        heading_level: 6
+    options:
+        heading_level: 3
 
 
 #### PlaceName - extended naming information
 
 ::: nominatim.data.place_name.PlaceName
-    rendering:
-        show_source: no
-        heading_level: 6
+    options:
+        heading_level: 3
 
 
 ### Example: Filter for US street prefixes
@@ -145,15 +140,13 @@ They can be found in the directory
 
 ## Custom token analysis module
 
 ::: nominatim.tokenizer.token_analysis.base.AnalysisModule
-    rendering:
-        show_source: no
-        heading_level: 6
+    options:
+        heading_level: 3
 
 
 ::: nominatim.tokenizer.token_analysis.base.Analyzer
-    rendering:
-        show_source: no
-        heading_level: 6
+    options:
+        heading_level: 3
 
 ### Example: Creating acronym variants for long names

diff --git a/docs/develop/Tokenizers.md b/docs/develop/Tokenizers.md
index eb0d4ea2..c82071b6 100644
--- a/docs/develop/Tokenizers.md
+++ b/docs/develop/Tokenizers.md
@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
 and implement the abstract functions defined there.
 
 ::: nominatim.tokenizer.base.AbstractTokenizer
-    rendering:
-        heading_level: 4
+    options:
+        heading_level: 3
 
 ### Python Analyzer Class
 
 ::: nominatim.tokenizer.base.AbstractAnalyzer
-    rendering:
-        heading_level: 4
+    options:
+        heading_level: 3
 
 ### PL/pgSQL Functions

diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml
index c2a8d393..92ad92f5 100644
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -59,7 +59,8 @@ plugins:
     - search
     - mkdocstrings:
         handlers:
-          python-legacy:
-            rendering:
-              show_source: false
-              show_signature_annotations: false
+          python:
+            paths: ["${PROJECT_SOURCE_DIR}"]
+            options:
+              show_source: False
+              show_bases: False
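To illustrate the sanitizer interface documented above, here is a minimal sketch of a custom sanitizer module. It assumes the factory signature `create(SanitizerConfig) -> Callable[[ProcessInfo], None]` described in ICU-Tokenizer-Modules.md; the 'Saint' abbreviation rule is invented for the example:

```python
from typing import Callable

from nominatim.tokenizer.sanitizers.base import ProcessInfo
from nominatim.tokenizer.sanitizers.config import SanitizerConfig


def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Replace a leading 'Saint ' in every name with the abbreviation 'St '.
    """
    def _filter(obj: ProcessInfo) -> None:
        # `obj.names` may be manipulated in place; `obj.place` is read-only.
        for name in obj.names or []:
            if name.name.startswith('Saint '):
                name.name = 'St ' + name.name[6:]

    return _filter
```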
diff --git a/nominatim/tokenizer/base.py b/nominatim/tokenizer/base.py
index f0fd9dd0..51afd540 100644
--- a/nominatim/tokenizer/base.py
+++ b/nominatim/tokenizer/base.py
@@ -53,8 +53,8 @@ class AbstractAnalyzer(ABC):
 
         Returns:
             The function returns the list of all tuples that could be
-              found for the given words. Each list entry is a tuple of
-              (original word, word token, word id).
+            found for the given words. Each list entry is a tuple of
+            (original word, word token, word id).
         """
 
 
@@ -118,7 +118,7 @@ class AbstractAnalyzer(ABC):
 
         Returns:
             A JSON-serialisable structure that will be handed into
-              the database via the `token_info` field.
+            the database via the `token_info` field.
         """
 
 
@@ -144,8 +144,6 @@ class AbstractTokenizer(ABC):
                 tables should be skipped. This option is only required for
                 migration purposes and can be safely ignored by custom
                 tokenizers.
-
-            TODO: can we move the init_db parameter somewhere else?
         """
 
 
@@ -197,8 +195,8 @@ class AbstractTokenizer(ABC):
 
         Returns:
             If an issue was found, return an error message with the
-              description of the issue as well as hints for the user on
-              how to resolve the issue. If everything is okay, return `None`.
+            description of the issue as well as hints for the user on
+            how to resolve the issue. If everything is okay, return `None`.
         """
 
 
@@ -236,8 +234,12 @@ class AbstractTokenizer(ABC):
 
     @abstractmethod
     def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
-        """ Return a list of the `num` most frequent full words
-            in the database.
+        """ Return a list of the most frequent full words in the database.
+
+            Arguments:
+              conn: Open connection to the database which may be used to
+                    retrieve the words.
+              num: Maximum number of words to return.
         """

diff --git a/nominatim/tokenizer/sanitizers/config.py b/nominatim/tokenizer/sanitizers/config.py
index 9b4f763a..79396a75 100644
--- a/nominatim/tokenizer/sanitizers/config.py
+++ b/nominatim/tokenizer/sanitizers/config.py
@@ -41,9 +41,9 @@ class SanitizerConfig(_BaseUserDict):
 
         Returns:
             If the parameter value is a simple string, it is returned as a
-              one-item list. If the parameter value does not exist, the given
-              default is returned. If the parameter value is a list, it is
-              checked to contain only strings before being returned.
+            one-item list. If the parameter value does not exist, the given
+            default is returned. If the parameter value is a list, it is
+            checked to contain only strings before being returned.
         """
         values = self.data.get(param, None)
 
@@ -94,10 +94,10 @@ class SanitizerConfig(_BaseUserDict):
 
         Returns:
             A regular expression pattern which can be used to
-              split a string. The regular expression makes sure that the
-              resulting names are stripped and that repeated delimiters
-              are ignored. It may still create empty fields on occasion. The
-              code needs to filter those.
+            split a string. The regular expression makes sure that the
+            resulting names are stripped and that repeated delimiters
+            are ignored. It may still create empty fields on occasion. The
+            code needs to filter those.
         """
         delimiter_set = set(self.data.get('delimiters', default))
         if not delimiter_set:
@@ -133,8 +133,8 @@ class SanitizerConfig(_BaseUserDict):
 
         Returns:
             A filter function that takes a target string as the argument and
-              returns True if it fully matches any of the regular expressions
-              otherwise returns False.
+            returns True if it fully matches any of the regular expressions,
+            otherwise returns False.
         """
         filters = self.get_string_list(param) or default
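A short usage sketch for the two `SanitizerConfig` helpers whose docstrings are re-wrapped above. `get_string_list()` appears in the hunk context; the name `get_delimiter()` and the configuration values are assumptions for illustration:

```python
from nominatim.tokenizer.sanitizers.config import SanitizerConfig

config = SanitizerConfig({'delimiters': ';,', 'filter-kind': ['name', 'alt_name']})

# A string-or-list parameter always comes back as a list of strings.
kinds = config.get_string_list('filter-kind')   # ['name', 'alt_name']

# The compiled delimiter pattern strips whitespace and collapses repeated
# delimiters but may still produce empty fields, which the caller drops here.
parts = [p for p in config.get_delimiter().split('Main St;; Broadway') if p]
```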
diff --git a/nominatim/tokenizer/token_analysis/base.py b/nominatim/tokenizer/token_analysis/base.py
index 68046f96..c7ec61c9 100644
--- a/nominatim/tokenizer/token_analysis/base.py
+++ b/nominatim/tokenizer/token_analysis/base.py
@@ -28,8 +28,8 @@ class Analyzer(Protocol):
 
         Returns:
             ID string with a canonical form of the name. The string may
-              be empty, when the analyzer cannot analyze the name at all,
-              for example because the character set in use does not match.
+            be empty, when the analyzer cannot analyze the name at all,
+            for example because the character set in use does not match.
         """
 
     def compute_variants(self, canonical_id: str) -> List[str]:
@@ -42,13 +42,13 @@ class Analyzer(Protocol):
 
         Returns:
             A list of possible spelling variants. All strings must have
-              been transformed with the global normalizer and
-              transliterator ICU rules. Otherwise they cannot be matched
-              against the input by the query frontend.
-              The list may be empty, when there are no useful
-              spelling variants. This may happen when an analyzer only
-              usually outputs additional variants to the canonical spelling
-              and there are no such variants.
+            been transformed with the global normalizer and
+            transliterator ICU rules. Otherwise they cannot be matched
+            against the input by the query frontend.
+            The list may be empty, when there are no useful
+            spelling variants. This may happen when an analyzer usually
+            only outputs additional variants to the canonical spelling
+            and there are no such variants.
         """
 
 
@@ -74,8 +74,8 @@ class AnalysisModule(Protocol):
 
         Returns:
             A data object with configuration data. This will be handed
-              as is into the `create()` function and may be
-              used freely by the analysis module as needed.
+            as is into the `create()` function and may be
+            used freely by the analysis module as needed.
         """
 
     def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
@@ -92,5 +92,5 @@ class AnalysisModule(Protocol):
 
         Returns:
             A new analyzer instance. This must be an object that implements
-              the Analyzer protocol.
+            the Analyzer protocol.
         """
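To round off, a sketch of a module satisfying the two protocols. The module-level `create()` mirrors the signature shown in the hunk above; the argument of `get_canonical_id()` and the variant logic are illustrative assumptions:

```python
from typing import Any, List


class SwapAnalyzer:
    """ Toy analyzer: the canonical ID is the name itself and the only
        extra variant swaps the two halves of the string.
    """

    def get_canonical_id(self, name: Any) -> str:
        # An empty string would signal that the name cannot be analyzed.
        return name.name

    def compute_variants(self, canonical_id: str) -> List[str]:
        # Variants must already be normalised and transliterated.
        half = len(canonical_id) // 2
        return [canonical_id[half:] + canonical_id[:half]]


def create(normalizer: Any, transliterator: Any, config: Any) -> SwapAnalyzer:
    # `config` is whatever the module's configure() returned; unused here.
    return SwapAnalyzer()
```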