"""
Configuration for Sanitizers.
"""
from collections import UserDict
import re
from typing import Sequence, Optional, Pattern, Callable, Any, TYPE_CHECKING

from nominatim.errors import UsageError
# working around missing generics in Python < 3.8
# See https://github.com/python/typing/issues/60#issuecomment-869757075
if TYPE_CHECKING:
    _BaseUserDict = UserDict[str, Any]
else:
    _BaseUserDict = UserDict


class SanitizerConfig(_BaseUserDict):
    """ Dictionary with configuration options for a sanitizer.

        The configuration is meant to be treated as read-only.
        In addition to the usual dictionary functions, the class provides
        accessors to standard sanitizer options that are used by many of the
        sanitizers.
    """

    def get_string_list(self, param: str, default: Sequence[str] = tuple()) -> Sequence[str]:
        """ Extract a configuration parameter as a string list.

            Arguments:
                param: Name of the configuration parameter.
                default: Value to return, when the parameter is missing.

            Returns:
                If the parameter value is a simple string, it is returned as a
                one-item list (an empty string yields an empty list).
                If the parameter value does not exist, a list copy of the
                given default is returned (or None when the default is None).
                If the parameter value is a list or tuple, it is checked
                to contain only strings before being returned.

            Raises:
                UsageError: when the value is neither a string nor a
                    list/tuple consisting only of strings.
        """
        values = self.data.get(param, None)

        if values is None:
            # Copy the default so that callers cannot modify a shared object.
            return None if default is None else list(default)

        if isinstance(values, str):
            return [values] if values else []

        if not isinstance(values, (list, tuple)):
            raise UsageError(f"Parameter '{param}' must be string or list of strings.")

        if any(not isinstance(value, str) for value in values):
            raise UsageError(f"Parameter '{param}' must be string or list of strings.")

        return values


    def get_bool(self, param: str, default: Optional[bool] = None) -> bool:
        """ Extract a configuration parameter as a boolean.

            Arguments:
                param: Name of the configuration parameter. The parameter must
                       contain one of the yaml boolean values or a
                       UsageError will be raised.
                default: Value to return, when the parameter is missing.
                         When set to `None`, the parameter must be defined.

            Returns:
                Boolean value of the given parameter.

            Raises:
                UsageError: when the value is not a boolean. This includes
                    a missing parameter when no default was given.
        """
        value = self.data.get(param, default)

        # A missing parameter with default None ends up here as None
        # and is rejected as well.
        if not isinstance(value, bool):
            raise UsageError(f"Parameter '{param}' must be a boolean value ('yes' or 'no').")

        return value


    def get_delimiter(self, default: str = ',;') -> Pattern[str]:
        """ Return the 'delimiters' parameter in the configuration as a
            compiled regular expression that can be used to split strings on
            these delimiters.

            Arguments:
                default: Delimiters to be used when 'delimiters' parameter
                         is not explicitly configured.

            Returns:
                A regular expression pattern which can be used to
                split a string. The regular expression makes sure that the
                resulting names are stripped and that repeated delimiters
                are ignored. It may still create empty fields on occasion. The
                code needs to filter those.

            Raises:
                UsageError: when the set of delimiters is empty.
        """
        delimiter_set = set(self.data.get('delimiters', default))
        if not delimiter_set:
            raise UsageError("Empty 'delimiters' parameter not allowed for sanitizer.")

        # re.escape() keeps alphanumeric delimiters literal. Prefixing every
        # character with a backslash would turn 'd' into the digit class '\d'.
        # Sorting makes the resulting pattern deterministic.
        char_class = ''.join(re.escape(d) for d in sorted(delimiter_set))

        return re.compile('\\s*[{}]+\\s*'.format(char_class))


    def get_filter_kind(self, *default: str) -> Callable[[str], bool]:
        """ Return a filter function for the name kind from the 'filter-kind'
            config parameter.

            If the 'filter-kind' parameter is empty, the filter lets all items
            pass. If the parameter is a string, it is interpreted as a single
            regular expression that must match the full kind string.
            If the parameter is a list then
            any of the regular expressions in the list must match to pass.

            Arguments:
                default: Filters to be used, when the 'filter-kind' parameter
                         is not specified. If omitted then the default is to
                         let all names pass.

            Returns:
                A filter function which takes a name string and returns
                True when the item passes the filter.
        """
        filters = self.get_string_list('filter-kind', default)

        # any() over an empty list is always False, so the documented
        # "no filter means everything passes" case needs its own branch.
        if not filters:
            return lambda _: True

        regexes = [re.compile(regex) for regex in filters]

        return lambda name: any(regex.fullmatch(name) for regex in regexes)