X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/62eedbb8f6ddb7928022d799a1b1fea62844faf8..ca149fb796b1c5e6705a25005683548b816d20f2:/nominatim/tokenizer/sanitizers/base.py?ds=sidebyside diff --git a/nominatim/tokenizer/sanitizers/base.py b/nominatim/tokenizer/sanitizers/base.py index f2e1bc41..2de868c7 100644 --- a/nominatim/tokenizer/sanitizers/base.py +++ b/nominatim/tokenizer/sanitizers/base.py @@ -7,73 +7,12 @@ """ Common data types and protocols for sanitizers. """ -from typing import Optional, Dict, List, Mapping, Callable +from typing import Optional, List, Mapping, Callable -from typing_extensions import Protocol, Final from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.data.place_info import PlaceInfo - -class PlaceName: - """ A searchable name for a place together with properties. - Every name object saves the name proper and two basic properties: - * 'kind' describes the name of the OSM key used without any suffixes - (i.e. the part after the colon removed) - * 'suffix' contains the suffix of the OSM tag, if any. The suffix - is the part of the key after the first colon. - In addition to that, the name may have arbitrary additional attributes. - Which attributes are used, depends on the token analyser. - """ - - def __init__(self, name: str, kind: str, suffix: Optional[str]): - self.name = name - self.kind = kind - self.suffix = suffix - self.attr: Dict[str, str] = {} - - - def __repr__(self) -> str: - return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')" - - - def clone(self, name: Optional[str] = None, - kind: Optional[str] = None, - suffix: Optional[str] = None, - attr: Optional[Mapping[str, str]] = None) -> 'PlaceName': - """ Create a deep copy of the place name, optionally with the - given parameters replaced. In the attribute list only the given - keys are updated. The list is not replaced completely. 
- In particular, the function cannot to be used to remove an - attribute from a place name. - """ - newobj = PlaceName(name or self.name, - kind or self.kind, - suffix or self.suffix) - - newobj.attr.update(self.attr) - if attr: - newobj.attr.update(attr) - - return newobj - - - def set_attr(self, key: str, value: str) -> None: - """ Add the given property to the name. If the property was already - set, then the value is overwritten. - """ - self.attr[key] = value - - - def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]: - """ Return the given property or the value of 'default' if it - is not set. - """ - return self.attr.get(key, default) - - - def has_attr(self, key: str) -> bool: - """ Check if the given attribute is set. - """ - return key in self.attr +from nominatim.data.place_name import PlaceName +from nominatim.typing import Protocol, Final class ProcessInfo: @@ -113,7 +52,13 @@ class SanitizerHandler(Protocol): def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]: """ - A sanitizer must define a single function `create`. It takes the - dictionary with the configuration information for the sanitizer and - returns a function that transforms name and address. + Create a function for sanitizing a place. + + Arguments: + config: A dictionary with the additional configuration options + specified in the tokenizer configuration + + Return: + The result must be a callable that takes a place description + and transforms name and address as required. """