X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/83054af46fb21ea40c0bebb2c8a528937e5a49a0..67cfad6a2c3300783695e7cd9f6f4191d5630e5b:/nominatim/tokenizer/sanitizers/base.py diff --git a/nominatim/tokenizer/sanitizers/base.py b/nominatim/tokenizer/sanitizers/base.py index 692c6d5f..2de868c7 100644 --- a/nominatim/tokenizer/sanitizers/base.py +++ b/nominatim/tokenizer/sanitizers/base.py @@ -7,74 +7,13 @@ """ Common data types and protocols for sanitizers. """ -from typing import Optional, Dict, List, Mapping, Callable +from typing import Optional, List, Mapping, Callable from nominatim.tokenizer.sanitizers.config import SanitizerConfig from nominatim.data.place_info import PlaceInfo +from nominatim.data.place_name import PlaceName from nominatim.typing import Protocol, Final -class PlaceName: - """ A searchable name for a place together with properties. - Every name object saves the name proper and two basic properties: - * 'kind' describes the name of the OSM key used without any suffixes - (i.e. the part after the colon removed) - * 'suffix' contains the suffix of the OSM tag, if any. The suffix - is the part of the key after the first colon. - In addition to that, the name may have arbitrary additional attributes. - Which attributes are used, depends on the token analyser. - """ - - def __init__(self, name: str, kind: str, suffix: Optional[str]): - self.name = name - self.kind = kind - self.suffix = suffix - self.attr: Dict[str, str] = {} - - - def __repr__(self) -> str: - return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')" - - - def clone(self, name: Optional[str] = None, - kind: Optional[str] = None, - suffix: Optional[str] = None, - attr: Optional[Mapping[str, str]] = None) -> 'PlaceName': - """ Create a deep copy of the place name, optionally with the - given parameters replaced. In the attribute list only the given - keys are updated. The list is not replaced completely. 
- In particular, the function cannot to be used to remove an - attribute from a place name. - """ - newobj = PlaceName(name or self.name, - kind or self.kind, - suffix or self.suffix) - - newobj.attr.update(self.attr) - if attr: - newobj.attr.update(attr) - - return newobj - - - def set_attr(self, key: str, value: str) -> None: - """ Add the given property to the name. If the property was already - set, then the value is overwritten. - """ - self.attr[key] = value - - - def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]: - """ Return the given property or the value of 'default' if it - is not set. - """ - return self.attr.get(key, default) - - - def has_attr(self, key: str) -> bool: - """ Check if the given attribute is set. - """ - return key in self.attr - class ProcessInfo: """ Container class for information handed into to handler functions. @@ -113,7 +52,13 @@ class SanitizerHandler(Protocol): def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]: """ - A sanitizer must define a single function `create`. It takes the - dictionary with the configuration information for the sanitizer and - returns a function that transforms name and address. + Create a function for sanitizing a place. + + Arguments: + config: A dictionary with the additional configuration options + specified in the tokenizer configuration + + Return: + The result must be a callable that takes a place description + and transforms name and address as required. """