1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Common data types and protocols for sanitizers.
10 from typing import Optional, Dict, List, Mapping, Callable
12 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
13 from nominatim.data.place_info import PlaceInfo
14 from nominatim.typing import Protocol, Final
class PlaceName:
    """ Each name and address part of a place is encapsulated in an object of
        this class. It saves not only the name proper but also describes the
        kind of name with two properties:

        * `kind` describes the name of the OSM key used without any suffixes
          (i.e. the part after the colon removed)
        * `suffix` contains the suffix of the OSM tag, if any. The suffix
          is the part of the key after the first colon.

        In addition to that, a name may have arbitrary additional attributes.
        How attributes are used, depends on the sanitizers and token analysers.
        The exception is the 'analyzer' attribute. This attribute determines
        which token analysis module will be used to finalize the treatment of
        the name.
    """

    def __init__(self, name: str, kind: str, suffix: Optional[str]):
        # __repr__() and clone() read all three fields back from the
        # instance, so they must be stored here.
        self.name = name
        self.kind = kind
        self.suffix = suffix
        self.attr: Dict[str, str] = {}


    def __repr__(self) -> str:
        return f"PlaceName(name='{self.name}',kind='{self.kind}',suffix='{self.suffix}')"


    def clone(self, name: Optional[str] = None,
              kind: Optional[str] = None,
              suffix: Optional[str] = None,
              attr: Optional[Mapping[str, str]] = None) -> 'PlaceName':
        """ Create a deep copy of the place name, optionally with the
            given parameters replaced. In the attribute list only the given
            keys are updated. The list is not replaced completely.
            In particular, the function cannot be used to remove an
            attribute from a place name.

            A parameter that is None (or otherwise falsy, e.g. an empty
            string) keeps the value of this object.
        """
        newobj = PlaceName(name or self.name,
                           kind or self.kind,
                           suffix or self.suffix)

        # Start from a copy of the current attributes, then overlay the
        # requested changes. 'attr' is optional, dict.update(None) would raise.
        newobj.attr.update(self.attr)
        if attr:
            newobj.attr.update(attr)

        return newobj


    def set_attr(self, key: str, value: str) -> None:
        """ Add the given property to the name. If the property was already
            set, then the value is overwritten.
        """
        self.attr[key] = value


    def get_attr(self, key: str, default: Optional[str] = None) -> Optional[str]:
        """ Return the given property or the value of 'default' if it
            is not set.
        """
        return self.attr.get(key, default)


    def has_attr(self, key: str) -> bool:
        """ Check if the given attribute is set.
        """
        return key in self.attr
class ProcessInfo:
    """ Container class for information handed into handler functions.
        The 'names' and 'address' members are mutable. A handler must change
        them by either modifying the lists in place or replacing the old
        content with a new list.
    """

    def __init__(self, place: PlaceInfo):
        self.place: Final = place
        self.names = self._convert_name_dict(place.name)
        self.address = self._convert_name_dict(place.address)


    @staticmethod
    def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
        """ Convert a dictionary of names into a list of PlaceNames.
            The dictionary key is split into the primary part of the key
            and the suffix (the part after an optional colon).

            Returns an empty list when 'names' is None or empty.
        """
        out: List[PlaceName] = []

        # 'names' is optional, so guard before iterating.
        if names:
            for key, value in names.items():
                # Split only on the first colon: everything after it is
                # the suffix, even when it contains further colons.
                parts = key.split(':', 1)
                out.append(PlaceName(value.strip(),
                                     parts[0].strip(),
                                     parts[1].strip() if len(parts) > 1 else None))

        return out
class SanitizerHandler(Protocol):
    """ Protocol for sanitizer modules.
    """

    def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
        """
        Create a function for sanitizing a place.

        Arguments:
            config: A dictionary with the additional configuration options
                    specified in the tokenizer configuration

        Return:
            The result must be a callable that takes a place description
            and transforms name and address as required.
        """