1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Common data types and protocols for sanitizers.
10 from typing import Optional, List, Mapping, Callable
12 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
13 from nominatim.data.place_info import PlaceInfo
14 from nominatim.data.place_name import PlaceName
15 from nominatim.typing import Protocol, Final
19 """ Container class for information handed in to handler functions.
20 The 'names' and 'address' members are mutable. A handler must change
21 them by either modifying the lists in place or replacing the old content
def __init__(self, place: PlaceInfo):
    """ Set up the processing state for a single place.

        The place itself is kept as-is in 'place'. Its 'name' and
        'address' members are each run through _convert_name_dict()
        and the resulting PlaceName lists are stored in 'names' and
        'address'.
    """
    self.place: Final = place
    # Both members go through the same conversion helper.
    as_place_names = self._convert_name_dict
    self.names = as_place_names(place.name)
    self.address = as_place_names(place.address)
32 def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
33 """ Convert a dictionary of names into a list of PlaceNames.
34 The dictionary key is split into the primary part of the key
35 and the suffix (the part after an optional colon).
40 for key, value in names.items():
41 parts = key.split(':', 1)
42 out.append(PlaceName(value.strip(),
44 parts[1].strip() if len(parts) > 1 else None))
49 class SanitizerHandler(Protocol):
50 """ Protocol for sanitizer modules.
53 def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
55 Create a function for sanitizing a place.
58 config: A dictionary with the additional configuration options
59 specified in the tokenizer configuration
62 The result must be a callable that takes a place description
63 and transforms name and address as required.