1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Common data types and protocols for sanitizers.
10 from typing import Optional, List, Mapping, Callable
12 from ...typing import Protocol, Final
13 from ...data.place_info import PlaceInfo
14 from ...data.place_name import PlaceName
15 from .config import SanitizerConfig
class ProcessInfo:
    """ Container class for information handed in to handler functions.
        The 'names' and 'address' members are mutable. A handler must change
        them by either modifying the lists in place or replacing the old
        content with a new list.
    """

    def __init__(self, place: PlaceInfo):
        # The source place is kept as-is; only the derived lists below
        # are meant to be changed by handlers.
        self.place: Final = place
        self.names = self._convert_name_dict(place.name)
        self.address = self._convert_name_dict(place.address)

    @staticmethod
    def _convert_name_dict(names: Optional[Mapping[str, str]]) -> List[PlaceName]:
        """ Convert a dictionary of names into a list of PlaceNames.
            The dictionary key is split into the primary part of the key
            and the suffix (the part after an optional colon).

            A missing (None) or empty dictionary yields an empty list.
        """
        out: List[PlaceName] = []

        if names:
            for key, value in names.items():
                # Split on the first colon only, so that any further
                # colons stay part of the suffix.
                parts = key.split(':', 1)
                # NOTE(review): the primary-key argument is taken to be
                # the part before the colon, as described in the
                # docstring - confirm against PlaceName's signature.
                out.append(PlaceName(value.strip(),
                                     parts[0].strip(),
                                     parts[1].strip() if len(parts) > 1 else None))

        return out
class SanitizerHandler(Protocol):
    """ Protocol for sanitizer modules.
    """

    def create(self, config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
        """
        Create a function for sanitizing a place.

        Arguments:
            config: A dictionary with the additional configuration options
                    specified in the tokenizer configuration

        Return:
            The result must be a callable that takes a place description
            and transforms name and address as required.
        """