1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Handler for cleaning name and address tags in place information before it
is handed to the token analysis.
"""
11 from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
13 from nominatim_core.errors import UsageError
14 from nominatim_core.config import Configuration
15 from .sanitizers.config import SanitizerConfig
16 from .sanitizers.base import SanitizerHandler, ProcessInfo
17 from ..data.place_name import PlaceName
18 from ..data.place_info import PlaceInfo
""" Controller class which applies sanitizer functions on the place
    names and address before they are used by the token analysers.
"""
def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]],
             config: Configuration) -> None:
    """ Build the list of sanitizer handlers from the given rules.

        Each rule is a mapping that must contain a 'step' entry holding
        a string. The step is handed to `config.load_plugin_module()`
        and the handler returned by the loaded module's `create()`
        function is appended to `self.handlers`, in rule order.

        When `rules` is None or empty, `self.handlers` stays empty.

        Raises a UsageError when a rule has no 'step' entry or when the
        entry is not a string.
    """
    self.handlers: List[Callable[[ProcessInfo], None]] = []

    # NOTE(review): the statement binding the per-rule variable was not
    # visible in the reviewed text; iterating over `rules` is inferred
    # from its type and from the checks below. Confirm against upstream.
    # `or ()` keeps the Optional parameter safe to iterate.
    for rule in rules or ():
        if 'step' not in rule:
            raise UsageError("Sanitizer rule is missing the 'step' attribute.")
        if not isinstance(rule['step'], str):
            raise UsageError("'step' attribute must be a simple string.")

        module: SanitizerHandler = \
            config.load_plugin_module(rule['step'], 'nominatim_db.tokenizer.sanitizers')

        # The whole rule mapping is passed on as the handler's configuration.
        self.handlers.append(module.create(SanitizerConfig(rule)))
def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
    """ Extract a sanitized list of names and address parts from the
        given place. The function returns a tuple
        (list of names, list of address names)
    """
    obj = ProcessInfo(place)

    # Handlers are applied in the order they are stored in
    # `self.handlers`, all on the same ProcessInfo object.
    # NOTE(review): the loop body was not visible in the reviewed text;
    # calling the handler with `obj` is inferred from how the handlers
    # are used here. Confirm against upstream.
    for func in self.handlers:
        func(obj)

    return obj.names, obj.address