]> git.openstreetmap.org Git - nominatim.git/blob - src/nominatim_db/tokenizer/sanitizers/split_name_list.py
Update lookup.py - Correct spelling for "simultaneously"
[nominatim.git] / src / nominatim_db / tokenizer / sanitizers / split_name_list.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Sanitizer that splits lists of names into their components.
9
10 Arguments:
11     delimiters: Define the set of characters to be used for
12                 splitting the list. (default: ',;')
13 """
14 from typing import Callable
15
16 from .base import ProcessInfo
17 from .config import SanitizerConfig
18
19
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Create a name processing function that splits name values with
        multiple values into their components.

        The delimiter pattern is obtained once from
        `config.get_delimiter()` when the sanitizer is created and is
        reused for every object that is processed afterwards.
    """
    regexp = config.get_delimiter()

    def _process(obj: ProcessInfo) -> None:
        """ Replace each list-valued name in `obj.names` by one name
            per non-empty list component. The list is replaced on the
            object; nothing is returned.
        """
        # Nothing to split: leave obj.names exactly as it is.
        if not obj.names:
            return

        new_names = []
        for name in obj.names:
            split_names = regexp.split(name.name)
            if len(split_names) == 1:
                # No delimiter present, keep the original name object.
                new_names.append(name)
            else:
                # One clone per component. Empty components (doubled,
                # leading or trailing delimiters) are dropped.
                new_names.extend(name.clone(name=n) for n in split_names if n)

        obj.names = new_names

    return _process