]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/split_name_list.py
86385985053ef2623c1c016d458c2d51ae420d44
[nominatim.git] / nominatim / tokenizer / sanitizers / split_name_list.py
1 """
2 Sanitizer that splits lists of names into their components.
3
4 Arguments:
5     delimiters: Define the set of characters to be used for
6                 splitting the list. (default: `,;`)
7 """
8 import re
9
10 from nominatim.errors import UsageError
11
def create(func):
    """ Create a name processing function that splits name values with
        multiple values into their components.

        Arguments:
            func: Configuration of the sanitizer. Only its `get()` method
                  is used, to read the optional `delimiters` entry
                  (default: `,;`).

        Returns:
            A function that takes a single object with a `names` attribute
            and replaces that attribute with the list of split names.

        Raises:
            UsageError: when the configured set of delimiters is empty.
    """
    delimiter_set = set(func.get('delimiters', ',;'))
    if not delimiter_set:
        raise UsageError("Set of delimiters in split-name-list sanitizer is empty.")

    # Use re.escape() instead of blindly prefixing a backslash: a backslash
    # in front of an ASCII letter or digit forms a regex escape sequence
    # (e.g. '\d' matches any digit, '\e' is a compile error) rather than
    # the literal character. Characters special inside a character class
    # (']', '^', '-', '\') are still escaped.
    char_class = ''.join(re.escape(d) for d in delimiter_set)
    # Whitespace around a delimiter is consumed together with the delimiter.
    regexp = re.compile(r'\s*[{}]\s*'.format(char_class))

    def _process(obj):
        """ Replace `obj.names` with a list where every name containing
            a delimiter is substituted by one clone per non-empty part.
        """
        if not obj.names:
            return

        new_names = []
        for name in obj.names:
            split_names = regexp.split(name.name)
            if len(split_names) == 1:
                # No delimiter found: keep the original name object untouched.
                new_names.append(name)
            else:
                # Empty parts (leading, trailing or doubled delimiters)
                # are dropped.
                new_names.extend(name.clone(name=n) for n in split_names if n)

        obj.names = new_names

    return _process
36     return _process