]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/split_name_list.py
introduce sanitizer step before token analysis
[nominatim.git] / nominatim / tokenizer / sanitizers / split_name_list.py
1 """
2 Name processor that splits name values with multiple values into their components.
3 """
4 import re
5
def create(func):
    """ Create a name processing function that splits name values with
        multiple values into their components.

        The optional parameter 'delimiters' can be used to define the
        characters that should be used for splitting. The default is ',;'.
        Every character of 'delimiters' is taken literally, so characters
        with a special meaning in regular expressions (for example '-',
        '^', ']' or a backslash) may be used as delimiters as well.

        Whitespace around the split components is removed and components
        that end up empty are dropped. Names that contain no delimiter are
        passed through unchanged.

        Raises ValueError when 'delimiters' is configured as empty.
    """
    delimiters = func.get('delimiters', ',;')
    if not delimiters:
        raise ValueError("Parameter 'delimiters' must contain at least one character.")

    # Escape the delimiters: unescaped, characters like '^', '-' or ']'
    # would change the meaning of the character class or make it invalid.
    regexp = re.compile('[{}]'.format(re.escape(delimiters)))

    def _process(obj):
        """ Replace obj.names with a list in which every name containing a
            delimiter is substituted by one clone per non-empty component.
        """
        if not obj.names:
            return

        new_names = []
        for name in obj.names:
            split_names = regexp.split(name.name)
            if len(split_names) == 1:
                # No delimiter found: keep the original object untouched.
                new_names.append(name)
                continue

            for part in split_names:
                part = part.strip()
                # Skip leftovers of doubled or leading/trailing delimiters.
                if part:
                    new_names.append(name.clone(name=part))

        obj.names = new_names

    return _process