]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/strip_brace_terms.py
add documentation for new configuration of ICU tokenizer
[nominatim.git] / nominatim / tokenizer / sanitizers / strip_brace_terms.py
1 """
2 This sanitizer creates additional name variants for names that have
3 addendums in brackets (e.g. "Halle (Saale)"). The additional variant contains
4 only the main name part with the bracket part removed.
5 """
6
def create(_):
    """ Build the sanitizer function that adds bracket-free name variants.

        The single argument is accepted but never used.
    """
    def _process(obj):
        """ Append, for every name containing '(', a variant that consists
            of the text before the first opening bracket.
        """
        if not obj.names:
            return

        variants = []
        for entry in obj.names:
            if '(' not in entry.name:
                continue

            # Keep only what precedes the first opening bracket.
            stem = entry.name.partition('(')[0].strip()
            if stem:
                variants.append(entry.clone(name=stem))

        # Extend only after the scan so the list is not changed while
        # it is being iterated.
        obj.names.extend(variants)

    return _process
23     return _process