git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/sanitizers/clean_housenumbers.py
factor out housenumber splitting into sanitizer
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_housenumbers.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Sanitizer that cleans and normalizes housenumbers.
9 """
10 import re
11
class _HousenumberSanitizer:
    """ Callable sanitizer that rewrites the housenumber-like entries
        of an object's address list.
    """

    # Characters that separate multiple housenumbers within a single value.
    _SEPARATORS = re.compile(r'[;,]')

    def __init__(self, config):
        """ Create the sanitizer. `config` is accepted but not evaluated.
        """


    def __call__(self, obj):
        """ Replace the housenumber-like items in `obj.address`.

            Items of kind 'housenumber', 'streetnumber' or
            'conscriptionnumber' are replaced by one clone of kind
            'housenumber' for each value that sanitize() yields for the
            item's name. An item that yields no value is dropped. All
            other items are carried over unchanged and in order.
            Nothing happens when `obj.address` is empty.
        """
        if not obj.address:
            return

        new_address = []
        for item in obj.address:
            if item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
                new_address.extend(item.clone(kind='housenumber', name=n)
                                   for n in self.sanitize(item.name))
            else:
                # Don't touch other address items.
                new_address.append(item)

        obj.address = new_address


    def sanitize(self, value):
        """ Extract housenumbers in a regularized format from an OSM value.

            The function works as a generator that yields all valid housenumbers
            that can be created from the value. Empty parts are never yielded.
        """
        for hnr in self._split_number(value):
            yield from self._regularize(hnr)


    def _split_number(self, hnr):
        """ Split `hnr` at ';' and ',' and yield each part with the
            surrounding whitespace removed.

            Parts that are empty after stripping (empty input, trailing
            or doubled separators) are skipped, so that no housenumber
            with an empty name is ever produced.
        """
        for part in self._SEPARATORS.split(hnr):
            part = part.strip()
            if part:
                yield part


    def _regularize(self, hnr):
        """ Yield the regularized variants of a single housenumber.

            Currently the housenumber is passed through unchanged.
        """
        yield hnr
50
51
def create(config):
    """ Build the housenumber sanitizer for the given configuration.

        Returns a new `_HousenumberSanitizer` instance constructed
        with `config`.
    """
    sanitizer = _HousenumberSanitizer(config)
    return sanitizer