1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
Sanitizer that preprocesses address tags for house numbers. The sanitizer
can be used to:

* define which tags are to be considered house numbers (see 'filter-kind')
* split house number lists into individual numbers (see 'delimiters')
* turn selected house number values into names (see 'convert-to-name')
Arguments:
    delimiters: Define the set of characters to be used for
                splitting a list of house numbers into parts. (default: ',;')
    filter-kind: Define the address tags that are considered to be a
                 house number. Either takes a single string or a list of
                 strings, where each string is a regular expression. An
                 address item is considered a house number if the 'kind'
                 fully matches any of the given regular expressions.
                 (default: 'housenumber')
    convert-to-name: Define house numbers that should be treated as a name
                     instead of a house number. Either takes a single string
                     or a list of strings, where each string is a regular
                     expression that must match the full house number value.
                     (default: empty list, no house number is converted)
27 from typing import Callable, Iterator, List
30 from nominatim.tokenizer.sanitizers.base import ProcessInfo, PlaceName
31 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
class _HousenumberSanitizer:
    """ Callable sanitizer for the house number entries of a place.

        Address items whose kind is accepted by the configured filter are
        either moved to the names of the place (when the value matches one
        of the 'convert-to-name' expressions) or split at the configured
        delimiters into individual 'housenumber' address items. All other
        address items are passed through unchanged.
    """

    def __init__(self, config: SanitizerConfig) -> None:
        """ Read the sanitizer settings from `config`.

            Sets up the kind filter (default kind: 'housenumber'), the
            regular expression used for splitting house number lists and
            the compiled list of 'convert-to-name' expressions.
        """
        # Predicate that is called with the 'kind' of an address item.
        self.filter_kind = config.get_filter_kind('housenumber')
        # Compiled expression whose split() separates house number lists.
        self.split_regexp = config.get_delimiter()

        nameregexps = config.get_string_list('convert-to-name', [])
        self.is_name_regexp = [re.compile(r) for r in nameregexps]


    def __call__(self, obj: ProcessInfo) -> None:
        """ Rewrite the address list of `obj` in place.

            Nothing is done when the place has no address items.
        """
        if not obj.address:
            return

        new_address: List[PlaceName] = []
        for item in obj.address:
            if self.filter_kind(item.kind):
                if self._treat_as_name(item.name):
                    # The value is a name: move it out of the address.
                    obj.names.append(item.clone(kind='housenumber'))
                else:
                    # One address item per individual house number.
                    new_address.extend(item.clone(kind='housenumber', name=n)
                                       for n in self.sanitize(item.name))
            else:
                # Don't touch other address items.
                new_address.append(item)

        obj.address = new_address


    def sanitize(self, value: str) -> Iterator[str]:
        """ Extract housenumbers in a regularized format from an OSM value.

            The function works as a generator that yields all valid housenumbers
            that can be created from the value.
        """
        for hnr in self.split_regexp.split(value):
            # Adjacent or trailing delimiters produce empty parts; skip them.
            if hnr:
                yield from self._regularize(hnr)


    def _regularize(self, hnr: str) -> Iterator[str]:
        """ Yield the regularized forms of a single house number.

            The house number is currently passed through unchanged.
        """
        yield hnr


    def _treat_as_name(self, housenumber: str) -> bool:
        """ Return True when the complete value matches one of the
            'convert-to-name' expressions.
        """
        return any(r.fullmatch(housenumber) is not None for r in self.is_name_regexp)
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Create a housenumber processing function.

        The returned callable is a house number sanitizer built from the
        given configuration.
    """
    sanitizer = _HousenumberSanitizer(config)
    return sanitizer