1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Sanitizer that filters postcodes by their officially allowed pattern.

Arguments:
    convert-to-address: If set to 'yes' (the default), then postcodes that do
                        not conform with their country-specific pattern are
                        converted to an address component. That means that
                        the postcode does not take part when computing the
                        postcode centroids of a country but is still searchable.
                        When set to 'no', non-conforming postcodes are not
                        searchable either.
    default-pattern:    Pattern to use when there is none available for the
                        country in question. Warning: will not be used for
                        objects that have no country assigned. These are always
                        assumed to have no postcode.
"""
23 from nominatim.data.postcode_format import PostcodeFormatter
class _PostcodeSanitizer:
    """ Callable sanitizer that checks every address entry of kind
        'postcode' against the postcode pattern of the place's country.

        Conforming postcodes are replaced by their normalized form and get
        a 'variant' attribute. Non-conforming postcodes are either
        re-labelled as 'unofficial_postcode' or dropped from the address,
        depending on the 'convert-to-address' option.
    """

    def __init__(self, config):
        """ Set up the sanitizer from its configuration.

            `config` must provide `get_bool(name, default)` and `get(name)`.
            The options read are 'convert-to-address' (boolean, defaults to
            True) and 'default-pattern' (optional string).
        """
        self.convert_to_address = config.get_bool('convert-to-address', True)
        self.matcher = PostcodeFormatter()

        default_pattern = config.get('default-pattern')
        # isinstance() is already False for None, so no separate None test
        # is needed. Non-string values are silently ignored.
        if isinstance(default_pattern, str):
            self.matcher.set_default_pattern(default_pattern)


    def __call__(self, obj):
        """ Sanitize all postcode entries in `obj.address` in place.

            The country used for pattern lookup is taken from
            `obj.place.country_code`. Nothing is returned.
        """
        if not obj.address:
            return

        country = obj.place.country_code

        # Collect the surviving entries instead of removing items while
        # walking the list: deleting during iteration shifts the remaining
        # items and would skip the entry that follows a removed one.
        kept = []
        for item in obj.address:
            if item.kind == 'postcode':
                formatted = self.scan(item.name, country)

                if formatted is None:
                    if not self.convert_to_address:
                        # Non-conforming and not to be converted: drop it.
                        continue
                    item.kind = 'unofficial_postcode'
                else:
                    item.name = formatted[0]
                    item.set_attr('variant', formatted[1])

            kept.append(item)

        # Only touch the list when something was actually dropped. Slice
        # assignment keeps the same list object for other holders.
        if len(kept) != len(obj.address):
            obj.address[:] = kept


    def scan(self, postcode, country):
        """ Check the postcode for correct formatting and return the
            normalized version. Returns None if the postcode does not
            correspond to the official format of the given country.

            On success the result is a tuple of the normalized postcode
            (as produced by the matcher's `normalize()`) and a variant
            string built from the non-empty match groups joined by spaces.
        """
        match = self.matcher.match(country, postcode)
        if match is None:
            return None

        # Groups that did not take part in the match are None; skip them.
        variant = ' '.join(part for part in match.groups() if part is not None)

        return self.matcher.normalize(country, match), variant
def create(config):
    """ Create a postcode sanitizing function.

        Returns a callable `_PostcodeSanitizer` set up from `config`.
    """
    return _PostcodeSanitizer(config)