1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Functions for formatting postcodes according to their country-specific
format.
"""
import re

from nominatim.errors import UsageError
from nominatim.data import country_info
class CountryPostcodeMatcher:
    """ Matches and formats a postcode according to a format definition
        of the given country.

        The format definition is a dictionary with a mandatory field
        'pattern' and an optional field 'output'. In the pattern every
        'd' is replaced by a digit class and every 'l' by an upper-case
        letter class, the result is compiled as a regular expression.
        'output' is a replacement template for `re.Match.expand()` and
        defaults to the complete matched postcode.
    """

    def __init__(self, country_code, config):
        """ Create a matcher for the country `country_code` from the format
            definition `config`.

            Raises a UsageError when `config` has no field 'pattern'.
        """
        if 'pattern' not in config:
            raise UsageError("Field 'pattern' required for 'postcode' "
                             f"for country '{country_code}'")

        # The replacement is purely textual: each 'd' and 'l' anywhere in
        # the configured pattern is expanded.
        pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')

        # Strips surrounding whitespace and an optional leading country code
        # (optionally followed by a space or hyphen). The capture group is
        # lazy so that trailing whitespace is consumed by the final `\s*`
        # instead of ending up in the postcode itself.
        prefix = re.escape(country_code.upper())
        self.norm_pattern = re.compile(rf'\s*(?:{prefix}[ -]?)?(.*?)\s*')
        self.pattern = re.compile(pc_pattern)

        self.output = config.get('output', r'\g<0>')


    def match(self, postcode):
        """ Match the given postcode against the postcode pattern for this
            matcher. Returns a `re.Match` object if the match was successful
            and None otherwise.
        """
        # Upper-case, strip spaces and leading country code.
        normalized = self.norm_pattern.fullmatch(postcode.upper())

        # Normalisation itself can fail, e.g. when the postcode contains a
        # line break in the middle.
        if normalized is None:
            return None

        return self.pattern.fullmatch(normalized.group(1))


    def normalize(self, match):
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `match()`.
        """
        return match.expand(self.output)
class PostcodeFormatter:
    """ Container for different postcode formats of the world and
        access functions.
    """

    # NOTE(review): the constructor header was not visible in the reviewed
    # text; a parameterless constructor is assumed - confirm against callers.
    def __init__(self):
        """ Set up the matchers from the 'postcode' entries yielded by
            `country_info.iterate('postcode')`.

            Raises a UsageError for an entry that is neither a format
            definition (dictionary) nor a no-postcode marker.
        """
        # Objects without a country code can't have a postcode per definition.
        self.country_without_postcode = {None}
        self.country_matcher = {}
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': '.*'})

        for ccode, prop in country_info.iterate('postcode'):
            # NOTE(review): the condition of this first branch was not
            # visible in the reviewed text; `prop is False` is assumed to be
            # the marker for a country without postcodes - confirm.
            if prop is False:
                self.country_without_postcode.add(ccode)
            elif isinstance(prop, dict):
                self.country_matcher[ccode] = CountryPostcodeMatcher(ccode, prop)
            else:
                raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")


    def set_default_pattern(self, pattern):
        """ Set the postcode match pattern to use, when a country does not
            have a specific pattern.
        """
        self.default_matcher = CountryPostcodeMatcher('', {'pattern': pattern})


    def get_matcher(self, country_code):
        """ Return the CountryPostcodeMatcher for the given country.
            Returns None if the country doesn't have a postcode and the
            default matcher if there is no specific matcher configured for
            the country.
        """
        if country_code in self.country_without_postcode:
            return None

        return self.country_matcher.get(country_code, self.default_matcher)


    def match(self, country_code, postcode):
        """ Match the given postcode against the postcode pattern for the
            given country. Returns a `re.Match` object if the country has a
            pattern and the match was successful. Returns None if the match
            failed or the country doesn't have a postcode.
        """
        matcher = self.get_matcher(country_code)
        if matcher is None:
            return None

        return matcher.match(postcode)


    def normalize(self, country_code, match):
        """ Return the default format of the postcode for the given match.
            `match` must be a `re.Match` object previously returned by
            `match()`.
        """
        # Deliberately not using get_matcher(): the lookup here does not
        # consult the set of countries without postcode and always yields
        # a matcher.
        return self.country_matcher.get(country_code, self.default_matcher).normalize(match)