pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
- self.pattern = re.compile(f'(?:{country_code.upper()}[ -]?)?({pc_pattern})')
+ self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
+ self.pattern = re.compile(pc_pattern)
+
+ self.output = config.get('output', r'\g<0>')
def normalize(self, postcode):
    """ Return the normalized version of the postcode. If the given postcode
        does not correspond to the usage-pattern, return None.

        The postcode is upper-cased, surrounding whitespace and an optional
        leading country code are removed, and the remainder must match the
        usage-pattern completely. On success the match is rendered through
        the configured output template.
    """
    # Strip explicitly before matching: the capture group of norm_pattern
    # is a greedy '(.*)', so trailing whitespace would otherwise end up
    # inside group 1 and make the strict pattern match below fail.
    stripped = self.norm_pattern.fullmatch(postcode.strip().upper())
    if stripped is None:
        return None

    match = self.pattern.fullmatch(stripped.group(1))
    return match.expand(self.output) if match else None
class _PostcodeSanitizer:
partition: 124
languages: cs
names: !include country-names/cz.yaml
+ postcode:
+ pattern: "(ddd) ?(dd)"
+ output: \1 \2
# Germany (Deutschland)
partition: 112
languages: sv
names: !include country-names/se.yaml
+ postcode:
+ pattern: "(ddd) ?(dd)"
+ output: \1 \2
# Singapore (Singapore)
partition: 172
languages: sk
names: !include country-names/sk.yaml
+ postcode:
+ pattern: "(ddd) ?(dd)"
+ output: \1 \2
# Sierra Leone (Sierra Leone)
def test_postcode_kazakhstan_fail(sanitize, postcode):
    """ The sanitizer must return an empty result for the given postcode
        when the country is 'kz'.
    """
    result = sanitize(country='kz', postcode=postcode)

    assert result == []
+
@pytest.mark.parametrize("postcode", ('675 34', '67534', 'SE-675 34', 'SE67534'))
def test_postcode_sweden_pass(sanitize, postcode):
    """ Each accepted spelling for country 'se' (with or without the inner
        space, with or without an 'SE' prefix) must be sanitized to the
        single postcode entry '675 34'.
    """
    expected = [('postcode', '675 34')]

    assert sanitize(country='se', postcode=postcode) == expected
+
+
@pytest.mark.parametrize("postcode", ('67 345', '671123'))
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_sweden_fail(sanitize, postcode):
    """ Postcodes with a misplaced space or too many digits must yield an
        empty result for country 'se' when the sanitizer runs with
        convert_to_address=False.
    """
    result = sanitize(country='se', postcode=postcode)

    assert result == []
+