git.openstreetmap.org Git - nominatim.git/commitdiff
postcodes: strip leading country codes
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 23 May 2022 09:01:57 +0000 (11:01 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Jun 2022 21:42:31 +0000 (23:42 +0200)
nominatim/tokenizer/sanitizers/clean_postcodes.py
test/python/tokenizer/sanitizers/test_clean_postcodes.py

index b07908cdee9339118b058d26aa1736451c5659c1..ae1cd62d8d09f7c9afe5b1fac3949fccaca8b941 100644 (file)
@@ -29,8 +29,9 @@ class _PostcodeMatcher:
             raise UsageError("Field 'pattern' required for 'postcode' "
                              f"for country '{country_code}'")
 
-        self.pattern = re.compile(config['pattern'].replace('d', '[0-9]')
-                                                   .replace('l', '[A-Z]'))
+        pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
+
+        self.pattern = re.compile(f'(?:{country_code.upper()}[ -]?)?({pc_pattern})')
 
 
     def normalize(self, postcode):
@@ -39,7 +40,9 @@ class _PostcodeMatcher:
         """
         normalized = postcode.strip().upper()
 
-        return normalized if self.pattern.fullmatch(normalized) else None
+        match = self.pattern.fullmatch(normalized)
+
+        return match.group(1) if match else None
 
 
 class _PostcodeSanitizer:
index d6371e075ba52c448a097780d7c19be042de1758..e5c07596a7942c7a34678d1c93deab257c02e983 100644 (file)
@@ -43,12 +43,14 @@ def test_postcode_no_country_drop(sanitize, country):
     assert sanitize(country=country, postcode='23231') == []
 
 
-@pytest.mark.parametrize("postcode", ('12345', '  34009  '))
+@pytest.mark.parametrize("postcode", ('12345', '  12345  ', 'de 12345',
+                                      'DE12345', 'DE 12345', 'DE-12345'))
 def test_postcode_pass_good_format(sanitize, postcode):
-    assert sanitize(country='de', postcode=postcode) == [('postcode', postcode.strip())]
+    assert sanitize(country='de', postcode=postcode) == [('postcode', '12345')]
 
 
-@pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....'))
+@pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....',
+                                      'DE  12345', 'DEF12345', 'CH 12345'))
 @pytest.mark.sanitizer_params(convert_to_address=False)
 def test_postcode_drop_bad_format(sanitize, postcode):
     assert sanitize(country='de', postcode=postcode) == []