git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/sanitizers/clean_postcodes.py
postcodes: introduce a default pattern for countries without postcodes
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_postcodes.py
index a968c9db0787f8a4bb009d72b20db72800145094..c6292a2942b217ae866c8b43ded81a5aa75ff089 100644 (file)
@@ -37,20 +37,28 @@ class _PostcodeMatcher:
         self.output = config.get('output', r'\g<0>')
 
 
-    def normalize(self, postcode):
-        """ Return the normalized version of the postcode. If the given postcode
-            does not correspond to the usage-pattern, return null.
+    def match(self, postcode):
+        """ Match the given postcode against the postcode pattern for this
+            matcher. Returns a `re.Match` object if the match was successful
+            and None otherwise.
         """
         # Upper-case, strip spaces and leading country code.
         normalized = self.norm_pattern.fullmatch(postcode.upper())
 
         if normalized:
-            match = self.pattern.fullmatch(normalized.group(1))
-            return match.expand(self.output) if match else None
+            return self.pattern.fullmatch(normalized.group(1))
 
         return None
 
 
+    def normalize(self, match):
+        """ Return the default format of the postcode for the given match.
+            `match` must be a `re.Match` object previously returned by
+            `match()`
+        """
+        return match.expand(self.output)
+
+
 class _PostcodeSanitizer:
 
     def __init__(self, config):
@@ -67,6 +75,12 @@ class _PostcodeSanitizer:
             else:
                 raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
 
+        default_pattern = config.get('default-pattern')
+        if default_pattern is not None and isinstance(default_pattern, str):
+            self.default_matcher = _PostcodeMatcher('', {'pattern': default_pattern})
+        else:
+            self.default_matcher = None
+
 
     def __call__(self, obj):
         if not obj.address:
@@ -83,7 +97,8 @@ class _PostcodeSanitizer:
                 else:
                     obj.address.pop(pos)
             else:
-                postcode.name = formatted
+                postcode.name = formatted[0]
+                postcode.set_attr('lookup', formatted[1])
 
 
     def scan(self, postcode, country):
@@ -94,10 +109,16 @@ class _PostcodeSanitizer:
         if country in self.country_without_postcode:
             return None
 
-        if country in self.country_matcher:
-            return self.country_matcher[country].normalize(postcode)
+        matcher = self.country_matcher.get(country, self.default_matcher)
+        if matcher is None:
+            return postcode.upper(), ''
+
+        match = matcher.match(postcode)
+        if match is None:
+            return None
+
+        return matcher.normalize(match), ' '.join(match.groups())
 
-        return postcode.upper()