]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/sanitizers/clean_postcodes.py
add debug output for unit tests
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_postcodes.py
index fbc46fa582215460f7f7e62f6fe63087b3019ab9..593f770db9ade7b1e9d8915d6de85d19003d63b5 100644 (file)
@@ -15,12 +15,20 @@ Arguments:
                         postcode centroids of a country but is still searchable.
                         When set to 'no', non-conforming postcodes are not
                         searchable either.
                         postcode centroids of a country but is still searchable.
                         When set to 'no', non-conforming postcodes are not
                         searchable either.
+    default-pattern:    Pattern to use, when there is none available for the
+                        country in question. Warning: will not be used for
+                        objects that have no country assigned. These are always
+                        assumed to have no postcode.
 """
 """
+from typing import Callable, Optional, Tuple
+
 from nominatim.data.postcode_format import PostcodeFormatter
 from nominatim.data.postcode_format import PostcodeFormatter
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
 
 class _PostcodeSanitizer:
 
 
 class _PostcodeSanitizer:
 
-    def __init__(self, config):
+    def __init__(self, config: SanitizerConfig) -> None:
         self.convert_to_address = config.get_bool('convert-to-address', True)
         self.matcher = PostcodeFormatter()
 
         self.convert_to_address = config.get_bool('convert-to-address', True)
         self.matcher = PostcodeFormatter()
 
@@ -29,7 +37,7 @@ class _PostcodeSanitizer:
             self.matcher.set_default_pattern(default_pattern)
 
 
             self.matcher.set_default_pattern(default_pattern)
 
 
-    def __call__(self, obj):
+    def __call__(self, obj: ProcessInfo) -> None:
         if not obj.address:
             return
 
         if not obj.address:
             return
 
@@ -48,21 +56,24 @@ class _PostcodeSanitizer:
                 postcode.set_attr('variant', formatted[1])
 
 
                 postcode.set_attr('variant', formatted[1])
 
 
-    def scan(self, postcode, country):
+    def scan(self, postcode: str, country: Optional[str]) -> Optional[Tuple[str, str]]:
         """ Check the postcode for correct formatting and return the
             normalized version. Returns None if the postcode does not
         """ Check the postcode for correct formatting and return the
             normalized version. Returns None if the postcode does not
-            correspond to the oficial format of the given country.
+            correspond to the official format of the given country.
         """
         match = self.matcher.match(country, postcode)
         if match is None:
             return None
 
         """
         match = self.matcher.match(country, postcode)
         if match is None:
             return None
 
-        return self.matcher.normalize(country, match), ' '.join(match.groups())
+        assert country is not None
+
+        return self.matcher.normalize(country, match),\
+               ' '.join(filter(lambda p: p is not None, match.groups()))
 
 
 
 
 
 
 
 
-def create(config):
+def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
     """ Create a housenumber processing function.
     """
 
     """ Create a housenumber processing function.
     """