git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/sanitizers/clean_postcodes.py
add japanese sanitizer
[nominatim.git] / nominatim / tokenizer / sanitizers / clean_postcodes.py
index 05e90ca122fa71eb4f8eb8f482bd15819fa623c2..5eaea3917c7aea9a2e8047f773cd03ac17990d34 100644 (file)
@@ -20,11 +20,15 @@ Arguments:
                         objects that have no country assigned. These are always
                         assumed to have no postcode.
 """
                         objects that have no country assigned. These are always
                         assumed to have no postcode.
 """
+from typing import Callable, Optional, Tuple
+
 from nominatim.data.postcode_format import PostcodeFormatter
 from nominatim.data.postcode_format import PostcodeFormatter
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
 
 class _PostcodeSanitizer:
 
 
 class _PostcodeSanitizer:
 
-    def __init__(self, config):
+    def __init__(self, config: SanitizerConfig) -> None:
         self.convert_to_address = config.get_bool('convert-to-address', True)
         self.matcher = PostcodeFormatter()
 
         self.convert_to_address = config.get_bool('convert-to-address', True)
         self.matcher = PostcodeFormatter()
 
@@ -33,7 +37,7 @@ class _PostcodeSanitizer:
             self.matcher.set_default_pattern(default_pattern)
 
 
             self.matcher.set_default_pattern(default_pattern)
 
 
-    def __call__(self, obj):
+    def __call__(self, obj: ProcessInfo) -> None:
         if not obj.address:
             return
 
         if not obj.address:
             return
 
@@ -52,23 +56,25 @@ class _PostcodeSanitizer:
                 postcode.set_attr('variant', formatted[1])
 
 
                 postcode.set_attr('variant', formatted[1])
 
 
-    def scan(self, postcode, country):
+    def scan(self, postcode: str, country: Optional[str]) -> Optional[Tuple[str, str]]:
         """ Check the postcode for correct formatting and return the
             normalized version. Returns None if the postcode does not
         """ Check the postcode for correct formatting and return the
             normalized version. Returns None if the postcode does not
-            correspond to the oficial format of the given country.
+            correspond to the official format of the given country.
         """
         match = self.matcher.match(country, postcode)
         if match is None:
             return None
 
         """
         match = self.matcher.match(country, postcode)
         if match is None:
             return None
 
+        assert country is not None
+
         return self.matcher.normalize(country, match),\
                ' '.join(filter(lambda p: p is not None, match.groups()))
 
 
 
 
         return self.matcher.normalize(country, match),\
                ' '.join(filter(lambda p: p is not None, match.groups()))
 
 
 
 
-def create(config):
-    """ Create a housenumber processing function.
+def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
+    """ Create a function that filters postcodes by their officially allowed pattern.
     """
 
     return _PostcodeSanitizer(config)
     """
 
     return _PostcodeSanitizer(config)