postcode centroids of a country but is still searchable.
When set to 'no', non-conforming postcodes are not
searchable either.
+ default-pattern: Pattern to use when none is available for the
+ country in question. Warning: it will not be used for
+ objects that have no country assigned. These are always
+ assumed to have no postcode.
"""
-import re
-
-from nominatim.errors import UsageError
-from nominatim.tools import country_info
-
-class _PostcodeMatcher:
- """ Matches and formats a postcode according to the format definition.
- """
- def __init__(self, country_code, config):
- if 'pattern' not in config:
- raise UsageError("Field 'pattern' required for 'postcode' "
- f"for country '{country_code}'")
-
- pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
-
- self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
- self.pattern = re.compile(pc_pattern)
-
- self.output = config.get('output', r'\g<0>')
-
-
- def match(self, postcode):
- """ Match the given postcode against the postcode pattern for this
- matcher. Returns a `re.Match` object if the match was successful
- and None otherwise.
- """
- # Upper-case, strip spaces and leading country code.
- normalized = self.norm_pattern.fullmatch(postcode.upper())
-
- if normalized:
- return self.pattern.fullmatch(normalized.group(1))
-
- return None
-
-
- def normalize(self, match):
- """ Return the default format of the postcode for the given match.
- `match` must be a `re.Match` object previously returned by
- `match()`
- """
- return match.expand(self.output)
+from typing import Callable, Optional, Tuple
+from nominatim.data.postcode_format import PostcodeFormatter
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
class _PostcodeSanitizer:
- def __init__(self, config):
+ def __init__(self, config: SanitizerConfig) -> None:
+ """ Read the sanitizer options and set up the postcode matcher.
+
+ Options read from `config`: 'convert-to-address' (boolean,
+ defaults to True) and 'default-pattern' (only applied when
+ it is a string).
+ """
self.convert_to_address = config.get_bool('convert-to-address', True)
- # Objects without a country code can't have a postcode per definition.
- self.country_without_postcode = {None}
- self.country_matcher = {}
-
- for ccode, prop in country_info.iterate('postcode'):
- if prop is False:
- self.country_without_postcode.add(ccode)
- elif isinstance(prop, dict):
- self.country_matcher[ccode] = _PostcodeMatcher(ccode, prop)
- else:
- raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
+ self.matcher = PostcodeFormatter()
default_pattern = config.get('default-pattern')
+ # A missing or non-string 'default-pattern' is ignored: no default
+ # pattern is handed to the matcher in that case.
if default_pattern is not None and isinstance(default_pattern, str):
- self.default_matcher = _PostcodeMatcher('', {'pattern': default_pattern})
- else:
- self.default_matcher = None
+ self.matcher.set_default_pattern(default_pattern)
- def __call__(self, obj):
+ def __call__(self, obj: ProcessInfo) -> None:
if not obj.address:
return
postcode.set_attr('variant', formatted[1])
- def scan(self, postcode, country):
+ def scan(self, postcode: str, country: Optional[str]) -> Optional[Tuple[str, str]]:
""" Check the postcode for correct formatting and return the
normalized version. Returns None if the postcode does not
- correspond to the oficial format of the given country.
+ correspond to the official format of the given country.
+
+ On success the result is a tuple: the first element is the
+ postcode as normalized by the matcher, the second element is
+ the match groups that are not None, joined by single spaces.
"""
- if country in self.country_without_postcode:
- return None
-
- matcher = self.country_matcher.get(country, self.default_matcher)
- if matcher is None:
- return postcode.upper(), ''
-
- match = matcher.match(postcode)
+ match = self.matcher.match(country, postcode)
if match is None:
return None
- return matcher.normalize(match), ' '.join(match.groups())
+ # NOTE(review): presumably the matcher never yields a match when no
+ # country is given, so this only narrows the Optional type - confirm
+ # against PostcodeFormatter.match().
+ assert country is not None
+
+ # None entries in match.groups() are dropped before joining.
+ return self.matcher.normalize(country, match),\
+ ' '.join(filter(lambda p: p is not None, match.groups()))
-def create(config):
- """ Create a housenumber processing function.
+def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
+ """ Create a function that filters postcodes by their officially allowed pattern.
+
+ The returned object is a `_PostcodeSanitizer` built from `config`.
"""
return _PostcodeSanitizer(config)