From: Sarah Hoffmann Date: Tue, 24 May 2022 15:11:40 +0000 (+0200) Subject: postcode: generate a generic form X-Git-Tag: v4.1.0~22^2~20 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/5ba75df507617162907c2b42a7825ee406218582 postcode: generate a generic form --- diff --git a/nominatim/tokenizer/sanitizers/clean_postcodes.py b/nominatim/tokenizer/sanitizers/clean_postcodes.py index a968c9db..42beea37 100644 --- a/nominatim/tokenizer/sanitizers/clean_postcodes.py +++ b/nominatim/tokenizer/sanitizers/clean_postcodes.py @@ -37,20 +37,28 @@ class _PostcodeMatcher: self.output = config.get('output', r'\g<0>') - def normalize(self, postcode): - """ Return the normalized version of the postcode. If the given postcode - does not correspond to the usage-pattern, return null. + def match(self, postcode): + """ Match the given postcode against the postcode pattern for this + matcher. Returns a `re.Match` object if the match was successful + and None otherwise. """ # Upper-case, strip spaces and leading country code. normalized = self.norm_pattern.fullmatch(postcode.upper()) if normalized: - match = self.pattern.fullmatch(normalized.group(1)) - return match.expand(self.output) if match else None + return self.pattern.fullmatch(normalized.group(1)) return None + def normalize(self, match): + """ Return the default format of the postcode for the given match. + `match` must be a `re.Match` object previously returned by + `match()` + """ + return match.expand(self.output) + + class _PostcodeSanitizer: def __init__(self, config): @@ -83,7 +91,8 @@ class _PostcodeSanitizer: else: obj.address.pop(pos) else: - postcode.name = formatted + postcode.name = formatted[0] + postcode.set_attr('lookup', formatted[1]) def scan(self, postcode, country): @@ -94,10 +103,14 @@ class _PostcodeSanitizer: if country in self.country_without_postcode: return None - if country in self.country_matcher: - return self.country_matcher[country].normalize(postcode) + matcher = self.country_matcher.get(country) + if matcher is not None: + match = matcher.match(postcode) + if match is None: + return None + return matcher.normalize(match), ' '.join(match.groups()) - return postcode.upper() + return postcode.upper(), ''