X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/90d4d339dbed83cc90823401634f01a20e129548..cbbcbb1fd74c118d51dc8a85d4c1d2234587dde7:/nominatim/tokenizer/sanitizers/clean_postcodes.py?ds=inline diff --git a/nominatim/tokenizer/sanitizers/clean_postcodes.py b/nominatim/tokenizer/sanitizers/clean_postcodes.py index b07908cd..05e90ca1 100644 --- a/nominatim/tokenizer/sanitizers/clean_postcodes.py +++ b/nominatim/tokenizer/sanitizers/clean_postcodes.py @@ -15,48 +15,22 @@ Arguments: postcode centroids of a country but is still searchable. When set to 'no', non-conforming postcodes are not searchable either. + default-pattern: Pattern to use, when there is none available for the + country in question. Warning: will not be used for + objects that have no country assigned. These are always + assumed to have no postcode. """ -import re - -from nominatim.errors import UsageError -from nominatim.tools import country_info - -class _PostcodeMatcher: - """ Matches and formats a postcode according to the format definition. - """ - def __init__(self, country_code, config): - if 'pattern' not in config: - raise UsageError("Field 'pattern' required for 'postcode' " - f"for country '{country_code}'") - - self.pattern = re.compile(config['pattern'].replace('d', '[0-9]') - .replace('l', '[A-Z]')) - - - def normalize(self, postcode): - """ Return the normalized version of the postcode. If the given postcode - does not correspond to the usage-pattern, return null. - """ - normalized = postcode.strip().upper() - - return normalized if self.pattern.fullmatch(normalized) else None - +from nominatim.data.postcode_format import PostcodeFormatter class _PostcodeSanitizer: def __init__(self, config): self.convert_to_address = config.get_bool('convert-to-address', True) - # Objects without a country code can't have a postcode per definition. - self.country_without_postcode = {None} - self.country_matcher = {} - - for ccode, prop in country_info.iterate('postcode'): - if prop is False: - self.country_without_postcode.add(ccode) - elif isinstance(prop, dict): - self.country_matcher[ccode] = _PostcodeMatcher(ccode, prop) - else: - raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'") + self.matcher = PostcodeFormatter() + + default_pattern = config.get('default-pattern') + if default_pattern is not None and isinstance(default_pattern, str): + self.matcher.set_default_pattern(default_pattern) def __call__(self, obj): @@ -74,7 +48,8 @@ class _PostcodeSanitizer: else: obj.address.pop(pos) else: - postcode.name = formatted + postcode.name = formatted[0] + postcode.set_attr('variant', formatted[1]) def scan(self, postcode, country): @@ -82,13 +57,13 @@ class _PostcodeSanitizer: normalized version. Returns None if the postcode does not correspond to the oficial format of the given country. """ - if country in self.country_without_postcode: + match = self.matcher.match(country, postcode) + if match is None: return None - if country in self.country_matcher: - return self.country_matcher[country].normalize(postcode) + return self.matcher.normalize(country, match),\ + ' '.join(filter(lambda p: p is not None, match.groups())) - return postcode.upper()