--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Functions for formatting postcodes according to their country-specific
+format.
+"""
+import re
+
+from nominatim.errors import UsageError
+from nominatim.tools import country_info
+
+class CountryPostcodeMatcher:
+    """ Matches and formats a postcode according to a format definition
+        of the given country.
+
+        The format definition is a dictionary with the mandatory field
+        'pattern' and the optional field 'output'. Every 'd' in the pattern
+        is replaced with the character class [0-9] and every 'l' with
+        [A-Z] before the result is compiled as a regular expression.
+        'output' is a template for `re.Match.expand()`; when it is missing,
+        the complete matched text is used.
+
+        Raises a UsageError when the definition has no 'pattern' field.
+    """
+    def __init__(self, country_code, config):
+        if 'pattern' not in config:
+            raise UsageError("Field 'pattern' required for 'postcode' "
+                             f"for country '{country_code}'")
+
+        pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
+
+        # The postcode group is non-greedy on purpose: a greedy '(.*)' also
+        # swallows trailing whitespace, which then makes the full match
+        # against the country pattern fail. The country code is escaped so
+        # that it is always taken literally.
+        self.norm_pattern = re.compile(
+            f'\\s*(?:{re.escape(country_code.upper())}[ -]?)?(.*?)\\s*')
+        self.pattern = re.compile(pc_pattern)
+
+        self.output = config.get('output', r'\g<0>')
+
+
+    def match(self, postcode):
+        """ Match the given postcode against the postcode pattern for this
+            matcher. Returns a `re.Match` object if the match was successful
+            and None otherwise.
+
+            The postcode is upper-cased and surrounding whitespace as well
+            as an optional leading country code (optionally followed by a
+            space or hyphen) are removed before matching.
+        """
+        normalized = self.norm_pattern.fullmatch(postcode.upper())
+
+        if normalized:
+            return self.pattern.fullmatch(normalized.group(1))
+
+        # The input could not be normalized at all, e.g. because it
+        # contains a line break inside the postcode.
+        return None
+
+
+    def normalize(self, match):
+        """ Return the default format of the postcode for the given match.
+            `match` must be a `re.Match` object previously returned by
+            `match()`
+        """
+        return match.expand(self.output)
+
+
+class PostcodeFormatter:
+    """ Collection of the postcode matchers for all countries together
+        with functions to apply them.
+
+        The constructor reads the 'postcode' property of every country
+        from the country information. A property of False marks the
+        country as having no postcodes, a dictionary is handed to a
+        CountryPostcodeMatcher for that country. Anything else raises
+        a UsageError.
+    """
+    def __init__(self):
+        # None is always part of the set: an object without a country
+        # code can't have a postcode per definition.
+        self.country_without_postcode = {None}
+        self.country_matcher = {}
+        # Fallback for countries without an entry of their own:
+        # accepts any postcode.
+        self.default_matcher = CountryPostcodeMatcher('', {'pattern': '.*'})
+
+        for ccode, prop in country_info.iterate('postcode'):
+            if prop is False:
+                self.country_without_postcode.add(ccode)
+                continue
+
+            if not isinstance(prop, dict):
+                raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
+
+            self.country_matcher[ccode] = CountryPostcodeMatcher(ccode, prop)
+
+
+    def set_default_pattern(self, pattern):
+        """ Replace the matcher that is used for countries which have
+            no matcher of their own with one for the given pattern.
+        """
+        fallback_config = {'pattern': pattern}
+        self.default_matcher = CountryPostcodeMatcher('', fallback_config)
+
+
+    def match(self, country_code, postcode):
+        """ Match the given postcode against the matcher of the given
+            country, falling back to the default matcher when the country
+            has none. Returns whatever the matcher returns: a `re.Match`
+            object on success and None on failure. Countries registered
+            as having no postcodes always yield None.
+        """
+        if country_code in self.country_without_postcode:
+            return None
+
+        matcher = self.country_matcher.get(country_code, self.default_matcher)
+
+        return matcher.match(postcode)
+
+
+    def normalize(self, country_code, match):
+        """ Return the default format of the postcode for the given match,
+            using the matcher of the given country or the default matcher
+            when the country has none. `match` must be a `re.Match` object
+            previously returned by `match()`
+        """
+        matcher = self.country_matcher.get(country_code, self.default_matcher)
+
+        return matcher.normalize(match)
def _add_postcode(self, item):
""" Make sure the normalized postcode is present in the word table.
"""
- analyzer = self.token_analysis.get_analyzer('@postcode')
+ analyzer = self.token_analysis.analysis.get('@postcode')
if analyzer is None:
postcode_name = item.name.strip().upper()
When set to 'no', non-conforming postcodes are not
searchable either.
"""
-import re
-
-from nominatim.errors import UsageError
-from nominatim.tools import country_info
-
-class _PostcodeMatcher:
- """ Matches and formats a postcode according to the format definition.
- """
- def __init__(self, country_code, config):
- if 'pattern' not in config:
- raise UsageError("Field 'pattern' required for 'postcode' "
- f"for country '{country_code}'")
-
- pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
-
- self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
- self.pattern = re.compile(pc_pattern)
-
- self.output = config.get('output', r'\g<0>')
-
-
- def match(self, postcode):
- """ Match the given postcode against the postcode pattern for this
- matcher. Returns a `re.Match` object if the match was successful
- and None otherwise.
- """
- # Upper-case, strip spaces and leading country code.
- normalized = self.norm_pattern.fullmatch(postcode.upper())
-
- if normalized:
- return self.pattern.fullmatch(normalized.group(1))
-
- return None
-
-
- def normalize(self, match):
- """ Return the default format of the postcode for the given match.
- `match` must be a `re.Match` object previously returned by
- `match()`
- """
- return match.expand(self.output)
-
+from nominatim.data.postcode_format import PostcodeFormatter
class _PostcodeSanitizer:
def __init__(self, config):
self.convert_to_address = config.get_bool('convert-to-address', True)
- # Objects without a country code can't have a postcode per definition.
- self.country_without_postcode = {None}
- self.country_matcher = {}
-
- for ccode, prop in country_info.iterate('postcode'):
- if prop is False:
- self.country_without_postcode.add(ccode)
- elif isinstance(prop, dict):
- self.country_matcher[ccode] = _PostcodeMatcher(ccode, prop)
- else:
- raise UsageError(f"Invalid entry 'postcode' for country '{ccode}'")
+ self.matcher = PostcodeFormatter()
default_pattern = config.get('default-pattern')
if default_pattern is not None and isinstance(default_pattern, str):
- self.default_matcher = _PostcodeMatcher('', {'pattern': default_pattern})
- else:
- self.default_matcher = None
+ self.matcher.set_default_pattern(default_pattern)
def __call__(self, obj):
normalized version. Returns None if the postcode does not
correspond to the oficial format of the given country.
"""
- if country in self.country_without_postcode:
- return None
-
- matcher = self.country_matcher.get(country, self.default_matcher)
- if matcher is None:
- return postcode.upper(), ''
-
- match = matcher.match(postcode)
+ match = self.matcher.match(country, postcode)
if match is None:
return None
- return matcher.normalize(match), ' '.join(match.groups())
+ return self.matcher.normalize(country, match), ' '.join(match.groups())
assert word_table.get_postcodes() == {pcode, }
- @pytest.mark.parametrize('pcode', ['12:23', 'ab;cd;f', '123;836'])
- def test_process_place_bad_postcode(self, word_table, pcode):
- self.process_address(postcode=pcode)
-
- assert not word_table.get_postcodes()
-
-
@pytest.mark.parametrize('hnr', ['123a', '1', '101'])
def test_process_place_housenumbers_simple(self, hnr, getorcreate_hnr_id):
info = self.process_address(housenumber=hnr)