1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Tests for the sanitizer that normalizes postcodes.
"""
import pytest

from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
from nominatim.indexer.place_info import PlaceInfo
from nominatim.tools import country_info
@pytest.fixture
def sanitize(def_config, request):
    """Provide a function that runs the 'clean-postcodes' sanitizer step.

    Additional sanitizer options can be set per test with the
    ``sanitizer_params`` marker. Underscores in the marker's keyword
    names are replaced by dashes to form the configuration keys.

    The returned function accepts an optional ``country`` code and the
    address parts as keyword arguments. It returns the sorted list of
    ``(kind, name)`` tuples taken from the second element of the result
    of ``PlaceSanitizer.process_names()``.
    """
    country_info.setup_country_config(def_config)

    sanitizer_args = {'step': 'clean-postcodes'}
    for mark in request.node.iter_markers(name="sanitizer_params"):
        sanitizer_args.update({k.replace('_', '-'): v for k, v in mark.kwargs.items()})

    def _run(country=None, **kwargs):
        pi = {'address': kwargs}
        # Leave the country code out entirely when no country is given.
        if country is not None:
            pi['country_code'] = country

        _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi))

        # Sort to make the comparison independent of the output order.
        return sorted((p.kind, p.name) for p in address)

    # The tests call the fixture value directly, so hand out the runner.
    return _run
@pytest.mark.parametrize("country", [None, 'ae'])
def test_postcode_no_country(sanitize, country):
    """ With no country or with 'ae' the postcode must come back
        with the kind 'unofficial_postcode'.
    """
    expected = [('unofficial_postcode', '23231')]

    assert sanitize(country=country, postcode='23231') == expected
@pytest.mark.parametrize("country", [None, 'ae'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_no_country_drop(sanitize, country):
    """ With 'convert-to-address' switched off, nothing must be left
        of the postcode when there is no country or the country is 'ae'.
    """
    result = sanitize(country=country, postcode='23231')

    assert result == []
@pytest.mark.parametrize("postcode", [
    '12345',
    ' 12345 ',
    'de 12345',
    'DE12345',
    'DE 12345',
    'DE-12345',
])
def test_postcode_pass_good_format(sanitize, postcode):
    """ All listed spellings must be returned as postcode '12345'
        for country 'de'.
    """
    expected = [('postcode', '12345')]

    assert sanitize(country='de', postcode=postcode) == expected
# NOTE: 'DE  12345' is written with two spaces on purpose. The single-space
# spelling 'DE 12345' is asserted to be valid in
# test_postcode_pass_good_format and must not be listed here as well.
# Presumably only a single separator is tolerated after the country
# prefix - confirm against the sanitizer implementation.
@pytest.mark.parametrize("postcode", ('123456', '', ' ', '.....',
                                      'DE  12345', 'DEF12345', 'CH 12345'))
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_drop_bad_format(sanitize, postcode):
    """ With 'convert-to-address' switched off, each of the listed values
        must yield an empty result for country 'de'.
    """
    assert sanitize(country='de', postcode=postcode) == []
@pytest.mark.parametrize("postcode", ['1234', '9435', '99000'])
def test_postcode_cyprus_pass(sanitize, postcode):
    """ The listed values must be returned unchanged as postcode
        for country 'cy'.
    """
    expected = [('postcode', postcode)]

    assert sanitize(country='cy', postcode=postcode) == expected
@pytest.mark.parametrize("postcode", ['91234', '99a45', '567'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_cyprus_fail(sanitize, postcode):
    """ With 'convert-to-address' switched off, the listed values must
        yield an empty result for country 'cy'.
    """
    result = sanitize(country='cy', postcode=postcode)

    assert result == []
@pytest.mark.parametrize("postcode", ['123456', 'A33F2G7'])
def test_postcode_kazakhstan_pass(sanitize, postcode):
    """ The listed values must be returned unchanged as postcode
        for country 'kz'.
    """
    expected = [('postcode', postcode)]

    assert sanitize(country='kz', postcode=postcode) == expected
@pytest.mark.parametrize("postcode", ['V34T6Y923456', '99345'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_kazakhstan_fail(sanitize, postcode):
    """ With 'convert-to-address' switched off, the listed values must
        yield an empty result for country 'kz'.
    """
    result = sanitize(country='kz', postcode=postcode)

    assert result == []
@pytest.mark.parametrize("postcode", ['675 34', '67534', 'SE-675 34', 'SE67534'])
def test_postcode_sweden_pass(sanitize, postcode):
    """ All listed spellings must be returned as postcode '675 34'
        for country 'se'.
    """
    expected = [('postcode', '675 34')]

    assert sanitize(country='se', postcode=postcode) == expected
@pytest.mark.parametrize("postcode", ['67 345', '671123'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_sweden_fail(sanitize, postcode):
    """ With 'convert-to-address' switched off, the listed values must
        yield an empty result for country 'se'.
    """
    result = sanitize(country='se', postcode=postcode)

    assert result == []
@pytest.mark.parametrize("postcode", ['AB1', '123-456-7890', '1 as 44'])
@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_pass(sanitize, postcode):
    """ With a 'default-pattern' configured, the listed values must be
        returned as upper-cased postcode for country 'an'.
    """
    expected = [('postcode', postcode.upper())]

    assert sanitize(country='an', postcode=postcode) == expected
@pytest.mark.parametrize("postcode", [
    'C',
    '12',
    'ABC123DEF 456',
    '1234,5678',
    '11223;11224',
])
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_fail(sanitize, postcode):
    """ With a 'default-pattern' configured and 'convert-to-address'
        switched off, the listed values must yield an empty result
        for country 'an'.
    """
    result = sanitize(country='an', postcode=postcode)

    assert result == []