]> git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/sanitizers/test_clean_postcodes.py
add postcodes patterns without optional spaces
[nominatim.git] / test / python / tokenizer / sanitizers / test_clean_postcodes.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for the sanitizer that normalizes postcodes.
9 """
10 import pytest
11
12 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
13 from nominatim.indexer.place_info import PlaceInfo
14 from nominatim.tools import country_info
15
@pytest.fixture
def sanitize(def_config, request):
    """ Fixture providing a function that runs the 'clean-postcodes'
        sanitizer step over a set of address tags.

        Additional sanitizer options are taken from the keyword arguments
        of any `sanitizer_params` marker on the requesting test, with
        underscores in the option names replaced by dashes.

        The returned function takes an optional country code plus the
        address tags as keyword arguments and returns the sorted list of
        (kind, name) tuples of the resulting address entries.
    """
    country_info.setup_country_config(def_config)

    step_config = {'step': 'clean-postcodes'}
    for marker in request.node.iter_markers(name="sanitizer_params"):
        for option, value in marker.kwargs.items():
            step_config[option.replace('_', '-')] = value

    def _run(country=None, **kwargs):
        place = {'address': kwargs}
        if country is not None:
            place['country_code'] = country

        sanitizer = PlaceSanitizer([step_config])
        # Only the address part of the result is of interest here.
        _, address = sanitizer.process_names(PlaceInfo(place))

        entries = [(item.kind, item.name) for item in address]
        entries.sort()
        return entries

    return _run
33
34
@pytest.mark.parametrize("country", [None, 'ae'])
def test_postcode_no_country(sanitize, country):
    """ With no country code, or with country 'ae', the postcode is
        expected to come back as an 'unofficial_postcode' entry.
    """
    result = sanitize(country=country, postcode='23231')

    assert result == [('unofficial_postcode', '23231')]
38
39
@pytest.mark.parametrize("country", [None, 'ae'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_no_country_drop(sanitize, country):
    """ With 'convert-to-address' switched off, the same postcode is
        expected to yield no address entries at all.
    """
    result = sanitize(country=country, postcode='23231')

    assert result == []
44
45
@pytest.mark.parametrize("postcode", ['12345', '  34009  '])
def test_postcode_pass_good_format(sanitize, postcode):
    """ Five-digit postcodes for country 'de' are expected to be kept as
        'postcode' entries, with surrounding whitespace removed.
    """
    result = sanitize(country='de', postcode=postcode)
    expected_name = postcode.strip()

    assert result == [('postcode', expected_name)]
49
50
@pytest.mark.parametrize("postcode", ['123456', '', '   ', '.....'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_drop_bad_format(sanitize, postcode):
    """ With 'convert-to-address' switched off, overlong, empty,
        blank and dots-only postcodes for country 'de' are expected
        to yield no address entries.
    """
    result = sanitize(country='de', postcode=postcode)

    assert result == []
55
56
@pytest.mark.parametrize("postcode", ['1234', '9435', '99000'])
def test_postcode_cyprus_pass(sanitize, postcode):
    """ The listed postcodes for country 'cy' are expected to be kept
        unchanged as 'postcode' entries.
    """
    result = sanitize(country='cy', postcode=postcode)

    assert result == [('postcode', postcode)]
60
61
@pytest.mark.parametrize("postcode", ['91234', '99a45', '567'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_cyprus_fail(sanitize, postcode):
    """ With 'convert-to-address' switched off, the listed postcodes
        for country 'cy' are expected to yield no address entries.
    """
    result = sanitize(country='cy', postcode=postcode)

    assert result == []
66
67
@pytest.mark.parametrize("postcode", ['123456', 'A33F2G7'])
def test_postcode_kazakhstan_pass(sanitize, postcode):
    """ The listed postcodes for country 'kz' (one purely numeric, one
        alphanumeric) are expected to be kept unchanged as 'postcode'
        entries.
    """
    result = sanitize(country='kz', postcode=postcode)

    assert result == [('postcode', postcode)]
71
72
@pytest.mark.parametrize("postcode", ['V34T6Y923456', '99345'])
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_kazakhstan_fail(sanitize, postcode):
    """ With 'convert-to-address' switched off, the listed postcodes
        for country 'kz' are expected to yield no address entries.
    """
    result = sanitize(country='kz', postcode=postcode)

    assert result == []
77