X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/90d4d339dbed83cc90823401634f01a20e129548..8dfdf64dd5b3f0d88a011654076a0a041b7b252c:/test/python/tokenizer/sanitizers/test_clean_postcodes.py diff --git a/test/python/tokenizer/sanitizers/test_clean_postcodes.py b/test/python/tokenizer/sanitizers/test_clean_postcodes.py index 7cb3c70f..f2c965ad 100644 --- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py +++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py @@ -10,8 +10,8 @@ Tests for the sanitizer that normalizes postcodes. import pytest from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo -from nominatim.tools import country_info +from nominatim.data.place_info import PlaceInfo +from nominatim.data import country_info @pytest.fixture def sanitize(def_config, request): @@ -25,7 +25,7 @@ def sanitize(def_config, request): if country is not None: pi['country_code'] = country - _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi)) + _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi)) return sorted([(p.kind, p.name) for p in address]) @@ -43,12 +43,60 @@ def test_postcode_no_country_drop(sanitize, country): assert sanitize(country=country, postcode='23231') == [] -@pytest.mark.parametrize("postcode", ('12345', ' 34009 ')) +@pytest.mark.parametrize("postcode", ('12345', ' 12345 ', 'de 12345', + 'DE12345', 'DE 12345', 'DE-12345')) def test_postcode_pass_good_format(sanitize, postcode): - assert sanitize(country='de', postcode=postcode) == [('postcode', postcode.strip())] + assert sanitize(country='de', postcode=postcode) == [('postcode', '12345')] -@pytest.mark.parametrize("postcode", ('123456', '', ' ', '.....')) +@pytest.mark.parametrize("postcode", ('123456', '', ' ', '.....', + 'DE 12345', 'DEF12345', 'CH 12345')) @pytest.mark.sanitizer_params(convert_to_address=False) def test_postcode_drop_bad_format(sanitize, postcode): assert sanitize(country='de', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('1234', '9435', '99000')) +def test_postcode_cyprus_pass(sanitize, postcode): + assert sanitize(country='cy', postcode=postcode) == [('postcode', postcode)] + + +@pytest.mark.parametrize("postcode", ('91234', '99a45', '567')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_cyprus_fail(sanitize, postcode): + assert sanitize(country='cy', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('123456', 'A33F2G7')) +def test_postcode_kazakhstan_pass(sanitize, postcode): + assert sanitize(country='kz', postcode=postcode) == [('postcode', postcode)] + + +@pytest.mark.parametrize("postcode", ('V34T6Y923456', '99345')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_kazakhstan_fail(sanitize, postcode): + assert sanitize(country='kz', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('675 34', '67534', 'SE-675 34', 'SE67534')) +def test_postcode_sweden_pass(sanitize, postcode): + assert sanitize(country='se', postcode=postcode) == [('postcode', '675 34')] + + +@pytest.mark.parametrize("postcode", ('67 345', '671123')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_sweden_fail(sanitize, postcode): + assert sanitize(country='se', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44')) +@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}') +def test_postcode_default_pattern_pass(sanitize, postcode): + assert sanitize(country='an', postcode=postcode) == [('postcode', postcode.upper())] + + +@pytest.mark.parametrize("postcode", ('C', '12', 'ABC123DEF 456', '1234,5678', '11223;11224')) +@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}') +def test_postcode_default_pattern_fail(sanitize, postcode): + assert sanitize(country='an', postcode=postcode) == [] +