X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/9172696324d1a3cd489428d16c2c8d88cf9adaca..7f11de0db9ecbb2a8f7551815285d7553747a96e:/test/python/tokenizer/sanitizers/test_clean_postcodes.py?ds=inline diff --git a/test/python/tokenizer/sanitizers/test_clean_postcodes.py b/test/python/tokenizer/sanitizers/test_clean_postcodes.py index 228c2f3a..70cc5256 100644 --- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py +++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py @@ -1,17 +1,17 @@ -# SPDX-License-Identifier: GPL-2.0-only +# SPDX-License-Identifier: GPL-3.0-or-later # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2022 by the Nominatim developer community. +# Copyright (C) 2024 by the Nominatim developer community. # For a full list of authors see the git log. """ Tests for the sanitizer that normalizes postcodes. """ import pytest -from nominatim.tokenizer.place_sanitizer import PlaceSanitizer -from nominatim.indexer.place_info import PlaceInfo -from nominatim.tools import country_info +from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer +from nominatim_db.data.place_info import PlaceInfo +from nominatim_db.data import country_info @pytest.fixture def sanitize(def_config, request): @@ -25,7 +25,7 @@ def sanitize(def_config, request): if country is not None: pi['country_code'] = country - _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi)) + _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi)) return sorted([(p.kind, p.name) for p in address]) @@ -88,3 +88,151 @@ def test_postcode_sweden_pass(sanitize, postcode): def test_postcode_sweden_fail(sanitize, postcode): assert sanitize(country='se', postcode=postcode) == [] + +@pytest.mark.parametrize("postcode", ('AD123', '123', 'AD 123', 'AD-123')) +def test_postcode_andorra_pass(sanitize, postcode): + assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')] + + +@pytest.mark.parametrize("postcode", ('AD1234', 'AD AD123', 'XX123')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_andorra_fail(sanitize, postcode): + assert sanitize(country='ad', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('AI-2640', '2640', 'AI 2640')) +def test_postcode_anguilla_pass(sanitize, postcode): + assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')] + + +@pytest.mark.parametrize("postcode", ('AI-2000', 'AI US-2640', 'AI AI-2640')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_anguilla_fail(sanitize, postcode): + assert sanitize(country='ai', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111')) +def test_postcode_brunei_pass(sanitize, postcode): + assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')] + + +@pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_brunei_fail(sanitize, postcode): + assert sanitize(country='bn', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA')) +def test_postcode_isle_of_man_pass(sanitize, postcode): + assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')] + + +@pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_isle_of_man_fail(sanitize, postcode): + assert sanitize(country='im', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA')) +def test_postcode_jersey_pass(sanitize, postcode): + assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')] + + +@pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_jersey_fail(sanitize, postcode): + assert sanitize(country='je', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('KY1-1234', '1-1234', 'KY 1-1234')) +def test_postcode_cayman_islands_pass(sanitize, postcode): + assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')] + + +@pytest.mark.parametrize("postcode", ('KY-1234', 'KZ1-1234', 'KY1 1234', 'KY1-123', 'KY KY1-1234')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_cayman_islands_fail(sanitize, postcode): + assert sanitize(country='ky', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('LC11 222', '11 222', '11222', 'LC 11 222')) +def test_postcode_saint_lucia_pass(sanitize, postcode): + assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')] + + +@pytest.mark.parametrize("postcode", ('11 2222', 'LC LC11 222')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_saint_lucia_fail(sanitize, postcode): + assert sanitize(country='lc', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('LV-1111', '1111', 'LV 1111', 'LV1111',)) +def test_postcode_latvia_pass(sanitize, postcode): + assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')] + + +@pytest.mark.parametrize("postcode", ('111', '11111', 'LV LV-1111')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_latvia_fail(sanitize, postcode): + assert sanitize(country='lv', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('MD-1111', '1111', 'MD 1111', 'MD1111')) +def test_postcode_moldova_pass(sanitize, postcode): + assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')] + + +@pytest.mark.parametrize("postcode", ("MD MD-1111", "MD MD1111", "MD MD 1111")) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_moldova_fail(sanitize, postcode): + assert sanitize(country='md', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222')) +def test_postcode_malta_pass(sanitize, postcode): + assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)] + + +@pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111')) +def test_postcode_malta_mtarfa_pass(sanitize, postcode): + assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')] + + +@pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_malta_fail(sanitize, postcode): + assert sanitize(country='mt', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('VC1111', '1111', 'VC-1111', 'VC 1111')) +def test_postcode_saint_vincent_pass(sanitize, postcode): + assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')] + + +@pytest.mark.parametrize("postcode", ('VC11', 'VC VC1111')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_saint_vincent_fail(sanitize, postcode): + assert sanitize(country='vc', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('VG1111', '1111', 'VG 1111', 'VG-1111')) +def test_postcode_virgin_islands_pass(sanitize, postcode): + assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')] + + +@pytest.mark.parametrize("postcode", ('111', '11111', 'VG VG1111')) +@pytest.mark.sanitizer_params(convert_to_address=False) +def test_postcode_virgin_islands_fail(sanitize, postcode): + assert sanitize(country='vg', postcode=postcode) == [] + + +@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44')) +@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}') +def test_postcode_default_pattern_pass(sanitize, postcode): + assert sanitize(country='an', postcode=postcode) == [('postcode', postcode.upper())] + + +@pytest.mark.parametrize("postcode", ('C', '12', 'ABC123DEF 456', '1234,5678', '11223;11224')) +@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}') +def test_postcode_default_pattern_fail(sanitize, postcode): + assert sanitize(country='an', postcode=postcode) == []