git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/tokenizer/sanitizers/test_clean_postcodes.py
add query analyser for legacy tokenizer
[nominatim.git] / test / python / tokenizer / sanitizers / test_clean_postcodes.py
index d6371e075ba52c448a097780d7c19be042de1758..f2c965ad9b1db0017864b5bbaec1677023b1d838 100644 (file)
@@ -10,8 +10,8 @@ Tests for the sanitizer that normalizes postcodes.
 import pytest
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
-from nominatim.indexer.place_info import PlaceInfo
-from nominatim.tools import country_info
+from nominatim.data.place_info import PlaceInfo
+from nominatim.data import country_info
 
 @pytest.fixture
 def sanitize(def_config, request):
@@ -25,7 +25,7 @@ def sanitize(def_config, request):
         if country is not None:
             pi['country_code'] = country
 
-        _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi))
+        _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi))
 
         return sorted([(p.kind, p.name) for p in address])
 
@@ -43,12 +43,14 @@ def test_postcode_no_country_drop(sanitize, country):
     assert sanitize(country=country, postcode='23231') == []
 
 
-@pytest.mark.parametrize("postcode", ('12345', '  34009  '))
+@pytest.mark.parametrize("postcode", ('12345', '  12345  ', 'de 12345',
+                                      'DE12345', 'DE 12345', 'DE-12345'))
 def test_postcode_pass_good_format(sanitize, postcode):
-    assert sanitize(country='de', postcode=postcode) == [('postcode', postcode.strip())]
+    assert sanitize(country='de', postcode=postcode) == [('postcode', '12345')]
 
 
-@pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....'))
+@pytest.mark.parametrize("postcode", ('123456', '', '   ', '.....',
+                                      'DE  12345', 'DEF12345', 'CH 12345'))
 @pytest.mark.sanitizer_params(convert_to_address=False)
 def test_postcode_drop_bad_format(sanitize, postcode):
     assert sanitize(country='de', postcode=postcode) == []
@@ -75,3 +77,26 @@ def test_postcode_kazakhstan_pass(sanitize, postcode):
 def test_postcode_kazakhstan_fail(sanitize, postcode):
     assert sanitize(country='kz', postcode=postcode) == []
 
+
+@pytest.mark.parametrize("postcode", ('675 34', '67534', 'SE-675 34', 'SE67534'))
+def test_postcode_sweden_pass(sanitize, postcode):
+    assert sanitize(country='se', postcode=postcode) == [('postcode', '675 34')]
+
+
+@pytest.mark.parametrize("postcode", ('67 345', '671123'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_sweden_fail(sanitize, postcode):
+    assert sanitize(country='se', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
+@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
+def test_postcode_default_pattern_pass(sanitize, postcode):
+    assert sanitize(country='an', postcode=postcode) == [('postcode', postcode.upper())]
+
+
+@pytest.mark.parametrize("postcode", ('C', '12', 'ABC123DEF 456', '1234,5678', '11223;11224'))
+@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
+def test_postcode_default_pattern_fail(sanitize, postcode):
+    assert sanitize(country='an', postcode=postcode) == []
+