2 Tests for the sanitizer that splits multivalue lists.
6 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
7 from nominatim.indexer.place_info import PlaceInfo
9 from nominatim.errors import UsageError
def run_sanitizer_on(**kwargs):
    """Run the 'split-name-list' sanitizer step over the given name tags.

    The keyword arguments are passed to PlaceInfo as its 'name' dictionary.
    Returns the names reported by the sanitizer as a sorted list of
    (name, kind, suffix) tuples.
    """
    place_info = PlaceInfo({'name': kwargs})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])
    names, _ = sanitizer.process_names(place_info)

    result = [(item.name, item.kind, item.suffix) for item in names]
    # Sort so that callers can compare against a fixed expected order.
    result.sort()
    return result
def sanitize_with_delimiter(delimiter, name):
    """Split a single 'name' value using a custom delimiter set.

    `delimiter` is handed to the 'split-name-list' step as its
    'delimiters' option; `name` is the raw value of the 'name' tag.
    Returns the sorted list of the resulting name strings.
    """
    place_info = PlaceInfo({'name': {'name': name}})
    step_config = {'step': 'split-name-list', 'delimiters': delimiter}
    names, _ = PlaceSanitizer([step_config]).process_names(place_info)

    return sorted(item.name for item in names)
27 assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
28 assert run_sanitizer_on(name='') == [('', 'name', None)]
32 assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
35 assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
36 ('boat', 'short_name', None)]
def test_empty_fields():
    """Empty and whitespace-only list entries must be dropped from the result."""
    # NOTE(review): the closing ('B', 'name', None) element of the first two
    # expectations was not present in the reviewed text. It is restored on the
    # assumption that empty fields are dropped, which is what the last two
    # assertions demonstrate - confirm against the upstream test.
    assert run_sanitizer_on(name='A;;B') == [('A', 'name', None),
                                             ('B', 'name', None)]
    assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
                                              ('B', 'name', None)]
    assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
    assert run_sanitizer_on(name='B,') == [('B', 'name', None)]
def test_custom_delimiters():
    """Check splitting when the delimiter characters are configured explicitly."""
    # Each case is (delimiters, raw name value, expected sorted names).
    cases = (
        # A comma is left alone when only ':' is configured.
        (':', '12:45,3', ['12', '45,3']),
        # A backslash works as a delimiter; the trailing one adds no entry.
        ('\\', 'a;\\b!#@ \\', ['a;', 'b!#@']),
        # Every character of the delimiter string splits on its own.
        ('[]', 'foo[to]be', ['be', 'foo', 'to']),
        (' ', 'morning sun', ['morning', 'sun']),
    )

    for delimiter, value, expected in cases:
        assert sanitize_with_delimiter(delimiter, value) == expected
def test_empty_delimiter_set():
    """An empty 'delimiters' string must be rejected with a UsageError."""
    no_delimiters = ''

    with pytest.raises(UsageError):
        sanitize_with_delimiter(no_delimiters, 'abc')
def test_no_name_list():
    """A place that carries only address tags must not produce any names.

    The single address entry of the input is still expected in the
    address result.
    """
    place = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])
    name, address = sanitizer.process_names(place)

    # The input has no 'name' section, so nothing may be reported as a name.
    assert not name
    assert len(address) == 1