]> git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/sanitizers/test_split_name_list.py
skip most addr: tags with suffixes
[nominatim.git] / test / python / tokenizer / sanitizers / test_split_name_list.py
1 """
2 Tests for the sanitizer that splits multivalue lists.
3 """
4 import pytest
5
6 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
7 from nominatim.indexer.place_info import PlaceInfo
8
9 from nominatim.errors import UsageError
10
def run_sanitizer_on(**kwargs):
    """Run a default 'split-name-list' sanitizer over the given name tags.

    The keyword arguments are used as the 'name' dictionary of the place.
    Returns the resulting names as a sorted list of
    (name, kind, suffix) tuples.
    """
    place = PlaceInfo({'name': kwargs})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])
    names, _ = sanitizer.process_names(place)

    # Sorting makes the comparison independent of the output order.
    result = [(item.name, item.kind, item.suffix) for item in names]
    result.sort()
    return result
16
17
def sanitize_with_delimiter(delimiter, name):
    """Run 'split-name-list' with custom delimiters over a single name.

    The place has one name tag 'name' with the value `name`; `delimiter`
    is passed as the 'delimiters' option of the sanitizer step.
    Returns the sorted list of resulting name strings.
    """
    place = PlaceInfo({'name': {'name': name}})
    step_config = {'step': 'split-name-list', 'delimiters': delimiter}
    names, _ = PlaceSanitizer([step_config]).process_names(place)

    return sorted(item.name for item in names)
24
25
def test_simple():
    """A value without delimiters (even an empty one) yields one name."""
    for value in ('ABC', ''):
        assert run_sanitizer_on(name=value) == [(value, 'name', None)]
29
30
def test_splits():
    """Values are split at ';' and ',' and the parts are stripped."""
    expected_names = [(part, 'name', None) for part in ('A', 'B', 'C')]
    assert run_sanitizer_on(name='A;B;C') == expected_names

    expected_short_names = [(part, 'short_name', None)
                            for part in ('House', 'boat')]
    assert run_sanitizer_on(short_name=' House, boat ') == expected_short_names
37
38
def test_empty_fields():
    """Empty or whitespace-only parts are dropped from the result."""
    only_b = [('B', 'name', None)]
    a_and_b = [('A', 'name', None), ('B', 'name', None)]

    cases = (
        ('A;;B', a_and_b),
        ('A; ,B', a_and_b),
        (' ;B', only_b),
        ('B,', only_b),
    )
    for raw, expected in cases:
        assert run_sanitizer_on(name=raw) == expected
46
47
def test_custom_delimiters():
    """Only the configured delimiter characters split the name."""
    cases = (
        (':', '12:45,3', ['12', '45,3']),
        ('\\', 'a;\\b!#@ \\', ['a;', 'b!#@']),
        ('[]', 'foo[to]be', ['be', 'foo', 'to']),
        (' ', 'morning  sun', ['morning', 'sun']),
    )
    for delimiters, raw, expected in cases:
        assert sanitize_with_delimiter(delimiters, raw) == expected
53
54
def test_empty_delimiter_set():
    """An empty delimiter string is rejected with a UsageError."""
    no_delimiters = ''
    with pytest.raises(UsageError):
        sanitize_with_delimiter(no_delimiters, 'abc')
58
59
def test_no_name_list():
    """A place with only address tags yields no names and one address."""
    place = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}])
    names, addresses = sanitizer.process_names(place)

    assert not names
    assert len(addresses) == 1