git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/sanitizers/test_split_name_list.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / test / python / tokenizer / sanitizers / test_split_name_list.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for the sanitizer that splits multivalue lists.
9 """
10 import pytest
11
12 from nominatim_db.tokenizer.place_sanitizer import PlaceSanitizer
13 from nominatim_db.data.place_info import PlaceInfo
14
15 from nominatim_db.errors import UsageError
16
17
class TestSplitName:
    """ Tests running the 'split-name-list' sanitizer step over place names.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the configuration fixture around for the helper methods below.
        self.config = def_config

    def run_sanitizer_on(self, **kwargs):
        """ Run the step without extra options over a place whose 'name'
            dictionary consists of the given keyword arguments.

            Returns the sorted list of (name, kind, suffix) tuples of
            the resulting names.
        """
        place = PlaceInfo({'name': kwargs})
        sanitizer = PlaceSanitizer([{'step': 'split-name-list'}], self.config)
        names, _ = sanitizer.process_names(place)

        return sorted((item.name, item.kind, item.suffix) for item in names)

    def sanitize_with_delimiter(self, delimiter, name):
        """ Run the step with the given 'delimiters' option over a place
            that has a single 'name' entry with the given value.

            Returns the sorted list of the resulting name strings.
        """
        place = PlaceInfo({'name': {'name': name}})
        rules = [{'step': 'split-name-list', 'delimiters': delimiter}]
        names, _ = PlaceSanitizer(rules, self.config).process_names(place)

        return sorted(item.name for item in names)

    def test_simple(self):
        # Values without a delimiter come back as a single entry,
        # the empty string included.
        for value in ('ABC', ''):
            assert self.run_sanitizer_on(name=value) == [(value, 'name', None)]

    def test_splits(self):
        expected_names = [('A', 'name', None),
                          ('B', 'name', None),
                          ('C', 'name', None)]
        assert self.run_sanitizer_on(name='A;B;C') == expected_names

        expected_short = [('House', 'short_name', None),
                          ('boat', 'short_name', None)]
        assert self.run_sanitizer_on(short_name=' House, boat ') == expected_short

    def test_empty_fields(self):
        a_and_b = [('A', 'name', None), ('B', 'name', None)]
        only_b = [('B', 'name', None)]

        assert self.run_sanitizer_on(name='A;;B') == a_and_b
        assert self.run_sanitizer_on(name='A; ,B') == a_and_b
        assert self.run_sanitizer_on(name=' ;B') == only_b
        assert self.run_sanitizer_on(name='B,') == only_b

    def test_custom_delimiters(self):
        # Each case: (delimiters option, raw name, expected sorted parts)
        cases = [
            (':', '12:45,3', ['12', '45,3']),
            ('\\', 'a;\\b!#@ \\', ['a;', 'b!#@']),
            ('[]', 'foo[to]be', ['be', 'foo', 'to']),
            (' ', 'morning  sun', ['morning', 'sun']),
        ]

        for delimiter, raw, expected in cases:
            assert self.sanitize_with_delimiter(delimiter, raw) == expected

    def test_empty_delimiter_set(self):
        # An empty delimiter string must be rejected with a UsageError.
        with pytest.raises(UsageError):
            self.sanitize_with_delimiter('', 'abc')
66
67
def test_no_name_list(def_config):
    """ A place that only has an address must yield no names and
        exactly one address entry.
    """
    place = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'split-name-list'}], def_config)
    name, address = sanitizer.process_names(place)

    assert not name
    assert len(address) == 1