git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/sanitizers/test_strip_brace_terms.py
skip most addr: tags with suffixes
[nominatim.git] / test / python / tokenizer / sanitizers / test_strip_brace_terms.py
1 """
2 Tests for the sanitizer that handles braced suffixes.
3 """
4 import pytest
5
6 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
7 from nominatim.indexer.place_info import PlaceInfo
8
def run_sanitizer_on(**kwargs):
    """Run the 'strip-brace-terms' sanitizer step over the given names.

    The keyword arguments are handed over as the 'name' entry of a
    PlaceInfo. The names produced by the sanitizer are returned as a
    sorted list of (name, kind, suffix) tuples.
    """
    place_info = PlaceInfo({'name': kwargs})
    sanitizer = PlaceSanitizer([{'step': 'strip-brace-terms'}])
    # Only the names are of interest here; the address part is discarded.
    names, _ = sanitizer.process_names(place_info)

    triples = [(item.name, item.kind, item.suffix) for item in names]
    triples.sort()
    return triples
14
15
def test_no_braces():
    """Names without any braces come back without extra variants."""
    result = run_sanitizer_on(name='foo', ref='23')

    assert result == [('23', 'ref', None), ('foo', 'name', None)]
19
20
def test_simple_braces():
    """A braced suffix adds a variant of the name with the suffix removed."""
    closed = run_sanitizer_on(name='Halle (Saale)', ref='3')
    assert closed == [
        ('3', 'ref', None),
        ('Halle', 'name', None),
        ('Halle (Saale)', 'name', None),
    ]

    # An opening brace without a closing one is expected to be cut as well.
    unclosed = run_sanitizer_on(name='ack ( bar')
    assert unclosed == [('ack', 'name', None), ('ack ( bar', 'name', None)]
26
27
def test_only_braces():
    """A name made up solely of a braced term gets no additional variant."""
    result = run_sanitizer_on(name='(maybe)')

    assert result == [('(maybe)', 'name', None)]
30
31
def test_double_braces():
    """Nested and repeated braced terms yield one variant cut at the first brace."""
    nested = run_sanitizer_on(name='a((b))')
    assert nested == [('a', 'name', None), ('a((b))', 'name', None)]

    repeated = run_sanitizer_on(name='a (b) (c)')
    assert repeated == [('a', 'name', None), ('a (b) (c)', 'name', None)]
37
38
def test_no_names():
    """A place with address data only yields no names and one address entry."""
    address_only = PlaceInfo({'address': {'housenumber': '3'}})
    sanitizer = PlaceSanitizer([{'step': 'strip-brace-terms'}])
    names, addresses = sanitizer.process_names(address_only)

    assert not names
    assert len(addresses) == 1