1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Tests for tokenized query data structures.
"""
import pytest

from nominatim_api.search import query
class MyToken(query.Token):
    """Minimal concrete Token for testing.

    `query.Token` is abstract; this stub supplies the one required
    method so tokens can be instantiated in the tests below.
    """

    def get_category(self):
        # Fixed dummy category pair; the tests never inspect the values.
        return 'this', 'that'
def mktoken(tid: int) -> MyToken:
    """Create a test token with the given token id.

    Penalty, counts and lookup word are fixed so that only the id
    distinguishes tokens in assertions.
    """
    return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,
                   lookup_word='foo')
@pytest.fixture
def qnode():
    """Provide a fresh, empty query node at a phrase break."""
    return query.QueryNode(query.BREAK_PHRASE, query.PHRASE_ANY, 0.0, '', '')
@pytest.mark.parametrize('ptype,ttype', [(query.PHRASE_ANY, 'W'),
                                         (query.PHRASE_AMENITY, 'Q'),
                                         (query.PHRASE_STREET, 'w'),
                                         (query.PHRASE_CITY, 'W'),
                                         (query.PHRASE_COUNTRY, 'C'),
                                         (query.PHRASE_POSTCODE, 'P')])
def test_phrase_compatible(ptype, ttype):
    """These token types must be accepted within their phrase type."""
    is_compatible = query._phrase_compatible_with(ptype, ttype, False)
    assert is_compatible
@pytest.mark.parametrize('ptype', [query.PHRASE_COUNTRY, query.PHRASE_POSTCODE])
def test_phrase_incompatible(ptype):
    """A first-word partial token must be rejected for these phrase types."""
    is_compatible = query._phrase_compatible_with(ptype, query.TOKEN_PARTIAL, True)
    assert not is_compatible
def test_query_node_empty(qnode):
    """Lookup on a node without any tokens yields None."""
    found = qnode.get_tokens(3, query.TOKEN_WORD)
    assert found is None
def test_query_node_with_content(qnode):
    """Token lookup on a node honours both the end position and token type."""
    qnode.starting.append(query.TokenList(2, query.TOKEN_PARTIAL, [mktoken(100), mktoken(101)]))
    qnode.starting.append(query.TokenList(2, query.TOKEN_WORD, [mktoken(1000)]))

    # Only (end, type) combinations that were added are reported present.
    assert not qnode.has_tokens(3, query.TOKEN_PARTIAL)
    assert not qnode.has_tokens(2, query.TOKEN_COUNTRY)
    assert qnode.has_tokens(2, query.TOKEN_PARTIAL)
    assert qnode.has_tokens(2, query.TOKEN_WORD)

    # get_tokens mirrors has_tokens and hands back the stored token lists.
    assert qnode.get_tokens(2, query.TOKEN_COUNTRY) is None
    partials = qnode.get_tokens(2, query.TOKEN_PARTIAL)
    words = qnode.get_tokens(2, query.TOKEN_WORD)
    assert len(partials) == 2
    assert len(words) == 1
def test_query_struct_empty():
    """A query built from no phrases offers no token slots."""
    empty_query = query.QueryStruct([])

    assert empty_query.num_token_slots() == 0
def test_query_struct_with_tokens():
    """Tokens are stored per range/type and retrievable through the struct API."""
    q = query.QueryStruct([query.Phrase(query.PHRASE_ANY, 'foo bar')])
    q.add_node(query.BREAK_WORD, query.PHRASE_ANY)
    q.add_node(query.BREAK_END, query.PHRASE_ANY)

    assert q.num_token_slots() == 2

    q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
    q.add_token(query.TokenRange(1, 2), query.TOKEN_PARTIAL, mktoken(2))
    for word_id in (99, 98):
        q.add_token(query.TokenRange(1, 2), query.TOKEN_WORD, mktoken(word_id))

    # Word tokens were only added for the (1, 2) range.
    assert q.get_tokens(query.TokenRange(0, 2), query.TOKEN_WORD) == []
    assert len(q.get_tokens(query.TokenRange(1, 2), query.TOKEN_WORD)) == 2

    partials = list(q.iter_partials(query.TokenRange(0, 2)))

    assert len(partials) == 2
    assert [t.token for t in partials] == [1, 2]

    # Unknown ids map to the string 'None'; known ids to their lookup word.
    assert q.find_lookup_word_by_id(4) == 'None'
    assert q.find_lookup_word_by_id(99) == '[W]foo'
def test_query_struct_incompatible_token():
    """Adding tokens to a country phrase keeps the compatible country token."""
    q = query.QueryStruct([query.Phrase(query.PHRASE_COUNTRY, 'foo bar')])
    q.add_node(query.BREAK_WORD, query.PHRASE_COUNTRY)
    q.add_node(query.BREAK_END, query.PHRASE_ANY)

    q.add_token(query.TokenRange(0, 1), query.TOKEN_PARTIAL, mktoken(1))
    q.add_token(query.TokenRange(1, 2), query.TOKEN_COUNTRY, mktoken(100))

    country_tokens = q.get_tokens(query.TokenRange(1, 2), query.TOKEN_COUNTRY)
    assert len(country_tokens) == 1
def test_query_struct_amenity_single_word():
    """Single-word amenity phrase: near-item tokens kept, qualifiers dropped."""
    q = query.QueryStruct([query.Phrase(query.PHRASE_AMENITY, 'bar')])
    q.add_node(query.BREAK_END, query.PHRASE_ANY)

    for ttype, tid in ((query.TOKEN_PARTIAL, 1),
                       (query.TOKEN_NEAR_ITEM, 2),
                       (query.TOKEN_QUALIFIER, 3)):
        q.add_token(query.TokenRange(0, 1), ttype, mktoken(tid))

    assert q.nodes[0].partial.token == 1
    assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_NEAR_ITEM)) == 1
    assert len(q.get_tokens(query.TokenRange(0, 1), query.TOKEN_QUALIFIER)) == 0
def test_query_struct_amenity_two_words():
    """Multi-word amenity phrase: qualifiers kept, near-item tokens dropped."""
    q = query.QueryStruct([query.Phrase(query.PHRASE_AMENITY, 'foo bar')])
    q.add_node(query.BREAK_WORD, query.PHRASE_AMENITY)
    q.add_node(query.BREAK_END, query.PHRASE_ANY)

    for start, end in ((0, 1), (1, 2)):
        rng = query.TokenRange(start, end)
        q.add_token(rng, query.TOKEN_PARTIAL, mktoken(1))
        q.add_token(rng, query.TOKEN_NEAR_ITEM, mktoken(2))
        q.add_token(rng, query.TOKEN_QUALIFIER, mktoken(3))

    # Both word positions behave the same way.
    for idx, (start, end) in enumerate([(0, 1), (1, 2)]):
        assert q.nodes[idx].partial.token == 1
        assert len(q.get_tokens(query.TokenRange(start, end), query.TOKEN_NEAR_ITEM)) == 0
        assert len(q.get_tokens(query.TokenRange(start, end), query.TOKEN_QUALIFIER)) == 1