1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Tests for tokenized query data structures.
12 from nominatim_api.search import query
14 class MyToken(query.Token):
16 def get_category(self):
20 def mktoken(tid: int):
21 return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,
@pytest.mark.parametrize('ptype,ttype', [('NONE', 'WORD'),
                                         ('AMENITY', 'QUALIFIER'),
                                         ('STREET', 'PARTIAL'),
                                         ('COUNTRY', 'COUNTRY'),
                                         ('POSTCODE', 'POSTCODE')])
def test_phrase_compatible(ptype, ttype):
    """Every listed token type must be accepted inside its phrase type."""
    phrase = query.PhraseType[ptype]
    token = query.TokenType[ttype]
    assert phrase.compatible_with(token, False)
@pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
def test_phrase_incompatible(ptype):
    """COUNTRY and POSTCODE phrases must reject a partial token here."""
    phrase = query.PhraseType[ptype]
    assert not phrase.compatible_with(query.TokenType.PARTIAL, True)
def test_query_node_empty():
    """A freshly created query node must report no tokens at all."""
    node = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE)

    # Lookups on an empty node: has_tokens() is falsy, get_tokens() yields None.
    assert not node.has_tokens(3, query.TokenType.PARTIAL)
    assert node.get_tokens(3, query.TokenType.WORD) is None
def test_query_node_with_content():
    """Token lists attached to a node are found only under their exact
       end-node and token type."""
    node = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE)
    node.starting.append(query.TokenList(2, query.TokenType.PARTIAL,
                                         [mktoken(100), mktoken(101)]))
    node.starting.append(query.TokenList(2, query.TokenType.WORD,
                                         [mktoken(1000)]))

    # Wrong end node or wrong token type -> nothing found.
    assert not node.has_tokens(3, query.TokenType.PARTIAL)
    assert not node.has_tokens(2, query.TokenType.COUNTRY)
    assert node.get_tokens(3, query.TokenType.PARTIAL) is None
    assert node.get_tokens(2, query.TokenType.COUNTRY) is None

    # Exact matches return the attached lists.
    assert node.has_tokens(2, query.TokenType.PARTIAL)
    assert node.has_tokens(2, query.TokenType.WORD)
    assert len(node.get_tokens(2, query.TokenType.PARTIAL)) == 2
    assert len(node.get_tokens(2, query.TokenType.WORD)) == 1
def test_query_struct_empty():
    """A query constructed from no phrases offers zero token slots."""
    empty_query = query.QueryStruct([])

    assert empty_query.num_token_slots() == 0
def test_query_struct_with_tokens():
    """Tokens added to a two-slot query are retrievable by exact range
       and type, and partials are collected across the full range."""
    q = query.QueryStruct([query.Phrase(query.PhraseType.NONE, 'foo bar')])
    q.add_node(query.BreakType.WORD, query.PhraseType.NONE)
    q.add_node(query.BreakType.END, query.PhraseType.NONE)

    assert q.num_token_slots() == 2

    first = query.TokenRange(0, 1)
    second = query.TokenRange(1, 2)
    q.add_token(first, query.TokenType.PARTIAL, mktoken(1))
    q.add_token(second, query.TokenType.PARTIAL, mktoken(2))
    for word_id in (99, 98):
        q.add_token(second, query.TokenType.WORD, mktoken(word_id))

    # WORD tokens only live on the second slot; the spanning range is empty.
    assert q.get_tokens(query.TokenRange(0, 2), query.TokenType.WORD) == []
    assert len(q.get_tokens(second, query.TokenType.WORD)) == 2

    # Partials over the whole query come back in slot order.
    partials = q.get_partials_list(query.TokenRange(0, 2))
    assert len(partials) == 2
    assert [t.token for t in partials] == [1, 2]

    # Unknown ids map to the string 'None', known ids to their lookup word.
    assert q.find_lookup_word_by_id(4) == 'None'
    assert q.find_lookup_word_by_id(99) == '[W]foo'
def test_query_struct_incompatible_token():
    """A token whose type does not fit the phrase type is silently dropped."""
    qs = query.QueryStruct([query.Phrase(query.PhraseType.COUNTRY, 'foo bar')])
    qs.add_node(query.BreakType.WORD, query.PhraseType.COUNTRY)
    qs.add_node(query.BreakType.END, query.PhraseType.NONE)

    qs.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
    qs.add_token(query.TokenRange(1, 2), query.TokenType.COUNTRY, mktoken(100))

    # The PARTIAL was rejected by the COUNTRY phrase; the COUNTRY token kept.
    assert qs.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == []
    assert len(qs.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1
def test_query_struct_amenity_single_word():
    """In a single-word AMENITY phrase, NEAR_ITEM tokens are accepted
       while QUALIFIER tokens are rejected."""
    qs = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
    qs.add_node(query.BreakType.END, query.PhraseType.NONE)

    only_slot = query.TokenRange(0, 1)
    for ttype, tid in ((query.TokenType.PARTIAL, 1),
                       (query.TokenType.NEAR_ITEM, 2),
                       (query.TokenType.QUALIFIER, 3)):
        qs.add_token(only_slot, ttype, mktoken(tid))

    expected_counts = {query.TokenType.PARTIAL: 1,
                       query.TokenType.NEAR_ITEM: 1,
                       query.TokenType.QUALIFIER: 0}
    for ttype, count in expected_counts.items():
        assert len(qs.get_tokens(only_slot, ttype)) == count
def test_query_struct_amenity_two_words():
    """In a multi-word AMENITY phrase, QUALIFIER tokens are accepted on
       every word while NEAR_ITEM tokens are rejected."""
    qs = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
    qs.add_node(query.BreakType.WORD, query.PhraseType.AMENITY)
    qs.add_node(query.BreakType.END, query.PhraseType.NONE)

    for start, end in ((0, 1), (1, 2)):
        slot = query.TokenRange(start, end)
        qs.add_token(slot, query.TokenType.PARTIAL, mktoken(1))
        qs.add_token(slot, query.TokenType.NEAR_ITEM, mktoken(2))
        qs.add_token(slot, query.TokenType.QUALIFIER, mktoken(3))

    # Both word slots behave the same way.
    for start, end in ((0, 1), (1, 2)):
        slot = query.TokenRange(start, end)
        assert len(qs.get_tokens(slot, query.TokenType.PARTIAL)) == 1
        assert len(qs.get_tokens(slot, query.TokenType.NEAR_ITEM)) == 0
        assert len(qs.get_tokens(slot, query.TokenType.QUALIFIER)) == 1