1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Test for creation of token assignments from tokenized queries.
12 from nominatim_api.search.query import QueryStruct, Phrase, TokenRange, Token
13 import nominatim_api.search.query as qmod
14 from nominatim_api.search.token_assignment import (yield_token_assignments,
20 def get_category(self):
24 def make_query(*args):
25 q = QueryStruct([Phrase(args[0][1], '')])
26 dummy = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
29 for btype, ptype, _ in args[1:]:
30 q.add_node(btype, ptype)
31 q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)
33 for start, t in enumerate(args):
34 for end, ttype in t[2]:
35 q.add_token(TokenRange(start, end), ttype, dummy)
def check_assignments(actual, *expected):
    """Assert that `actual` yields exactly the `expected` assignments.

    The order of the assignments is irrelevant. Every item produced by
    `actual` must match one not-yet-matched expected item, and no expected
    item may be left over once `actual` is exhausted.

    Raises:
        AssertionError: if `actual` yields an item that is not (or no
            longer) expected, or if expected items were never yielded.
    """
    # Work on a mutable copy so that each expected item can be matched
    # only once; duplicates in `actual` are thereby detected as unexpected.
    todo = list(expected)

    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only what is still outstanding, not the full expectation.
    assert not todo, f"Missing assignments: {todo}"
def test_query_with_missing_tokens():
    """A query that has nodes but no tokens at all produces no assignments."""
    query = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])
    query.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
def test_one_word_query():
    """A single word is only ever assigned as the name."""
    word_tokens = [(1, qmod.TOKEN_PARTIAL),
                   (1, qmod.TOKEN_HOUSENUMBER)]
    query = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, word_tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(name=TokenRange(0, 1))]
def test_single_postcode():
    """A lone postcode token is assigned as the postcode."""
    node = (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_POSTCODE)])
    query = make_query(node)

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(postcode=TokenRange(0, 1))]
def test_single_country_name():
    """A lone country token is assigned as the country."""
    node = (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)])
    query = make_query(node)

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(country=TokenRange(0, 1))]
def test_single_word_poi_search():
    """A word that is both near-item and qualifier is assigned as near-item only."""
    word_tokens = [(1, qmod.TOKEN_NEAR_ITEM),
                   (1, qmod.TOKEN_QUALIFIER)]
    query = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, word_tokens))

    assignments = list(yield_token_assignments(query))

    assert assignments == [TokenAssignment(near_item=TokenRange(0, 1))]
@pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN])
def test_multiple_simple_words(btype):
    """Three partial words may be split into name and address at any break.

    Every split between name and address is expected to carry the
    PENALTY_TOKENCHANGE value of the break type separating the words.
    """
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (btype, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (btype, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )
    split_penalty = PENALTY_TOKENCHANGE[btype]

    expected = [
        TokenAssignment(name=TokenRange(0, 3)),
        TokenAssignment(penalty=split_penalty, name=TokenRange(0, 2),
                        address=[TokenRange(2, 3)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(0, 1),
                        address=[TokenRange(1, 3)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(1, 3),
                        address=[TokenRange(0, 1)]),
        TokenAssignment(penalty=split_penalty, name=TokenRange(2, 3),
                        address=[TokenRange(0, 2)]),
    ]

    check_assignments(yield_token_assignments(query), *expected)
def test_multiple_words_respect_phrase_break():
    """Words separated by a phrase break are never merged into one name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
    )

    name_first = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])
    name_last = TokenAssignment(name=TokenRange(1, 2),
                                address=[TokenRange(0, 1)])

    check_assignments(yield_token_assignments(query), name_first, name_last)
def test_housenumber_and_street():
    """A housenumber followed by a word: the word is either name or address."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
    )

    word_as_name = TokenAssignment(name=TokenRange(1, 2),
                                   housenumber=TokenRange(0, 1))
    word_as_address = TokenAssignment(address=[TokenRange(1, 2)],
                                      housenumber=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), word_as_name, word_as_address)
def test_housenumber_and_street_backwards():
    """A word followed by a housenumber: the word is either name or address."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)]),
    )

    word_as_name = TokenAssignment(name=TokenRange(0, 1),
                                   housenumber=TokenRange(1, 2))
    word_as_address = TokenAssignment(address=[TokenRange(0, 1)],
                                      housenumber=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), word_as_name, word_as_address)
def test_housenumber_and_postcode():
    """Word, housenumber, word, postcode: the first word is name or address."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_POSTCODE)]),
    )

    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(0, 1),
                                housenumber=TokenRange(1, 2),
                                address=[TokenRange(2, 3)],
                                postcode=TokenRange(3, 4))
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(1, 2),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_postcode_and_housenumber():
    """Word, postcode, word, housenumber: the second word is name or address."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_POSTCODE)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_HOUSENUMBER)]),
    )

    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(2, 3),
                                housenumber=TokenRange(3, 4),
                                address=[TokenRange(0, 1)],
                                postcode=TokenRange(1, 2))
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(3, 4),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_country_housenumber_postcode():
    """Country, word, housenumber, postcode in this order has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_POSTCODE)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [
    qmod.TOKEN_POSTCODE,
    qmod.TOKEN_COUNTRY,
    qmod.TOKEN_NEAR_ITEM,
    qmod.TOKEN_QUALIFIER,
])
def test_housenumber_with_only_special_terms(ttype):
    """A housenumber accompanied only by a special term has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, ttype)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [
    qmod.TOKEN_POSTCODE,
    qmod.TOKEN_HOUSENUMBER,
    qmod.TOKEN_COUNTRY,
])
def test_multiple_special_tokens(ttype):
    """The same special token type on both sides of a word has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, ttype)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(3, ttype)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_housenumber_many_phrases():
    """Three single-word phrases followed by a housenumber and a word.

    The word after the housenumber is expected either as the name or as an
    additional address part; the three leading phrases are always separate
    address parts.
    """
    q = make_query((qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
                   (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
                   (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
                   (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(4, qmod.TOKEN_HOUSENUMBER)]),
                   (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)]))

    check_assignments(yield_token_assignments(q),
                      TokenAssignment(penalty=0.1,
                                      name=TokenRange(4, 5),
                                      housenumber=TokenRange(3, 4),
                                      # All three leading phrases must be covered:
                                      # 0-1, 1-2 and 2-3.
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3)]),
                      TokenAssignment(penalty=0.1,
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3), TokenRange(4, 5)]))
def test_country_at_beginning():
    """A country token before a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_COUNTRY)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                               country=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_country_at_end():
    """A country token after a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_COUNTRY)]),
    )

    expected = TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
                               country=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_country_in_middle():
    """A country token between two words has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_COUNTRY)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_postcode_with_designation():
    """A postcode followed by a word: the word is either name or address.

    Only the variant using the word as name is expected with a penalty.
    """
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_POSTCODE)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
    )

    word_as_name = TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                                   postcode=TokenRange(0, 1))
    word_as_address = TokenAssignment(postcode=TokenRange(0, 1),
                                      address=[TokenRange(1, 2)])

    check_assignments(yield_token_assignments(query), word_as_name, word_as_address)
def test_postcode_with_designation_backwards():
    """A word followed by a postcode: the word is either name or address.

    Only the variant using the word as address is expected with a penalty.
    """
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_POSTCODE)]),
    )

    word_as_name = TokenAssignment(name=TokenRange(0, 1),
                                   postcode=TokenRange(1, 2))
    word_as_address = TokenAssignment(penalty=0.1, postcode=TokenRange(1, 2),
                                      address=[TokenRange(0, 1)])

    check_assignments(yield_token_assignments(query), word_as_name, word_as_address)
def test_near_item_at_beginning():
    """A near-item token before a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_NEAR_ITEM)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                               near_item=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_at_end():
    """A near-item token after a word: the word becomes the name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_NEAR_ITEM)]),
    )

    expected = TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
                               near_item=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_in_middle():
    """A near-item token between two words has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_NEAR_ITEM)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_at_beginning():
    """A leading qualifier must be directly followed by the name."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_QUALIFIER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )

    long_name = TokenAssignment(penalty=0.1, name=TokenRange(1, 3),
                                qualifier=TokenRange(0, 1))
    short_name = TokenAssignment(penalty=0.2, name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1),
                                 address=[TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), long_name, short_name)
def test_qualifier_after_name():
    """A qualifier between word pairs: the name is the full pair on either side."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_QUALIFIER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)]),
    )

    name_before = TokenAssignment(penalty=0.2, name=TokenRange(0, 2),
                                  qualifier=TokenRange(2, 3),
                                  address=[TokenRange(3, 5)])
    name_after = TokenAssignment(penalty=0.2, name=TokenRange(3, 5),
                                 qualifier=TokenRange(2, 3),
                                 address=[TokenRange(0, 2)])

    check_assignments(yield_token_assignments(query), name_before, name_after)
def test_qualifier_before_housenumber():
    """Qualifier, housenumber, word in this order has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_QUALIFIER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_after_housenumber():
    """Housenumber, qualifier, word in this order has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_HOUSENUMBER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(2, qmod.TOKEN_QUALIFIER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_in_middle_of_phrase():
    """A qualifier with words on both sides inside one phrase has no valid assignment."""
    query = make_query(
        (qmod.BREAK_START, qmod.PHRASE_ANY, [(1, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(2, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(3, qmod.TOKEN_QUALIFIER)]),
        (qmod.BREAK_WORD, qmod.PHRASE_ANY, [(4, qmod.TOKEN_PARTIAL)]),
        (qmod.BREAK_PHRASE, qmod.PHRASE_ANY, [(5, qmod.TOKEN_PARTIAL)]),
    )

    # No expected items: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))