1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
Tests for the creation of token assignments from tokenized queries.
12 from nominatim.api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token
13 from nominatim.api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
16 def get_category(self):
def make_query(*args):
    """Build a QueryStruct for the tests from a list of node descriptions.

    Each positional argument is a tuple ``(break_type, phrase_type, tokens)``
    where ``tokens`` is a list of ``(end, token_type)`` pairs. The position
    of the argument is used as the start node of each of its tokens.

    The break type of the first entry is not used; only its phrase type is
    passed to the initial Phrase. For every further entry a node with the
    given break and phrase type is added, followed by a final END node.

    Returns the populated QueryStruct.
    """
    query = QueryStruct([Phrase(args[0][1], '')])
    # One shared dummy token is attached to every token range.
    dummy = MyToken(3.0, 45, 1, 'foo', True)

    for btype, ptype, _ in args[1:]:
        query.add_node(btype, ptype)
    query.add_node(BreakType.END, PhraseType.NONE)

    for start, node in enumerate(args):
        for end, ttype in node[2]:
            query.add_token(TokenRange(start, end), ttype, dummy)

    # Callers assign the result (`q = make_query(...)`), so hand it back.
    return query
def check_assignments(actual, *expected):
    """Assert that `actual` yields exactly the `expected` items, in any order.

    `actual` is any iterable; every item it produces must compare equal to
    one not-yet-matched entry of `expected`. Each expected entry can be
    matched only once, so duplicates must appear as often as they are listed.

    Raises AssertionError on the first unexpected item, or at the end when
    expected entries are left unmatched.
    """
    # Working copy of the expectations; matched entries are removed.
    todo = list(expected)
    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only the entries that were never produced.
    assert not todo, f"Missing assignments: {todo}"
def test_query_with_missing_tokens():
    """A query with only an END node and no tokens yields no assignments."""
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    query.add_node(BreakType.END, PhraseType.NONE)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
def test_one_word_query():
    """A single word with partial and housenumber tokens yields one name-only assignment."""
    tokens = [(1, TokenType.PARTIAL),
              (1, TokenType.HOUSENUMBER)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    expected = [TokenAssignment(name=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_postcode():
    """A lone postcode token yields exactly one postcode assignment."""
    node = (BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)])
    query = make_query(node)

    expected = [TokenAssignment(postcode=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_country_name():
    """A lone country token yields exactly one country assignment."""
    node = (BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)])
    query = make_query(node)

    expected = [TokenAssignment(country=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_word_poi_search():
    """A word that is both near-item and qualifier yields only the near-item assignment."""
    tokens = [(1, TokenType.NEAR_ITEM),
              (1, TokenType.QUALIFIER)]
    query = make_query((BreakType.START, PhraseType.NONE, tokens))

    expected = [TokenAssignment(near_item=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
def test_multiple_simple_words(btype):
    """Three partial words joined by `btype` breaks.

    Expected are the unsplit name over all three words (no penalty given)
    plus every split into one contiguous name and one contiguous address
    range, each with the penalty PENALTY_TOKENCHANGE[btype].
    """
    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    penalty = PENALTY_TOKENCHANGE[btype]

    check_assignments(yield_token_assignments(q),
                      TokenAssignment(name=TokenRange(0, 3)),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 2),
                                      address=[TokenRange(2, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 1),
                                      address=[TokenRange(1, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(1, 3),
                                      address=[TokenRange(0, 1)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(2, 3),
                                      address=[TokenRange(0, 2)]))
def test_multiple_words_respect_phrase_break():
    """Two partial words split by a phrase break: each may be name with the other as address."""
    first = (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)])
    second = (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])
    query = make_query(first, second)

    expected = (
        TokenAssignment(name=TokenRange(0, 1), address=[TokenRange(1, 2)]),
        TokenAssignment(name=TokenRange(1, 2), address=[TokenRange(0, 1)]),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street():
    """Housenumber followed by a partial word: the word is either name or address."""
    number = (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)])
    street = (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])
    query = make_query(number, street)

    expected = (
        TokenAssignment(name=TokenRange(1, 2), housenumber=TokenRange(0, 1)),
        TokenAssignment(address=[TokenRange(1, 2)], housenumber=TokenRange(0, 1)),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street_backwards():
    """Partial word followed by a housenumber: the word is either name or address."""
    street = (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)])
    number = (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)])
    query = make_query(street, number)

    expected = (
        TokenAssignment(name=TokenRange(0, 1), housenumber=TokenRange(1, 2)),
        TokenAssignment(address=[TokenRange(0, 1)], housenumber=TokenRange(1, 2)),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_postcode():
    """Word, housenumber, word, postcode: two assignments, each with penalty of about 0.3."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )
    query = make_query(*nodes)

    # First word as name, second word as address.
    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(0, 1),
                                housenumber=TokenRange(1, 2),
                                address=[TokenRange(2, 3)],
                                postcode=TokenRange(3, 4))
    # Both words as address, no name.
    without_name = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(1, 2),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query), with_name, without_name)
def test_postcode_and_housenumber():
    """Word, postcode, word, housenumber: two assignments, each with penalty of about 0.3."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
    )
    query = make_query(*nodes)

    # Second word as name, first word as address.
    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(2, 3),
                                housenumber=TokenRange(3, 4),
                                address=[TokenRange(0, 1)],
                                postcode=TokenRange(1, 2))
    # Both words as address, no name.
    without_name = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(3, 4),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), with_name, without_name)
def test_country_housenumber_postcode():
    """Country, word, housenumber, postcode in this order yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
    """A housenumber followed only by a special token of type `ttype` yields nothing."""
    number = (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)])
    special = (BreakType.WORD, PhraseType.NONE, [(2, ttype)])
    query = make_query(number, special)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY])
def test_multiple_special_tokens(ttype):
    """The same special token type on both sides of a partial word yields nothing."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, ttype)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(3, ttype)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_housenumber_many_phrases():
    """Three single-word phrases followed by a phrase with housenumber and word.

    Two assignments are expected, both with penalty 0.1 and the first three
    words as address: one where the word after the housenumber is the name,
    and one where that word is a further address part and there is no name.
    """
    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
                   (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    check_assignments(yield_token_assignments(q),
                      # Name is the trailing word; the three leading words
                      # (0-1, 1-2, 2-3) make up the address.
                      TokenAssignment(penalty=0.1,
                                      name=TokenRange(4, 5),
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3)]),
                      # No name; the trailing word joins the address.
                      TokenAssignment(penalty=0.1,
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3), TokenRange(4, 5)]))
def test_country_at_beginning():
    """Country token before a partial word: name plus country with penalty 0.1."""
    country = (BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)])
    word = (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])
    query = make_query(country, word)

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               country=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_country_at_end():
    """Country token after a partial word: name plus country with penalty 0.1."""
    word = (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)])
    country = (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)])
    query = make_query(word, country)

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               country=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_country_in_middle():
    """A country token between two partial words yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_postcode_with_designation():
    """Postcode followed by a word: the word as name costs 0.1, as address nothing."""
    postcode = (BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)])
    word = (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])
    query = make_query(postcode, word)

    expected = (
        TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
                        postcode=TokenRange(0, 1)),
        TokenAssignment(postcode=TokenRange(0, 1),
                        address=[TokenRange(1, 2)]),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_postcode_with_designation_backwards():
    """Word followed by a postcode: the word as address costs 0.1, as name nothing."""
    word = (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)])
    postcode = (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)])
    query = make_query(word, postcode)

    expected = (
        TokenAssignment(name=TokenRange(0, 1),
                        postcode=TokenRange(1, 2)),
        TokenAssignment(penalty=0.1, postcode=TokenRange(1, 2),
                        address=[TokenRange(0, 1)]),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_near_item_at_beginning():
    """Near-item token before a partial word: name plus near item with penalty 0.1."""
    near = (BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)])
    word = (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])
    query = make_query(near, word)

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               near_item=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_at_end():
    """Near-item token after a partial word: name plus near item with penalty 0.1."""
    word = (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)])
    near = (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)])
    query = make_query(word, near)

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               near_item=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_in_middle():
    """A near-item token between two partial words yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_at_beginning():
    """Qualifier followed by two partial words.

    Expected: both words as name (penalty 0.1), or the first word as name
    and the second as address (penalty 0.2), each with the qualifier.
    """
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    full_name = TokenAssignment(penalty=0.1, name=TokenRange(1, 3),
                                qualifier=TokenRange(0, 1))
    name_and_address = TokenAssignment(penalty=0.2, name=TokenRange(1, 2),
                                       qualifier=TokenRange(0, 1),
                                       address=[TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), full_name, name_and_address)
def test_qualifier_after_name():
    """Two words, a qualifier, then two more words.

    Expected: either word pair as name with the other pair as address,
    both with the qualifier and penalty 0.2.
    """
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    name_first = TokenAssignment(penalty=0.2, name=TokenRange(0, 2),
                                 qualifier=TokenRange(2, 3),
                                 address=[TokenRange(3, 5)])
    name_last = TokenAssignment(penalty=0.2, name=TokenRange(3, 5),
                                qualifier=TokenRange(2, 3),
                                address=[TokenRange(0, 2)])

    check_assignments(yield_token_assignments(query), name_first, name_last)
def test_qualifier_before_housenumber():
    """Qualifier, housenumber, word in this order yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_after_housenumber():
    """Housenumber, qualifier, word in this order yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))
def test_qualifier_in_middle_of_phrase():
    """A qualifier between two words of the middle phrase yields no assignment."""
    nodes = (
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )
    query = make_query(*nodes)

    # No expected entries: any yielded assignment fails the check.
    check_assignments(yield_token_assignments(query))