1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Test for creation of token assignments from tokenized queries.
12 from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token
13 from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
16 def get_category(self):
def make_query(*args):
    """Build a QueryStruct for testing from a list of node descriptions.

    Each positional argument is a tuple ``(break_type, phrase_type, tokens)``
    where ``tokens`` is a list of ``(end, token_type)`` pairs. The argument
    at position ``i`` describes the tokens starting at node ``i``; each of
    them spans ``TokenRange(i, end)``.

    The phrase type of the first tuple is used for the single phrase of the
    query; its break type is not read. For every further tuple a node with
    the given break and phrase type is added, followed by a final END node.

    All tokens share the same dummy token instance.

    Returns the fully populated query.
    """
    query = QueryStruct([Phrase(args[0][1], '')])
    dummy = MyToken(penalty=3.0, token=45, count=1, addr_count=1,
                    lookup_word='foo', is_indexed=True)

    # The start node is implied by the first tuple, so only the remaining
    # tuples contribute nodes.
    for btype, ptype, _ in args[1:]:
        query.add_node(btype, ptype)
    query.add_node(BreakType.END, PhraseType.NONE)

    for start, node in enumerate(args):
        for end, ttype in node[2]:
            query.add_token(TokenRange(start, end), ttype, dummy)

    # Callers assign the result (``q = make_query(...)``), so the query
    # must be handed back.
    return query
def check_assignments(actual, *expected):
    """Assert that `actual` yields exactly the `expected` items, in any order.

    `actual` may be any iterable (including a generator). Every item it
    produces must match one not-yet-matched entry of `expected`; each
    expected entry can be matched only once.

    Raises AssertionError when an item is produced that is not (or no
    longer) expected, or when expected entries remain unmatched at the end.
    """
    # Work on a copy so that matched entries can be ticked off.
    todo = list(expected)

    for assignment in actual:
        assert assignment in todo, f"Unexpected assignment: {assignment}"
        todo.remove(assignment)

    # Report only what was not matched rather than the full expectation.
    assert not todo, f"Missing assignments: {todo}"
def test_query_with_missing_tokens():
    """A query that has an END node but no tokens must produce no assignments."""
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    query.add_node(BreakType.END, PhraseType.NONE)

    assignments = list(yield_token_assignments(query))

    assert assignments == []
def test_one_word_query():
    """A single word with several token readings is expected to yield one name-only assignment."""
    word_tokens = [(1, TokenType.PARTIAL),
                   (1, TokenType.HOUSENUMBER)]
    query = make_query((BreakType.START, PhraseType.NONE, word_tokens))

    expected = [TokenAssignment(name=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_postcode():
    """A query of one POSTCODE token is expected to yield one postcode-only assignment."""
    postcode_tokens = [(1, TokenType.POSTCODE)]
    query = make_query((BreakType.START, PhraseType.NONE, postcode_tokens))

    expected = [TokenAssignment(postcode=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_country_name():
    """A query of one COUNTRY token is expected to yield one country-only assignment."""
    country_tokens = [(1, TokenType.COUNTRY)]
    query = make_query((BreakType.START, PhraseType.NONE, country_tokens))

    expected = [TokenAssignment(country=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
def test_single_word_poi_search():
    """A lone word readable as NEAR_ITEM or QUALIFIER is expected to be used as near item only."""
    word_tokens = [(1, TokenType.NEAR_ITEM),
                   (1, TokenType.QUALIFIER)]
    query = make_query((BreakType.START, PhraseType.NONE, word_tokens))

    expected = [TokenAssignment(near_item=TokenRange(0, 1))]

    assert list(yield_token_assignments(query)) == expected
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
def test_multiple_simple_words(btype):
    """Three partial words joined by soft breaks may be split into name and address.

    Expected are the assignment using all words as name (without penalty)
    and every split into a name part and one address part at either end,
    each carrying the penalty registered for the break type in
    PENALTY_TOKENCHANGE.
    """
    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))

    penalty = PENALTY_TOKENCHANGE[btype]

    check_assignments(yield_token_assignments(q),
                      TokenAssignment(name=TokenRange(0, 3)),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 2),
                                      address=[TokenRange(2, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(0, 1),
                                      address=[TokenRange(1, 3)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(1, 3),
                                      address=[TokenRange(0, 1)]),
                      TokenAssignment(penalty=penalty, name=TokenRange(2, 3),
                                      address=[TokenRange(0, 2)])
                      )
def test_multiple_words_respect_phrase_break():
    """Two partial words separated by a PHRASE break: one is the name, the other the address.

    No expected assignment spans both words.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = (
        TokenAssignment(name=TokenRange(0, 1), address=[TokenRange(1, 2)]),
        TokenAssignment(name=TokenRange(1, 2), address=[TokenRange(0, 1)]),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street():
    """Housenumber phrase followed by a partial word: the word is either name or address."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )
    housenumber = TokenRange(0, 1)

    expected = (
        TokenAssignment(name=TokenRange(1, 2), housenumber=housenumber),
        TokenAssignment(address=[TokenRange(1, 2)], housenumber=housenumber),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_street_backwards():
    """Partial word followed by a housenumber phrase: the word is either name or address."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
    )
    housenumber = TokenRange(1, 2)

    expected = (
        TokenAssignment(name=TokenRange(0, 1), housenumber=housenumber),
        TokenAssignment(address=[TokenRange(0, 1)], housenumber=housenumber),
    )

    check_assignments(yield_token_assignments(query), *expected)
def test_housenumber_and_postcode():
    """Word, housenumber, word, postcode: two assignments are expected, both with penalty ~0.3.

    They differ only in whether the first word is used as name or as
    an additional address part.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )

    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(0, 1),
                                housenumber=TokenRange(1, 2),
                                address=[TokenRange(2, 3)],
                                postcode=TokenRange(3, 4))
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(1, 2),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(3, 4))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_postcode_and_housenumber():
    """Word, postcode, word, housenumber: two assignments are expected, both with penalty ~0.3.

    They differ only in whether the word before the housenumber is used
    as name or as an additional address part.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
    )

    with_name = TokenAssignment(penalty=pytest.approx(0.3),
                                name=TokenRange(2, 3),
                                housenumber=TokenRange(3, 4),
                                address=[TokenRange(0, 1)],
                                postcode=TokenRange(1, 2))
    address_only = TokenAssignment(penalty=pytest.approx(0.3),
                                   housenumber=TokenRange(3, 4),
                                   address=[TokenRange(0, 1), TokenRange(2, 3)],
                                   postcode=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), with_name, address_only)
def test_country_housenumber_postcode():
    """Country, word, housenumber, postcode in this order: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
    """A housenumber followed only by a special term (no partial word): nothing is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, ttype)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY])
def test_multiple_special_tokens(ttype):
    """The same special token type on both sides of a partial word: nothing is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, ttype)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(3, ttype)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
def test_housenumber_many_phrases():
    """Three single-word phrases followed by a phrase of housenumber plus word.

    Two assignments with penalty 0.1 are expected: one that uses the word
    after the housenumber as name and one that uses it as a further
    address part. In both, the three leading words are separate address
    parts.
    """
    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
                   (BreakType.PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
                   (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))

    check_assignments(yield_token_assignments(q),
                      TokenAssignment(penalty=0.1,
                                      name=TokenRange(4, 5),
                                      housenumber=TokenRange(3, 4),
                                      # NOTE(review): TokenRange(2, 3) closes the list by
                                      # analogy with the second assignment - confirm.
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3)]),
                      TokenAssignment(penalty=0.1,
                                      housenumber=TokenRange(3, 4),
                                      address=[TokenRange(0, 1), TokenRange(1, 2),
                                               TokenRange(2, 3), TokenRange(4, 5)]))
def test_country_at_beginning():
    """Country token before a partial word: one assignment with penalty 0.1 is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               country=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_country_at_end():
    """Country token after a partial word: one assignment with penalty 0.1 is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               country=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_country_in_middle():
    """Country token between two partial words: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
def test_postcode_with_designation():
    """Postcode phrase followed by a partial word.

    Using the word as name is expected with penalty 0.1, using it as
    address without an explicit penalty.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    as_name = TokenAssignment(penalty=0.1,
                              name=TokenRange(1, 2),
                              postcode=TokenRange(0, 1))
    as_address = TokenAssignment(postcode=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])

    check_assignments(yield_token_assignments(query), as_name, as_address)
def test_postcode_with_designation_backwards():
    """Partial word followed by a postcode phrase.

    Using the word as name is expected without an explicit penalty, using
    it as address with penalty 0.1.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
    )

    as_name = TokenAssignment(name=TokenRange(0, 1),
                              postcode=TokenRange(1, 2))
    as_address = TokenAssignment(penalty=0.1,
                                 postcode=TokenRange(1, 2),
                                 address=[TokenRange(0, 1)])

    check_assignments(yield_token_assignments(query), as_name, as_address)
def test_near_item_at_beginning():
    """Near-item token before a partial word: one assignment with penalty 0.1 is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(1, 2),
                               near_item=TokenRange(0, 1))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_at_end():
    """Near-item token after a partial word: one assignment with penalty 0.1 is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
    )

    expected = TokenAssignment(penalty=0.1,
                               name=TokenRange(0, 1),
                               near_item=TokenRange(1, 2))

    check_assignments(yield_token_assignments(query), expected)
def test_near_item_in_middle():
    """Near-item token between two partial words: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
def test_qualifier_at_beginning():
    """Qualifier token followed by two partial words.

    Expected are the assignment using both words as name (penalty 0.1)
    and the one using the first word as name and the second as address
    (penalty 0.2).
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    full_name = TokenAssignment(penalty=0.1,
                                name=TokenRange(1, 3),
                                qualifier=TokenRange(0, 1))
    name_and_address = TokenAssignment(penalty=0.2,
                                       name=TokenRange(1, 2),
                                       qualifier=TokenRange(0, 1),
                                       address=[TokenRange(2, 3)])

    check_assignments(yield_token_assignments(query), full_name, name_and_address)
def test_qualifier_after_name():
    """Qualifier token with two partial words on either side.

    Two assignments with penalty 0.2 are expected: either word pair is the
    name while the other pair forms a single address part.
    """
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )

    name_first = TokenAssignment(penalty=0.2,
                                 name=TokenRange(0, 2),
                                 qualifier=TokenRange(2, 3),
                                 address=[TokenRange(3, 5)])
    name_last = TokenAssignment(penalty=0.2,
                                name=TokenRange(3, 5),
                                qualifier=TokenRange(2, 3),
                                address=[TokenRange(0, 2)])

    check_assignments(yield_token_assignments(query), name_first, name_last)
def test_qualifier_before_housenumber():
    """Qualifier, housenumber, partial word in this order: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
def test_qualifier_after_housenumber():
    """Housenumber, qualifier, partial word in this order: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
        (BreakType.WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)
def test_qualifier_in_middle_of_phrase():
    """Qualifier between two words of the middle phrase of three: no assignment is expected."""
    query = make_query(
        (BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
        (BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
        (BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
        (BreakType.PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)]),
    )

    assignments = yield_token_assignments(query)

    # Without expected entries, any produced assignment fails the check.
    check_assignments(assignments)