# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for creating abstract searches from token assignments.
"""
12 from nominatim_api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
13 from nominatim_api.search.db_search_builder import SearchBuilder
14 from nominatim_api.search.token_assignment import TokenAssignment
15 from nominatim_api.types import SearchDetails
16 import nominatim_api.search.db_searches as dbs
class MyToken(Token):
    """Token stub used to populate the test queries in this file.

    NOTE(review): the class header and the method body were not visible in
    the reviewed source and have been reconstructed. The returned category
    is inferred from the ``[('this', 'that')]`` qualifier assertions made
    by the tests in this file -- confirm against the original.
    """

    def get_category(self):
        """Return the fixed (class, type) category pair of the stub token."""
        return 'this', 'that'
def make_query(*args):
    """Build a single-phrase QueryStruct from compact token descriptions.

    Each positional argument lists the tokens that start at the node with
    the same index as the argument. An entry is a tuple
    ``(end, ttype, tinfo)``: ``end`` is the end node of the token range,
    ``ttype`` the token type and ``tinfo`` a list of
    ``(token_id, lookup_word)`` pairs. One ``MyToken`` is added per pair;
    partial tokens get a penalty of 0.5, all other types 0.0.

    Returns the populated query so that it can be handed to a
    SearchBuilder.
    """
    q = QueryStruct([Phrase(PhraseType.NONE, '')])

    # Add as many word nodes as the highest end position requires and
    # terminate the query with an END node.
    last_node = max(inner[0] for tlist in args for inner in tlist)
    for _ in range(last_node):
        q.add_node(BreakType.WORD, PhraseType.NONE)
    q.add_node(BreakType.END, PhraseType.NONE)

    for start, tlist in enumerate(args):
        for end, ttype, tinfo in tlist:
            penalty = 0.5 if ttype == TokenType.PARTIAL else 0.0
            for tid, word in tinfo:
                q.add_token(TokenRange(start, end), ttype,
                            MyToken(penalty=penalty,
                                    token=tid, count=1, addr_count=1,
                                    lookup_word=word, is_indexed=True))

    return q
def test_country_search():
    """A country-only assignment yields exactly one CountrySearch.

    The search must carry both country codes offered by the country token.
    """
    q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'de', 'en'}
def test_country_search_with_country_restriction():
    """A 'countries' restriction narrows the country search.

    The token offers 'de' and 'en', the request allows 'en' and 'fr';
    only the common code 'en' must remain in the search.
    """
    q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'}))

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'en'}
def test_country_search_with_conflicting_country_restriction():
    """No search is built when the restriction excludes all token countries.

    The token offers 'de' and 'en' while the request only allows 'fr'.
    """
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(country=TokenRange(0, 1))
    searches = list(builder.build(assignment))

    assert not searches
def test_postcode_search_simple():
    """A postcode-only assignment yields one plain PostcodeSearch.

    The search carries the postcode and has no country filter, lookups
    or rankings.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries.values
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_country():
    """A postcode followed by a country yields a country-filtered PostcodeSearch.

    The search carries the postcode and the country code but still has no
    lookups or rankings.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.COUNTRY, [(1, 'xx')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  country=TokenRange(1, 2))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert search.countries.values == ['xx']
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_address():
    """A postcode with a partial-only address part adds lookups.

    The address consists of a single partial token, so the resulting
    PostcodeSearch has lookups but no rankings.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert not search.rankings
def test_postcode_with_address_with_full_word():
    """A postcode with an address part that also has a full word.

    In addition to the lookups, the resulting PostcodeSearch must carry
    exactly one ranking.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')]),
                    (2, TokenType.WORD, [(1, 'full')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert len(search.rankings) == 1
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}])
def test_near_item_only(kwargs):
    """A lone near-item token yields one PoiSearch for the given details.

    The search must use the category of the near-item token as qualifier.

    NOTE(review): the parameter list was cut off after its first entry in
    the reviewed source; only the visible case is kept here -- restore any
    further parameter sets from the original.
    """
    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PoiSearch)
    assert search.qualifiers.values == [('this', 'that')]
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}])
def test_near_item_skipped(kwargs):
    """A lone near-item token yields no search for the given details.

    NOTE(review): the parameter list was cut off after its first entry in
    the reviewed source; only the visible case is kept here -- restore any
    further parameter sets from the original.
    """
    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 0
def test_name_only_search():
    """A name-only assignment yields one unrestricted PlaceSearch.

    The search has no postcode, country, housenumber or qualifier filter
    and carries exactly one lookup and one ranking.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert not search.qualifiers.values
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_qualifier():
    """A name followed by a qualifier yields a qualified PlaceSearch.

    The category of the qualifier token must show up as the only
    qualifier of the search.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.QUALIFIER, [(55, 'hotel')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  qualifier=TokenRange(1, 2))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert search.qualifiers.values == [('this', 'that')]
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_housenumber_search():
    """A name followed by a housenumber yields a PlaceSearch with housenumber.

    The lookup word of the housenumber token must appear in the
    housenumber filter of the search.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.HOUSENUMBER, [(66, '66')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  housenumber=TokenRange(1, 2))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert search.housenumbers.values == ['66']
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_and_address():
    """A name with two address parts yields one PlaceSearch.

    The search must carry two lookups and three rankings and no postcode,
    country or housenumber filter.

    NOTE(review): the address list was cut off after its first range in
    the reviewed source. The second range ``TokenRange(2, 3)`` is
    reconstructed from the third token list of the query -- confirm
    against the original.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.PARTIAL, [(2, 'b')]),
                    (2, TokenType.WORD, [(101, 'b')])],
                   [(3, TokenType.PARTIAL, [(3, 'c')]),
                    (3, TokenType.WORD, [(102, 'c')])]
                   )
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 3)])))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 3
def test_name_and_complex_address():
    """A name with an address containing a multi-node full word.

    The query has a full word 'bc' spanning nodes 1-3 next to the partials
    'b' and 'c'. The resulting PlaceSearch must carry two lookups and two
    rankings and no postcode, country or housenumber filter.

    NOTE(review): the address list was cut off after its first range in
    the reviewed source. The second range ``TokenRange(2, 4)`` is a
    reconstruction covering the remaining nodes of the query -- confirm
    against the original.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.PARTIAL, [(2, 'b')]),
                    (3, TokenType.WORD, [(101, 'bc')])],
                   [(3, TokenType.PARTIAL, [(3, 'c')])],
                   [(4, TokenType.PARTIAL, [(4, 'd')]),
                    (4, TokenType.WORD, [(103, 'd')])]
                   )
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 4)])))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2
def test_name_only_near_search():
    """A near-item token in front of a name yields a NearSearch.

    The NearSearch must wrap a PlaceSearch as its inner search.
    """
    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_only_search_with_category():
    """A 'categories' restriction becomes the qualifier of a name search."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('foo', 'bar')]
def test_name_with_near_item_search_with_category_mismatch():
    """No search is built for a near item under a ('foo', 'bar') restriction.

    The request is restricted to the category ('foo', 'bar') only.
    """
    name_tokens = [(2, TokenType.PARTIAL, [(1, 'a')]),
                   (2, TokenType.WORD, [(100, 'a')])]
    query = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])], name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))
    searches = list(builder.build(assignment))

    assert not searches
def test_name_with_near_item_search_with_category_match():
    """A near item whose category is allowed yields a NearSearch.

    The NearSearch must wrap a PlaceSearch as its inner search.

    NOTE(review): the categories list was cut off after its first entry
    in the reviewed source. The second entry ``('this', 'that')`` is
    reconstructed from the category the other tests in this file expect
    from the stub token -- confirm against the original.
    """
    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                         ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_with_qualifier_search_with_category_mismatch():
    """No search is built for a qualifier under a ('foo', 'bar') restriction.

    The request is restricted to the category ('foo', 'bar') only.
    """
    name_tokens = [(2, TokenType.PARTIAL, [(1, 'a')]),
                   (2, TokenType.WORD, [(100, 'a')])]
    query = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])], name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))
    searches = list(builder.build(assignment))

    assert not searches
def test_name_with_qualifier_search_with_category_match():
    """A qualifier whose category is allowed yields a qualified PlaceSearch.

    Only the category of the qualifier token must remain as qualifier.

    NOTE(review): the categories list was cut off after its first entry
    in the reviewed source. The second entry ``('this', 'that')`` is
    reconstructed from the qualifier asserted at the end of this test --
    confirm against the original.
    """
    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                         ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  qualifier=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('this', 'that')]
def test_name_only_search_with_countries():
    """A 'countries' restriction becomes the country filter of a name search.

    The search has no postcode or housenumber filter.
    """
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'de,en'}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert set(search.countries.values) == {'de', 'en'}
    assert not search.housenumbers.values
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for a name plus address query with given counts.

    The query has one name node followed by `num_address_parts` address
    nodes. Every node gets a partial and a full-word token. The four
    count parameters are handed to MyToken as its third positional
    argument (presumably the token count -- TODO confirm against Token).

    Returns the list of searches built for an assignment that uses the
    first node as name and all remaining nodes as a single address range.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    for _ in range(1 + num_address_parts):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    # Name tokens on the first node.
    query.add_token(TokenRange(0, 1), TokenType.PARTIAL,
                    MyToken(0.5, 1, name_part, 1, 'name_part', True))
    query.add_token(TokenRange(0, 1), TokenType.WORD,
                    MyToken(0, 101, name_full, 1, 'name_full', True))

    # The same pair of address tokens on each of the following nodes.
    for first in range(1, num_address_parts + 1):
        query.add_token(TokenRange(first, first + 1), TokenType.PARTIAL,
                        MyToken(0.5, 2, address_part, 1, 'address_part', True))
        query.add_token(TokenRange(first, first + 1), TokenType.WORD,
                        MyToken(0, 102, address_full, 1, 'address_full', True))

    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 1 + num_address_parts)])

    return list(builder.build(assignment))
def test_infrequent_partials_in_name():
    """With all counts at 1, a single PlaceSearch is built.

    The search looks up all name terms in 'name_vector' and restricts the
    result via 'nameaddress_vector'.
    """
    searches = make_counted_searches(1, 1, 1, 1)

    assert len(searches) == 1
    # Exactly one search was built; inspect it.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2

    assert set((l.column, l.lookup_type.__name__) for l in search.lookups) == \
        {('name_vector', 'LookupAll'), ('nameaddress_vector', 'Restrict')}
def test_frequent_partials_in_name_and_address():
    """With partial counts of 9999, two PlaceSearches are built.

    Ordered by penalty, the first search uses an any-lookup on the name
    with an address restriction, the second an all-lookup on both
    vectors.
    """
    searches = make_counted_searches(9999, 1, 9999, 1)

    assert len(searches) == 2

    for candidate in searches:
        assert isinstance(candidate, dbs.PlaceSearch)

    cheapest, costliest = sorted(searches, key=lambda s: s.penalty)

    cheapest_lookups = {(lookup.column, lookup.lookup_type.__name__)
                        for lookup in cheapest.lookups}
    assert cheapest_lookups == {('name_vector', 'LookupAny'),
                                ('nameaddress_vector', 'Restrict')}

    costliest_lookups = {(lookup.column, lookup.lookup_type.__name__)
                         for lookup in costliest.lookups}
    assert costliest_lookups == {('nameaddress_vector', 'LookupAll'),
                                 ('name_vector', 'LookupAll')}
def test_too_frequent_partials_in_name_and_address():
    """With partial counts of 20000 and 10000, one PlaceSearch is built.

    The remaining search uses an any-lookup on the name with an address
    restriction.
    """
    searches = make_counted_searches(20000, 1, 10000, 1)

    assert len(searches) == 1

    for candidate in searches:
        assert isinstance(candidate, dbs.PlaceSearch)

    ranked = sorted(searches, key=lambda s: s.penalty)

    found_lookups = {(lookup.column, lookup.lookup_type.__name__)
                     for lookup in ranked[0].lookups}
    assert found_lookups == {('name_vector', 'LookupAny'),
                             ('nameaddress_vector', 'Restrict')}