# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for creating abstract searches from token assignments.
"""
import pytest

from nominatim_api.search.query import Token, TokenRange, PhraseType, QueryStruct, Phrase
import nominatim_api.search.query as qmod
from nominatim_api.search.db_search_builder import SearchBuilder
from nominatim_api.search.token_assignment import TokenAssignment
from nominatim_api.types import SearchDetails
import nominatim_api.search.db_searches as dbs
class MyToken(Token):
    """Token subclass used for every token added to the test queries."""

    def get_category(self):
        """Return the fixed category pair reported for a test token."""
        # NOTE(review): the class header and the method body were not visible
        # in the source. ('this', 'that') is the pair the qualifier assertions
        # in this file compare against -- confirm against the original.
        return 'this', 'that'
def make_query(*args):
    """Build a single-phrase QueryStruct from compact token descriptions.

    Positional argument ``i`` lists the tokens that start at node ``i``.
    Each entry is a tuple ``(end, ttype, tinfo)`` where ``tinfo`` is a list
    of ``(token_id, word)`` pairs; every pair becomes one token of type
    ``ttype`` spanning ``TokenRange(i, end)``. Partial tokens get a penalty
    of 0.5, all other token types a penalty of 0.0. Count and address count
    are always 1.

    Returns the populated query.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])

    # Add as many word-break nodes as the largest end index used by any
    # token, then the node that terminates the query.
    last_end = max(entry[0] for tlist in args for entry in tlist)
    for _ in range(last_end):
        query.add_node(qmod.BREAK_WORD, PhraseType.NONE)
    query.add_node(qmod.BREAK_END, PhraseType.NONE)

    for start, tlist in enumerate(args):
        for end, ttype, tinfo in tlist:
            penalty = 0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0
            for tid, word in tinfo:
                # Positional order (penalty, token, count, addr_count, word),
                # the same order make_counted_searches() uses for MyToken.
                query.add_token(TokenRange(start, end), ttype,
                                MyToken(penalty, tid, 1, 1, word))

    return query
def test_country_search():
    """A lone country token yields one CountrySearch covering all its countries."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'de', 'en'}
def test_country_search_with_country_restriction():
    """A 'countries' restriction keeps only the token countries that it also lists."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(query, SearchDetails.from_kwargs({'countries': 'en,fr'}))

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'en'}
def test_country_search_with_conflicting_country_restriction():
    """No search is built when the 'countries' restriction excludes every token country."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    assignment = TokenAssignment(country=TokenRange(0, 1))

    found = list(SearchBuilder(query, details).build(assignment))

    assert len(found) == 0
def test_postcode_search_simple():
    """A lone postcode token yields one PostcodeSearch without countries, lookups or rankings."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries.values
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_country():
    """Postcode plus country token yields one PostcodeSearch restricted to that country."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
                       [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  country=TokenRange(1, 2))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert search.countries.values == ['xx']
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_address():
    """Postcode plus a partial-only address part yields lookups but no rankings."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
                       [(2, qmod.TOKEN_PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert not search.rankings
def test_postcode_with_address_with_full_word():
    """Postcode plus an address part that also has a full word yields exactly one ranking."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
                       [(2, qmod.TOKEN_PARTIAL, [(100, 'word')]),
                        (2, qmod.TOKEN_WORD, [(1, 'full')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert len(search.rankings) == 1
# NOTE(review): only the first parameter set was visible in the source; any
# further parameter sets of the original list need to be restored here.
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}])
def test_near_item_only(kwargs):
    """A lone near-item token yields one PoiSearch qualified by the token's category."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(query, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PoiSearch)
    assert search.qualifiers.values == [('this', 'that')]
# NOTE(review): only the first parameter set was visible in the source; any
# further parameter sets of the original list need to be restored here.
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}])
def test_near_item_skipped(kwargs):
    """A lone near-item token yields no search for the given search details."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(query, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 0
def test_name_only_search():
    """A name-only assignment yields one PlaceSearch with one lookup and one ranking."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert not search.qualifiers.values
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_qualifier():
    """Name plus qualifier token yields one PlaceSearch carrying the token's category."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])],
                       [(2, qmod.TOKEN_QUALIFIER, [(55, 'hotel')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  qualifier=TokenRange(1, 2))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert search.qualifiers.values == [('this', 'that')]
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_housenumber_search():
    """Name plus housenumber token yields one PlaceSearch carrying that housenumber."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])],
                       [(2, qmod.TOKEN_HOUSENUMBER, [(66, '66')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  housenumber=TokenRange(1, 2))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert search.housenumbers.values == ['66']
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_and_address():
    """Name plus two address parts yields one PlaceSearch with two lookups, three rankings."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])],
                       [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                        (2, qmod.TOKEN_WORD, [(101, 'b')])],
                       [(3, qmod.TOKEN_PARTIAL, [(3, 'c')]),
                        (3, qmod.TOKEN_WORD, [(102, 'c')])]
                       )
    builder = SearchBuilder(query, SearchDetails())

    # NOTE(review): the second address range was not visible in the source;
    # TokenRange(2, 3) is assumed because it covers the remaining word 'c'.
    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 3)])))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 3
def test_name_and_complex_address():
    """Name plus overlapping address tokens yields one PlaceSearch with two lookups, two rankings."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])],
                       [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                        (3, qmod.TOKEN_WORD, [(101, 'bc')])],
                       [(3, qmod.TOKEN_PARTIAL, [(3, 'c')])],
                       [(4, qmod.TOKEN_PARTIAL, [(4, 'd')]),
                        (4, qmod.TOKEN_WORD, [(103, 'd')])]
                       )
    builder = SearchBuilder(query, SearchDetails())

    # NOTE(review): the address ranges after TokenRange(1, 2) were not visible
    # in the source; a single TokenRange(2, 4) spanning the remaining words is
    # assumed -- confirm against the original test.
    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 4)])))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2
def test_name_only_near_search():
    """Near-item plus name yields one NearSearch that wraps a PlaceSearch."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
                       [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (2, qmod.TOKEN_WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_only_search_with_category():
    """A 'categories' restriction becomes the qualifier of a name-only PlaceSearch."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('foo', 'bar')]
def test_name_with_near_item_search_with_category_mismatch():
    """No search is built when the near-item token is combined with categories [('foo', 'bar')]."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
                       [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (2, qmod.TOKEN_WORD, [(100, 'a')])])
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    found = list(SearchBuilder(query, details).build(assignment))

    assert len(found) == 0
def test_name_with_near_item_search_with_category_match():
    """Near-item plus name with a 'categories' restriction yields a NearSearch over a PlaceSearch."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])],
                       [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (2, qmod.TOKEN_WORD, [(100, 'a')])])
    # NOTE(review): the second category was not visible in the source;
    # ('this', 'that') is assumed because it is the pair the qualifier
    # assertions in this file expect for test tokens.
    builder = SearchBuilder(query, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                             ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_with_qualifier_search_with_category_mismatch():
    """No search is built when the qualifier token is combined with categories [('foo', 'bar')]."""
    query = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])],
                       [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (2, qmod.TOKEN_WORD, [(100, 'a')])])
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))

    found = list(SearchBuilder(query, details).build(assignment))

    assert len(found) == 0
def test_name_with_qualifier_search_with_category_match():
    """Qualifier plus name with a 'categories' restriction keeps only the token's category."""
    query = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])],
                       [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (2, qmod.TOKEN_WORD, [(100, 'a')])])
    # NOTE(review): the second category was not visible in the source;
    # ('this', 'that') is assumed because the assertion below expects
    # exactly that pair to remain as qualifier.
    builder = SearchBuilder(query, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                             ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  qualifier=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('this', 'that')]
def test_name_only_search_with_countries():
    """A 'countries' restriction is carried into the PlaceSearch of a name-only assignment."""
    query = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                        (1, qmod.TOKEN_WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails.from_kwargs({'countries': 'de,en'}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert set(search.countries.values) == {'de', 'en'}
    assert not search.housenumbers.values
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for one name word followed by address words.

    The four leading arguments are passed as the third positional argument
    of the respective MyToken (partial and full token of the name, partial
    and full token of each address word). ``num_address_parts`` is the number
    of address words, each with its own partial/full token pair.

    Returns the list of searches built for an assignment with the name at
    ``TokenRange(0, 1)`` and one address range covering all address words.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    node_count = 1 + num_address_parts
    for _ in range(node_count):
        query.add_node(qmod.BREAK_WORD, PhraseType.NONE)
    query.add_node(qmod.BREAK_END, PhraseType.NONE)

    # Name word: one partial and one full token on the first position.
    query.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL,
                    MyToken(0.5, 1, name_part, 1, 'name_part'))
    query.add_token(TokenRange(0, 1), qmod.TOKEN_WORD,
                    MyToken(0, 101, name_full, 1, 'name_full'))

    # Address words: the same partial/full pair on each following position.
    for start in range(1, num_address_parts + 1):
        query.add_token(TokenRange(start, start + 1), qmod.TOKEN_PARTIAL,
                        MyToken(0.5, 2, address_part, 1, 'address_part'))
        query.add_token(TokenRange(start, start + 1), qmod.TOKEN_WORD,
                        MyToken(0, 102, address_full, 1, 'address_full'))

    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 1 + num_address_parts)])

    return list(builder.build(assignment))
def test_infrequent_partials_in_name():
    """With all counts at 1, one search looks up the name and restricts by address."""
    searches = make_counted_searches(1, 1, 1, 1)

    assert len(searches) == 1

    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2

    assert {(lookup.column, lookup.lookup_type.__name__) for lookup in search.lookups} == \
        {('name_vector', 'LookupAll'), ('nameaddress_vector', 'Restrict')}
def test_frequent_partials_in_name_and_address():
    """Partial counts of 9999 for name and address yield two PlaceSearches.

    Ordered by penalty, the first uses LookupAny on the name with an address
    restriction; the second uses LookupAll on both vectors.
    """
    found = make_counted_searches(9999, 1, 9999, 1)

    assert len(found) == 2

    assert all(isinstance(item, dbs.PlaceSearch) for item in found)
    found.sort(key=lambda item: item.penalty)

    def lookup_kinds(search):
        # (column, lookup class name) pairs of all lookups of one search.
        return {(lookup.column, lookup.lookup_type.__name__) for lookup in search.lookups}

    assert lookup_kinds(found[0]) == \
        {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')}
    assert lookup_kinds(found[1]) == \
        {('nameaddress_vector', 'LookupAll'), ('name_vector', 'LookupAll')}
def test_too_frequent_partials_in_name_and_address():
    """Partial counts of 20000 (name) and 10000 (address) leave a single PlaceSearch.

    That search uses LookupAny on the name with an address restriction.
    """
    found = make_counted_searches(20000, 1, 10000, 1)

    assert len(found) == 1

    assert all(isinstance(item, dbs.PlaceSearch) for item in found)
    found.sort(key=lambda item: item.penalty)

    first_kinds = {(lookup.column, lookup.lookup_type.__name__)
                   for lookup in found[0].lookups}
    assert first_kinds == \
        {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')}