1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Tests for creating abstract searches from token assignments.
"""
import pytest

from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
from nominatim.api.search.db_search_builder import SearchBuilder
from nominatim.api.search.token_assignment import TokenAssignment
from nominatim.api.types import SearchDetails
import nominatim.api.search.db_searches as dbs
def get_category(self):
    """Return a fixed category pair for the test token.

    The tests in this file compare qualifier values against
    ``('this', 'that')``, so that constant pair is returned here.
    NOTE(review): presumably this belongs to the ``MyToken`` helper class
    used by the query builders below -- confirm the enclosing class.
    """
    return 'this', 'that'
def make_query(*args):
    """Build a single-phrase QueryStruct from compact token descriptions.

    Each positional argument describes the tokens that start at one node;
    the position of the argument is the start node. An argument is a list
    of ``(end, ttype, tinfo)`` tuples, where ``tinfo`` is a list of
    ``(token_id, word)`` pairs.

    Returns the populated query so that callers can hand it to a
    SearchBuilder.
    """
    q = QueryStruct([Phrase(PhraseType.NONE, '')])

    # The largest end position decides how many word nodes are needed
    # before the query is closed with an END node.
    node_count = max(inner[0] for tlist in args for inner in tlist)
    for _ in range(node_count):
        q.add_node(BreakType.WORD, PhraseType.NONE)
    q.add_node(BreakType.END, PhraseType.NONE)

    for start, tlist in enumerate(args):
        for end, ttype, tinfo in tlist:
            # PARTIAL tokens get 0.5 as first constructor value, all others 0.0.
            first_value = 0.5 if ttype == TokenType.PARTIAL else 0.0
            for tid, word in tinfo:
                q.add_token(TokenRange(start, end), ttype,
                            MyToken(first_value, tid, 1, word, True))

    return q
def test_country_search():
    """A lone country assignment yields one CountrySearch over both token countries."""
    q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'de', 'en'}
def test_country_search_with_country_restriction():
    """A country filter of 'en,fr' narrows the token countries 'de'/'en' down to 'en'."""
    q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'}))

    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.CountrySearch)
    assert set(search.countries.values) == {'en'}
def test_country_search_with_conflicting_country_restriction():
    """A country filter of 'fr' leaves nothing of the token countries 'de'/'en'."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    search_builder = SearchBuilder(query, details)

    assignment = TokenAssignment(country=TokenRange(0, 1))
    found = list(search_builder.build(assignment))

    # No search may be produced for the conflicting restriction.
    assert not found
def test_postcode_search_simple():
    """A lone postcode assignment yields a PostcodeSearch without further restrictions."""
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries.values
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_country():
    """Postcode plus country yields a PostcodeSearch restricted to that country."""
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.COUNTRY, [(1, 'xx')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  country=TokenRange(1, 2))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert search.countries.values == ['xx']
    assert not search.lookups
    assert not search.rankings
def test_postcode_with_address():
    """Postcode plus a partial-only address term yields lookups but no rankings."""
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert not search.rankings
def test_postcode_with_address_with_full_word():
    """Postcode plus an address term with a full word yields exactly one ranking."""
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')]),
                    (2, TokenType.WORD, [(1, 'full')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    assert not search.countries
    assert search.lookups
    assert len(search.rankings) == 1
# NOTE(review): the parameter list was left unterminated; only the visible
# parameter set is kept. Re-add any further sets that were intended.
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}])
def test_near_item_only(kwargs):
    """A lone near item yields a PoiSearch carrying the token category."""
    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PoiSearch)
    assert search.qualifiers.values == [('this', 'that')]
# NOTE(review): the parameter list was left unterminated; only the visible
# parameter set is kept. Re-add any further sets that were intended.
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}])
def test_near_item_skipped(kwargs):
    """A lone near item produces no search for the given search details."""
    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))

    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))

    assert len(searches) == 0
def test_name_only_search():
    """A lone name assignment yields an unrestricted PlaceSearch with one lookup and ranking."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert not search.qualifiers.values
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_qualifier():
    """Name plus qualifier yields a PlaceSearch carrying the qualifier's category."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.QUALIFIER, [(55, 'hotel')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  qualifier=TokenRange(1, 2))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert search.qualifiers.values == [('this', 'that')]
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_with_housenumber_search():
    """Name plus house number yields a PlaceSearch carrying the house number."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.HOUSENUMBER, [(66, '66')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  housenumber=TokenRange(1, 2))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert search.housenumbers.values == ['66']
    assert len(search.lookups) == 1
    assert len(search.rankings) == 1
def test_name_and_address():
    """Name plus two single-node address terms yields two lookups and three rankings."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.PARTIAL, [(2, 'b')]),
                    (2, TokenType.WORD, [(101, 'b')])],
                   [(3, TokenType.PARTIAL, [(3, 'c')]),
                    (3, TokenType.WORD, [(102, 'c')])]
                  )
    builder = SearchBuilder(q, SearchDetails())

    # NOTE(review): the second address range was cut off in the file; (2, 3)
    # is the range of the remaining tokens built above -- confirm.
    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 3)])))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 3
def test_name_and_complex_address():
    """Name plus an address with a multi-node full word yields two lookups and two rankings."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])],
                   [(2, TokenType.PARTIAL, [(2, 'b')]),
                    (3, TokenType.WORD, [(101, 'bc')])],
                   [(3, TokenType.PARTIAL, [(3, 'c')])],
                   [(4, TokenType.PARTIAL, [(4, 'd')]),
                    (4, TokenType.WORD, [(103, 'd')])]
                  )
    builder = SearchBuilder(q, SearchDetails())

    # NOTE(review): the trailing address range was cut off in the file. A
    # single range over the remaining nodes (2, 4) is assumed here -- confirm
    # that it matches the expected ranking count below.
    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2),
                                                           TokenRange(2, 4)])))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert not search.countries.values
    assert not search.housenumbers.values
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2
def test_name_only_near_search():
    """Near item followed by a name yields a NearSearch wrapping a PlaceSearch."""
    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_only_search_with_category():
    """A category filter in the search details becomes the PlaceSearch qualifier."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('foo', 'bar')]
def test_name_with_near_item_search_with_category_mismatch():
    """With a ('foo', 'bar') category filter, name plus near item produces no search."""
    near_tokens = [(1, TokenType.NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, TokenType.PARTIAL, [(1, 'a')]),
                   (2, TokenType.WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)

    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    search_builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))
    found = list(search_builder.build(assignment))

    assert not found
def test_name_with_near_item_search_with_category_match():
    """Name plus near item yields a NearSearch when the category filter matches."""
    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    # NOTE(review): the second category was cut off in the file. ('this', 'that')
    # is presumably the category reported by the test tokens, which the filter
    # has to contain for a match -- confirm.
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                         ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  near_item=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.NearSearch)
    assert isinstance(search.search, dbs.PlaceSearch)
def test_name_with_qualifier_search_with_category_mismatch():
    """With a ('foo', 'bar') category filter, name plus qualifier produces no search."""
    qualifier_tokens = [(1, TokenType.QUALIFIER, [(88, 'g')])]
    name_tokens = [(2, TokenType.PARTIAL, [(1, 'a')]),
                   (2, TokenType.WORD, [(100, 'a')])]
    query = make_query(qualifier_tokens, name_tokens)

    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    search_builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))
    found = list(search_builder.build(assignment))

    assert not found
def test_name_with_qualifier_search_with_category_match():
    """Name plus qualifier yields a PlaceSearch when the category filter matches."""
    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
    # NOTE(review): the second category was cut off in the file. ('this', 'that')
    # is the qualifier asserted below, so the filter has to contain it -- confirm.
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
                                                                         ('this', 'that')]}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                                                  qualifier=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert search.qualifiers.values == [('this', 'that')]
def test_name_only_search_with_countries():
    """A country filter in the search details is carried over into the PlaceSearch."""
    q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                    (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'de,en'}))

    searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1))))

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert not search.postcodes.values
    assert set(search.countries.values) == {'de', 'en'}
    assert not search.housenumbers.values
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for one name term followed by address terms.

    The query gets a PARTIAL and a WORD token for the name on node range
    (0, 1) and the same pair for each of the `num_address_parts` address
    nodes after it. The four leading parameters are passed as the third
    constructor value of the respective tokens (presumably their
    frequency count -- confirm against the Token class).

    Returns the list of searches built for a name on (0, 1) and a single
    address range spanning all address nodes.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])

    # One word node for the name and one per address part, then the END node.
    word_nodes = 1 + num_address_parts
    for _ in range(word_nodes):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    name_partial = MyToken(0.5, 1, name_part, 'name_part', True)
    query.add_token(TokenRange(0, 1), TokenType.PARTIAL, name_partial)
    name_word = MyToken(0, 101, name_full, 'name_full', True)
    query.add_token(TokenRange(0, 1), TokenType.WORD, name_word)

    for start in range(1, num_address_parts + 1):
        address_partial = MyToken(0.5, 2, address_part, 'address_part', True)
        query.add_token(TokenRange(start, start + 1), TokenType.PARTIAL, address_partial)
        address_word = MyToken(0, 102, address_full, 'address_full', True)
        query.add_token(TokenRange(start, start + 1), TokenType.WORD, address_word)

    search_builder = SearchBuilder(query, SearchDetails())

    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 1 + num_address_parts)])
    return list(search_builder.build(assignment))
def test_infrequent_partials_in_name():
    """With all token values at 1, the name is looked up fully and the address only restricts."""
    searches = make_counted_searches(1, 1, 1, 1)

    assert len(searches) == 1

    # Exactly one search is expected; pick it for inspection.
    search = searches[0]

    assert isinstance(search, dbs.PlaceSearch)
    assert len(search.lookups) == 2
    assert len(search.rankings) == 2

    assert set((l.column, l.lookup_type) for l in search.lookups) == \
        {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')}
def test_frequent_partials_in_name_and_address():
    """Partial values of 9999 for name and address yield two PlaceSearch variants."""
    found = make_counted_searches(9999, 1, 9999, 1)

    assert len(found) == 2

    for candidate in found:
        assert isinstance(candidate, dbs.PlaceSearch)

    # Compare in order of increasing penalty.
    cheapest, costlier = sorted(found, key=lambda item: item.penalty)

    assert {(lk.column, lk.lookup_type) for lk in cheapest.lookups} == \
        {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
    assert {(lk.column, lk.lookup_type) for lk in costlier.lookups} == \
        {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
def test_too_frequent_partials_in_name_and_address():
    """Partial values of 20000 and 10000 leave only a single PlaceSearch variant."""
    found = make_counted_searches(20000, 1, 10000, 1)

    assert len(found) == 1

    for candidate in found:
        assert isinstance(candidate, dbs.PlaceSearch)

    # Compare in order of increasing penalty.
    by_penalty = sorted(found, key=lambda item: item.penalty)

    assert {(lk.column, lk.lookup_type) for lk in by_penalty[0].lookups} == \
        {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}