]> git.openstreetmap.org Git - nominatim.git/blob - test/python/api/search/test_db_search_builder.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / test / python / api / search / test_db_search_builder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for creating abstract searches from token assignments.
9 """
10 import pytest
11
12 from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase
13 import nominatim_api.search.query as qmod
14 from nominatim_api.search.db_search_builder import SearchBuilder
15 from nominatim_api.search.token_assignment import TokenAssignment
16 from nominatim_api.types import SearchDetails
17 import nominatim_api.search.db_searches as dbs
18
19
class MyToken(Token):
    """Token subclass used by the tests in this module.

    Every instance reports the same category pair, ``('this', 'that')``.
    """

    def get_category(self):
        """Return the fixed category pair for this test token."""
        category = ('this', 'that')
        return category
23
24
def make_query(*args):
    """Build a single-phrase QueryStruct filled with test tokens.

    Each positional argument describes the tokens that start at the node
    whose index equals the argument's position. It is a list of
    ``(end, ttype, tinfo)`` tuples, where ``end`` is the end node of the
    token range, ``ttype`` the token type and ``tinfo`` a list of
    ``(token_id, lookup_word)`` pairs.

    Partial tokens get a penalty of 0.5, all other tokens a penalty of 0.0.
    """
    query = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])

    # The largest end position decides how many word-break nodes are added
    # before the closing end-break node.
    last_end = max(entry[0] for tokens in args for entry in tokens)
    for _ in range(last_end):
        query.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY)
    query.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

    for start, tokens in enumerate(args):
        for end, ttype, tinfo in tokens:
            penalty = 0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0
            for tid, word in tinfo:
                token = MyToken(penalty=penalty, token=tid, count=1,
                                addr_count=1, lookup_word=word)
                query.add_token(TokenRange(start, end), ttype, token)

    return query
41
42
def test_country_search():
    """A country-only assignment yields one CountrySearch with all country tokens."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(country=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1

    result = results[0]

    assert isinstance(result, dbs.CountrySearch)
    assert set(result.countries.values) == {'de', 'en'}
55
56
def test_country_search_with_country_restriction():
    """Only countries that are also in the 'countries' restriction are kept."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'en,fr'})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(country=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1

    result = results[0]

    assert isinstance(result, dbs.CountrySearch)
    assert set(result.countries.values) == {'en'}
69
70
def test_country_search_with_conflicting_country_restriction():
    """No search is built when the restriction excludes every country token."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(country=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert not results
78
79
def test_postcode_search_simple():
    """A postcode-only assignment yields a bare PostcodeSearch."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])])
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(postcode=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PostcodeSearch)
    assert result.postcodes.values == ['2367']
    # Nothing besides the postcode itself restricts the search.
    assert not result.countries.values
    assert not result.lookups
    assert not result.rankings
94
95
def test_postcode_with_country():
    """A postcode plus country assignment yields a PostcodeSearch with the country set."""
    postcode_tokens = [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])]
    country_tokens = [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])]
    query = make_query(postcode_tokens, country_tokens)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 country=TokenRange(1, 2))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PostcodeSearch)
    assert result.postcodes.values == ['2367']
    assert result.countries.values == ['xx']
    assert not result.lookups
    assert not result.rankings
112
113
def test_postcode_with_address():
    """A postcode with a partial-only address term adds lookups but no rankings."""
    q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
                   [(2, qmod.TOKEN_PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, as the other postcode tests do,
    # instead of relying on the truthiness of the container object.
    assert not search.countries.values
    assert search.lookups
    assert not search.rankings
130
131
def test_postcode_with_address_with_full_word():
    """A postcode with a partial and a full-word address token adds one ranking."""
    q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
                   [(2, qmod.TOKEN_PARTIAL, [(100, 'word')]),
                    (2, qmod.TOKEN_WORD, [(1, 'full')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, as the other postcode tests do,
    # instead of relying on the truthiness of the container object.
    assert not search.countries.values
    assert search.lookups
    assert len(search.rankings) == 1
149
150
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
                                    {'near': '10,10'}])
def test_near_item_only(kwargs):
    """A lone near item becomes a PoiSearch with a bounded viewbox or a near point."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(near_item=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1

    result = results[0]

    assert isinstance(result, dbs.PoiSearch)
    # The qualifier is the category reported by MyToken.get_category().
    assert result.qualifiers.values == [('this', 'that')]
165
166
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
                                    {}])
def test_near_item_skipped(kwargs):
    """A lone near item yields no search with an unbounded viewbox or no area at all."""
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(near_item=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert not results
176
177
def test_name_only_search():
    """A name-only assignment yields a PlaceSearch with one lookup and one ranking."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    # None of the optional restrictions may be filled in.
    assert not result.postcodes.values
    assert not result.countries.values
    assert not result.housenumbers.values
    assert not result.qualifiers.values
    assert len(result.lookups) == 1
    assert len(result.rankings) == 1
195
196
def test_name_with_qualifier():
    """A name followed by a qualifier yields a PlaceSearch carrying the token category."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    qualifier_tokens = [(2, qmod.TOKEN_QUALIFIER, [(55, 'hotel')])]
    query = make_query(name_tokens, qualifier_tokens)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 qualifier=TokenRange(1, 2))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert not result.postcodes.values
    assert not result.countries.values
    assert not result.housenumbers.values
    # The qualifier is the category reported by MyToken.get_category().
    assert result.qualifiers.values == [('this', 'that')]
    assert len(result.lookups) == 1
    assert len(result.rankings) == 1
216
217
def test_name_with_housenumber_search():
    """A name followed by a house number yields a PlaceSearch with that house number."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    housenumber_tokens = [(2, qmod.TOKEN_HOUSENUMBER, [(66, '66')])]
    query = make_query(name_tokens, housenumber_tokens)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 housenumber=TokenRange(1, 2))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert not result.postcodes.values
    assert not result.countries.values
    assert result.housenumbers.values == ['66']
    assert len(result.lookups) == 1
    assert len(result.rankings) == 1
236
237
def test_name_and_address():
    """A name with two address terms yields a PlaceSearch with 2 lookups and 3 rankings."""
    tokens_a = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                (1, qmod.TOKEN_WORD, [(100, 'a')])]
    tokens_b = [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                (2, qmod.TOKEN_WORD, [(101, 'b')])]
    tokens_c = [(3, qmod.TOKEN_PARTIAL, [(3, 'c')]),
                (3, qmod.TOKEN_WORD, [(102, 'c')])]
    query = make_query(tokens_a, tokens_b, tokens_c)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    address_ranges = [TokenRange(1, 2), TokenRange(2, 3)]
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=address_ranges)
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert not result.postcodes.values
    assert not result.countries.values
    assert not result.housenumbers.values
    assert len(result.lookups) == 2
    assert len(result.rankings) == 3
260
261
def test_name_and_complex_address():
    """A name plus an address with a multi-node range yields 2 lookups and 2 rankings.

    The full word 'bc' spans nodes 1-3, while the second address range
    covers nodes 2-4.
    """
    tokens_a = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                (1, qmod.TOKEN_WORD, [(100, 'a')])]
    tokens_b = [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                (3, qmod.TOKEN_WORD, [(101, 'bc')])]
    tokens_c = [(3, qmod.TOKEN_PARTIAL, [(3, 'c')])]
    tokens_d = [(4, qmod.TOKEN_PARTIAL, [(4, 'd')]),
                (4, qmod.TOKEN_WORD, [(103, 'd')])]
    query = make_query(tokens_a, tokens_b, tokens_c, tokens_d)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    address_ranges = [TokenRange(1, 2), TokenRange(2, 4)]
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=address_ranges)
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert not result.postcodes.values
    assert not result.countries.values
    assert not result.housenumbers.values
    assert len(result.lookups) == 2
    assert len(result.rankings) == 2
285
286
def test_name_only_near_search():
    """A near item followed by a name yields a NearSearch wrapping a PlaceSearch."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    details = SearchDetails()
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.NearSearch)
    assert isinstance(result.search, dbs.PlaceSearch)
301
302
def test_name_only_search_with_category():
    """A 'categories' restriction becomes the qualifier of a name-only PlaceSearch."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert result.qualifiers.values == [('foo', 'bar')]
315
316
def test_name_with_near_item_search_with_category_mismatch():
    """No search is built when the near item category is not in 'categories'."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert not results
327
328
def test_name_with_near_item_search_with_category_match():
    """A NearSearch is built when the near item category is listed in 'categories'."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    categories = [('foo', 'bar'), ('this', 'that')]
    details = SearchDetails.from_kwargs({'categories': categories})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.NearSearch)
    assert isinstance(result.search, dbs.PlaceSearch)
344
345
def test_name_with_qualifier_search_with_category_mismatch():
    """No search is built when the qualifier category is not in 'categories'."""
    qualifier_tokens = [(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(qualifier_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert not results
356
357
def test_name_with_qualifier_search_with_category_match():
    """Only the qualifier category that is also listed in 'categories' is kept."""
    qualifier_tokens = [(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(qualifier_tokens, name_tokens)
    categories = [('foo', 'bar'), ('this', 'that')]
    details = SearchDetails.from_kwargs({'categories': categories})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert result.qualifiers.values == [('this', 'that')]
373
374
def test_name_only_search_with_countries():
    """A 'countries' restriction is copied into a name-only PlaceSearch."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'countries': 'de,en'})
    builder = SearchBuilder(query, details)

    assignment = TokenAssignment(name=TokenRange(0, 1))
    results = list(builder.build(assignment))

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert not result.postcodes.values
    assert set(result.countries.values) == {'de', 'en'}
    assert not result.housenumbers.values
389
390
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for a name term followed by address terms.

    The query has one name node and ``num_address_parts`` address nodes.
    Each node gets a partial and a full-word token. The four leading
    parameters are passed as the third positional argument of the
    matching token (presumably its ``count``, going by the keyword order
    used in make_query() -- confirm against Token).

    Returns the list of searches built for an assignment with the name on
    node range (0, 1) and a single address range over all address nodes.
    """
    query = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])
    node_count = 1 + num_address_parts
    for _ in range(node_count):
        query.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY)
    query.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

    # Name tokens sit on the first node.
    query.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL,
                    MyToken(0.5, 1, name_part, 1, 'name_part'))
    query.add_token(TokenRange(0, 1), qmod.TOKEN_WORD,
                    MyToken(0, 101, name_full, 1, 'name_full'))

    # Every address node receives the same pair of partial/full tokens.
    for offset in range(num_address_parts):
        start, end = offset + 1, offset + 2
        query.add_token(TokenRange(start, end), qmod.TOKEN_PARTIAL,
                        MyToken(0.5, 2, address_part, 1, 'address_part'))
        query.add_token(TokenRange(start, end), qmod.TOKEN_WORD,
                        MyToken(0, 102, address_full, 1, 'address_full'))

    builder = SearchBuilder(query, SearchDetails())

    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, node_count)])
    return list(builder.build(assignment))
412
413
def test_infrequent_partials_in_name():
    """With all values at 1, one search looks up the name and restricts by address."""
    results = make_counted_searches(1, 1, 1, 1)

    assert len(results) == 1
    result = results[0]

    assert isinstance(result, dbs.PlaceSearch)
    assert len(result.lookups) == 2
    assert len(result.rankings) == 2

    found = {(lookup.column, lookup.lookup_type.__name__)
             for lookup in result.lookups}
    assert found == {('name_vector', 'LookupAll'),
                     ('nameaddress_vector', 'Restrict')}
426
427
def test_frequent_partials_in_name_and_address():
    """Partial values of 9999 for name and address produce two alternative searches."""
    results = make_counted_searches(9999, 1, 9999, 1)

    assert len(results) == 2

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    # Compare in order of increasing penalty.
    by_penalty = sorted(results, key=lambda item: item.penalty)

    cheapest = {(lookup.column, lookup.lookup_type.__name__)
                for lookup in by_penalty[0].lookups}
    assert cheapest == {('name_vector', 'LookupAny'),
                        ('nameaddress_vector', 'Restrict')}

    costlier = {(lookup.column, lookup.lookup_type.__name__)
                for lookup in by_penalty[1].lookups}
    assert costlier == {('nameaddress_vector', 'LookupAll'),
                        ('name_vector', 'LookupAll')}
440
441
def test_too_frequent_partials_in_name_and_address():
    """Partial values of 20000 (name) and 10000 (address) leave a single search."""
    results = make_counted_searches(20000, 1, 10000, 1)

    assert len(results) == 1

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    by_penalty = sorted(results, key=lambda item: item.penalty)

    found = {(lookup.column, lookup.lookup_type.__name__)
             for lookup in by_penalty[0].lookups}
    assert found == {('name_vector', 'LookupAny'),
                     ('nameaddress_vector', 'Restrict')}