git.openstreetmap.org Git - nominatim.git/blob - test/python/api/search/test_db_search_builder.py
replace datrie library with a more simple pure-Python class
[nominatim.git] / test / python / api / search / test_db_search_builder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for creating abstract searches from token assignments.
9 """
10 import pytest
11
12 from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase
13 import nominatim_api.search.query as qmod
14 from nominatim_api.search.db_search_builder import SearchBuilder
15 from nominatim_api.search.token_assignment import TokenAssignment
16 from nominatim_api.types import SearchDetails
17 import nominatim_api.search.db_searches as dbs
18
class MyToken(Token):
    """Token stand-in for the tests; it always reports the same category."""

    def get_category(self):
        """Return the fixed category pair ``('this', 'that')``."""
        category = ('this', 'that')
        return category
22
23
def make_query(*args):
    """Create a single-phrase ``QueryStruct`` filled with ``MyToken`` tokens.

    The position of each argument is used as the start index of the token
    ranges it describes. Every argument is a list of ``(end, ttype, tinfo)``
    tuples, where ``tinfo`` is a list of ``(token id, lookup word)`` pairs.
    Partial tokens get a penalty of 0.5, all other tokens a penalty of 0.0.
    """
    query = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])

    # Add as many word-break nodes as the largest end index in use,
    # followed by the final end node.
    highest_end = max(spec[0] for specs in args for spec in specs)
    for _ in range(highest_end):
        query.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY)
    query.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

    for start, specs in enumerate(args):
        for end, ttype, tinfo in specs:
            penalty = 0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0
            for tid, word in tinfo:
                token = MyToken(penalty=penalty, token=tid, count=1,
                                addr_count=1, lookup_word=word)
                query.add_token(TokenRange(start, end), ttype, token)

    return query
41
42
def test_country_search():
    """A country assignment yields one CountrySearch over both token words."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1

    found = results[0]

    assert isinstance(found, dbs.CountrySearch)
    assert set(found.countries.values) == {'de', 'en'}
55
56
def test_country_search_with_country_restriction():
    """With a 'countries' restriction of 'en,fr' only 'en' remains in the search."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'en,fr'})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1

    found = results[0]

    assert isinstance(found, dbs.CountrySearch)
    assert set(found.countries.values) == {'en'}
69
70
def test_country_search_with_conflicting_country_restriction():
    """No search is built when the 'countries' restriction matches no token."""
    query = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 0
78
79
def test_postcode_search_simple():
    """A lone postcode token yields a PostcodeSearch without further filters."""
    query = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])])
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert not found.countries.values
    assert not found.lookups
    assert not found.rankings
94
95
def test_postcode_with_country():
    """Postcode plus country yields a PostcodeSearch carrying the country."""
    postcode_tokens = [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])]
    country_tokens = [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])]
    query = make_query(postcode_tokens, country_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 country=TokenRange(1, 2))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert found.countries.values == ['xx']
    assert not found.lookups
    assert not found.rankings
112
113
def test_postcode_with_address():
    """Postcode plus a partial-only address term gives lookups but no rankings."""
    postcode_tokens = [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])]
    address_tokens = [(2, qmod.TOKEN_PARTIAL, [(100, 'word')])]
    query = make_query(postcode_tokens, address_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert not found.countries
    assert found.lookups
    assert not found.rankings
130
131
def test_postcode_with_address_with_full_word():
    """Postcode plus an address term with a full word adds exactly one ranking."""
    postcode_tokens = [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])]
    address_tokens = [(2, qmod.TOKEN_PARTIAL, [(100, 'word')]),
                      (2, qmod.TOKEN_WORD, [(1, 'full')])]
    query = make_query(postcode_tokens, address_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 address=[TokenRange(1, 2)])

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PostcodeSearch)
    assert found.postcodes.values == ['2367']
    assert not found.countries
    assert found.lookups
    assert len(found.rankings) == 1
149
150
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1', 'bounded_viewbox': True},
    {'near': '10,10'},
])
def test_near_item_only(kwargs):
    """A lone near-item token yields a PoiSearch for the parametrized details.

    The parameter sets are a bounded viewbox and a 'near' parameter.
    """
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    search_builder = SearchBuilder(query, SearchDetails.from_kwargs(kwargs))
    assignment = TokenAssignment(near_item=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1

    found = results[0]

    assert isinstance(found, dbs.PoiSearch)
    assert found.qualifiers.values == [('this', 'that')]
165
166
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1'},
    {},
])
def test_near_item_skipped(kwargs):
    """A lone near-item token yields no search for the parametrized details.

    The parameter sets are a viewbox without 'bounded_viewbox' and no
    parameters at all.
    """
    query = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])])
    search_builder = SearchBuilder(query, SearchDetails.from_kwargs(kwargs))
    assignment = TokenAssignment(near_item=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 0
176
177
def test_name_only_search():
    """A name-only assignment yields one PlaceSearch without extra filters."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert not found.qualifiers.values
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
195
196
def test_name_with_qualifier():
    """Name plus qualifier yields a PlaceSearch using the token's category."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    qualifier_tokens = [(2, qmod.TOKEN_QUALIFIER, [(55, 'hotel')])]
    query = make_query(name_tokens, qualifier_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 qualifier=TokenRange(1, 2))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert found.qualifiers.values == [('this', 'that')]
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
216
217
def test_name_with_housenumber_search():
    """Name plus house number yields a PlaceSearch carrying the house number."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    housenumber_tokens = [(2, qmod.TOKEN_HOUSENUMBER, [(66, '66')])]
    query = make_query(name_tokens, housenumber_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 housenumber=TokenRange(1, 2))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert found.housenumbers.values == ['66']
    assert len(found.lookups) == 1
    assert len(found.rankings) == 1
236
237
def test_name_and_address():
    """A name with two single-word address terms yields one PlaceSearch.

    The resulting search is expected to have two lookups and three rankings.
    """
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    first_address = [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                     (2, qmod.TOKEN_WORD, [(101, 'b')])]
    second_address = [(3, qmod.TOKEN_PARTIAL, [(3, 'c')]),
                      (3, qmod.TOKEN_WORD, [(102, 'c')])]
    query = make_query(name_tokens, first_address, second_address)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 3)])

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert len(found.lookups) == 2
    assert len(found.rankings) == 3
261
262
def test_name_and_complex_address():
    """A name with address terms of uneven token coverage yields one PlaceSearch.

    The query contains a full word spanning nodes 1 to 3, while the address
    assignment splits the ranges as (1, 2) and (2, 4). The resulting search
    is expected to have two lookups and two rankings.
    """
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    tokens_from_1 = [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]),
                     (3, qmod.TOKEN_WORD, [(101, 'bc')])]
    tokens_from_2 = [(3, qmod.TOKEN_PARTIAL, [(3, 'c')])]
    tokens_from_3 = [(4, qmod.TOKEN_PARTIAL, [(4, 'd')]),
                     (4, qmod.TOKEN_WORD, [(103, 'd')])]
    query = make_query(name_tokens, tokens_from_1, tokens_from_2, tokens_from_3)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 4)])

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert not found.countries.values
    assert not found.housenumbers.values
    assert len(found.lookups) == 2
    assert len(found.rankings) == 2
287
288
def test_name_only_near_search():
    """Near item plus name yields a NearSearch wrapping a PlaceSearch."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.NearSearch)
    assert isinstance(found.search, dbs.PlaceSearch)
303
304
def test_name_only_search_with_category():
    """A 'categories' restriction becomes the qualifiers of the PlaceSearch."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert found.qualifiers.values == [('foo', 'bar')]
317
318
def test_name_with_near_item_search_with_category_mismatch():
    """No search is built when 'categories' excludes the near item's category."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 0
329
330
def test_name_with_near_item_search_with_category_match():
    """A NearSearch is built when 'categories' includes the near item's category."""
    near_tokens = [(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(near_tokens, name_tokens)
    allowed_categories = [('foo', 'bar'), ('this', 'that')]
    details = SearchDetails.from_kwargs({'categories': allowed_categories})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 near_item=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.NearSearch)
    assert isinstance(found.search, dbs.PlaceSearch)
346
347
def test_name_with_qualifier_search_with_category_mismatch():
    """No search is built when 'categories' excludes the qualifier's category."""
    qualifier_tokens = [(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(qualifier_tokens, name_tokens)
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 0
358
359
def test_name_with_qualifier_search_with_category_match():
    """Only the qualifier's own category is kept when 'categories' includes it."""
    qualifier_tokens = [(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])]
    name_tokens = [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (2, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(qualifier_tokens, name_tokens)
    allowed_categories = [('foo', 'bar'), ('this', 'that')]
    details = SearchDetails.from_kwargs({'categories': allowed_categories})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 qualifier=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert found.qualifiers.values == [('this', 'that')]
375
376
def test_name_only_search_with_countries():
    """A 'countries' restriction is carried over into a name-only PlaceSearch."""
    name_tokens = [(1, qmod.TOKEN_PARTIAL, [(1, 'a')]),
                   (1, qmod.TOKEN_WORD, [(100, 'a')])]
    query = make_query(name_tokens)
    details = SearchDetails.from_kwargs({'countries': 'de,en'})
    search_builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(search_builder.build(assignment))

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert not found.postcodes.values
    assert set(found.countries.values) == {'de', 'en'}
    assert not found.housenumbers.values
391
392
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for a name followed by a multi-part address term.

    The query consists of one name node followed by ``num_address_parts``
    address nodes. Each node gets one partial and one full-word token. The
    four leading arguments are handed to ``MyToken`` as its third positional
    argument (presumably the token count -- confirm against ``Token``).

    Returns the list of searches built for an assignment with the name at
    range (0, 1) and one address range covering all address nodes.
    """
    query = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])
    node_count = 1 + num_address_parts
    for _ in range(node_count):
        query.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY)
    query.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

    # Tokens for the name term.
    query.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL,
                    MyToken(0.5, 1, name_part, 1, 'name_part'))
    query.add_token(TokenRange(0, 1), qmod.TOKEN_WORD,
                    MyToken(0, 101, name_full, 1, 'name_full'))

    # Tokens for every single-node part of the address term.
    for start in range(1, node_count):
        query.add_token(TokenRange(start, start + 1), qmod.TOKEN_PARTIAL,
                        MyToken(0.5, 2, address_part, 1, 'address_part'))
        query.add_token(TokenRange(start, start + 1), qmod.TOKEN_WORD,
                        MyToken(0, 102, address_full, 1, 'address_full'))

    search_builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, node_count)])

    return list(search_builder.build(assignment))
414
415
def test_infrequent_partials_in_name():
    """With all counts at 1, one search looks up the name and restricts by address."""
    results = make_counted_searches(1, 1, 1, 1)

    assert len(results) == 1
    found = results[0]

    assert isinstance(found, dbs.PlaceSearch)
    assert len(found.lookups) == 2
    assert len(found.rankings) == 2

    lookup_kinds = {(lookup.column, lookup.lookup_type.__name__)
                    for lookup in found.lookups}
    expected = {('name_vector', 'LookupAll'), ('nameaddress_vector', 'Restrict')}
    assert lookup_kinds == expected
428
429
def test_frequent_partials_in_name_and_address():
    """With partial counts of 9999, two PlaceSearches with different lookups result.

    The searches are compared in order of ascending penalty.
    """
    results = make_counted_searches(9999, 1, 9999, 1)

    assert len(results) == 2

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    by_penalty = sorted(results, key=lambda item: item.penalty)

    cheapest = {(lookup.column, lookup.lookup_type.__name__)
                for lookup in by_penalty[0].lookups}
    assert cheapest == {('name_vector', 'LookupAny'),
                        ('nameaddress_vector', 'Restrict')}

    costliest = {(lookup.column, lookup.lookup_type.__name__)
                 for lookup in by_penalty[1].lookups}
    assert costliest == {('nameaddress_vector', 'LookupAll'),
                         ('name_vector', 'LookupAll')}
442
443
def test_too_frequent_partials_in_name_and_address():
    """With partial counts of 20000 and 10000, a single PlaceSearch remains."""
    results = make_counted_searches(20000, 1, 10000, 1)

    assert len(results) == 1

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    by_penalty = sorted(results, key=lambda item: item.penalty)

    lookup_kinds = {(lookup.column, lookup.lookup_type.__name__)
                    for lookup in by_penalty[0].lookups}
    assert lookup_kinds == {('name_vector', 'LookupAny'),
                            ('nameaddress_vector', 'Restrict')}