]> git.openstreetmap.org Git - nominatim.git/blob - test/python/api/search/test_db_search_builder.py
restrict deduplication to results from placex
[nominatim.git] / test / python / api / search / test_db_search_builder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for creating abstract searches from token assignments.
9 """
10 import pytest
11
12 from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
13 from nominatim.api.search.db_search_builder import SearchBuilder
14 from nominatim.api.search.token_assignment import TokenAssignment
15 from nominatim.api.types import SearchDetails
16 import nominatim.api.search.db_searches as dbs
17
class MyToken(Token):
    """Token implementation for the tests with a hard-coded category."""

    def get_category(self):
        """Return the constant pair ``('this', 'that')`` for every token."""
        return ('this', 'that')
21
22
def make_query(*args):
    """Build a QueryStruct from lists of token descriptions.

    Each positional argument is an iterable of ``(end, ttype, tinfo)``
    triples, where ``tinfo`` is an iterable of ``(token id, word)`` pairs.
    The first argument attaches its tokens to the initial node of a fresh
    query; every following argument first appends a WORD break node and
    attaches its tokens there. Tokens of type PARTIAL are created with a
    leading MyToken argument of 0.5, all other types with 0.0.

    The query is finished with an END break node and returned.
    """
    query = None

    for token_specs in args:
        if query is not None:
            query.add_node(BreakType.WORD, PhraseType.NONE)
        else:
            query = QueryStruct([Phrase(PhraseType.NONE, '')])

        # Tokens of this argument always start at the node added last.
        first_node = len(query.nodes) - 1
        for last_node, ttype, tinfo in token_specs:
            leading = 0.5 if ttype == TokenType.PARTIAL else 0.0
            for tid, word in tinfo:
                token = MyToken(leading, tid, 1, word, True)
                query.add_token(TokenRange(first_node, last_node), ttype, token)

    query.add_node(BreakType.END, PhraseType.NONE)

    return query
41
42
def test_country_search():
    """A country-only assignment gives one CountrySearch with all token countries."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    country_search = results[0]

    assert isinstance(country_search, dbs.CountrySearch)
    assert set(country_search.countries.values) == {'de', 'en'}
55
56
def test_country_search_with_country_restriction():
    """Only token countries also listed in the 'countries' parameter are kept."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'en,fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    country_search = results[0]

    assert isinstance(country_search, dbs.CountrySearch)
    assert set(country_search.countries.values) == {'en'}
69
70
def test_country_search_with_conflicting_country_restriction():
    """No search is built when the 'countries' parameter matches no token country."""
    query = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    details = SearchDetails.from_kwargs({'countries': 'fr'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(country=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert not results
78
79
def test_postcode_search_simple():
    """A postcode-only assignment gives a bare PostcodeSearch for that postcode."""
    query = make_query([(1, TokenType.POSTCODE, [(34, '2367')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    assert not postcode_search.countries.values
    assert not postcode_search.lookups
    assert not postcode_search.rankings
94
95
def test_postcode_with_country():
    """A postcode plus country assignment gives a PostcodeSearch limited to the country."""
    query = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                       [(2, TokenType.COUNTRY, [(1, 'xx')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(postcode=TokenRange(0, 1),
                                 country=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    postcode_search = results[0]

    assert isinstance(postcode_search, dbs.PostcodeSearch)
    assert postcode_search.postcodes.values == ['2367']
    assert postcode_search.countries.values == ['xx']
    assert not postcode_search.lookups
    assert not postcode_search.rankings
112
113
def test_postcode_with_address():
    """A postcode with a partial-only address part gives a PostcodeSearch
    that has lookups but no rankings and no country restriction.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, like the other postcode tests do,
    # instead of relying on the truthiness of the countries object.
    assert not search.countries.values
    assert search.lookups
    assert not search.rankings
130
131
def test_postcode_with_address_with_full_word():
    """A postcode with an address part that has a partial and a full-word
    token gives a PostcodeSearch with lookups and exactly one ranking.
    """
    q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])],
                   [(2, TokenType.PARTIAL, [(100, 'word')]),
                    (2, TokenType.WORD, [(1, 'full')])])
    builder = SearchBuilder(q, SearchDetails())

    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
                                                  address=[TokenRange(1, 2)])))

    assert len(searches) == 1
    search = searches[0]

    assert isinstance(search, dbs.PostcodeSearch)
    assert search.postcodes.values == ['2367']
    # Check the value list explicitly, like the other postcode tests do,
    # instead of relying on the truthiness of the countries object.
    assert not search.countries.values
    assert search.lookups
    assert len(search.rankings) == 1
149
150
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1', 'bounded_viewbox': True},
    {'near': '10,10'},
])
def test_category_only(kwargs):
    """A category-only assignment gives a PoiSearch for each parametrized
    set of search details (bounded viewbox, near point).
    """
    query = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1

    poi_search = results[0]

    assert isinstance(poi_search, dbs.PoiSearch)
    assert poi_search.qualifiers.values == [('this', 'that')]
165
166
@pytest.mark.parametrize('kwargs', [
    {'viewbox': '0,0,1,1'},
    {},
])
def test_category_skipped(kwargs):
    """A category-only assignment gives no search for an unbounded viewbox
    or for empty search details.
    """
    query = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
    details = SearchDetails.from_kwargs(kwargs)
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert not results
176
177
def test_name_only_search():
    """A name-only assignment gives a PlaceSearch with one lookup, one
    ranking and no further restrictions.
    """
    query = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                        (1, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert not place_search.qualifiers.values
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
195
196
def test_name_with_qualifier():
    """A name followed by a qualifier gives a PlaceSearch that carries the
    category of the qualifier token.
    """
    query = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                        (1, TokenType.WORD, [(100, 'a')])],
                       [(2, TokenType.QUALIFIER, [(55, 'hotel')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 qualifier=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert place_search.qualifiers.values == [('this', 'that')]
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
216
217
def test_name_with_housenumber_search():
    """A name followed by a house number gives a PlaceSearch restricted to
    that house number.
    """
    query = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                        (1, TokenType.WORD, [(100, 'a')])],
                       [(2, TokenType.HOUSENUMBER, [(66, '66')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 housenumber=TokenRange(1, 2))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert place_search.housenumbers.values == ['66']
    assert len(place_search.lookups) == 1
    assert len(place_search.rankings) == 1
236
237
def test_name_and_address():
    """A name with two single-node address parts gives one PlaceSearch with
    two lookups and three rankings.
    """
    query = make_query(
        [(1, TokenType.PARTIAL, [(1, 'a')]),
         (1, TokenType.WORD, [(100, 'a')])],
        [(2, TokenType.PARTIAL, [(2, 'b')]),
         (2, TokenType.WORD, [(101, 'b')])],
        [(3, TokenType.PARTIAL, [(3, 'c')]),
         (3, TokenType.WORD, [(102, 'c')])],
    )
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 3)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 3
261
262
def test_name_and_complex_address():
    """A name with address parts where one full word spans two nodes gives
    one PlaceSearch with two lookups and two rankings.
    """
    query = make_query(
        [(1, TokenType.PARTIAL, [(1, 'a')]),
         (1, TokenType.WORD, [(100, 'a')])],
        [(2, TokenType.PARTIAL, [(2, 'b')]),
         (3, TokenType.WORD, [(101, 'bc')])],
        [(3, TokenType.PARTIAL, [(3, 'c')])],
        [(4, TokenType.PARTIAL, [(4, 'd')]),
         (4, TokenType.WORD, [(103, 'd')])],
    )
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 2),
                                          TokenRange(2, 4)])

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert not place_search.countries.values
    assert not place_search.housenumbers.values
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 2
287
288
def test_name_only_near_search():
    """A category followed by a name gives a NearSearch wrapping a PlaceSearch."""
    query = make_query([(1, TokenType.CATEGORY, [(88, 'g')])],
                       [(2, TokenType.PARTIAL, [(1, 'a')]),
                        (2, TokenType.WORD, [(100, 'a')])])
    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(1, 2),
                                 category=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    near_search = results[0]

    assert isinstance(near_search, dbs.NearSearch)
    assert isinstance(near_search.search, dbs.PlaceSearch)
303
304
def test_name_only_search_with_category():
    """A name search with a 'categories' parameter gives a NearSearch
    wrapping a PlaceSearch.
    """
    query = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                        (1, TokenType.WORD, [(100, 'a')])])
    details = SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    near_search = results[0]

    assert isinstance(near_search, dbs.NearSearch)
    assert isinstance(near_search.search, dbs.PlaceSearch)
317
318
def test_name_only_search_with_countries():
    """A name search with a 'countries' parameter gives a PlaceSearch
    restricted to exactly those countries.
    """
    query = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                        (1, TokenType.WORD, [(100, 'a')])])
    details = SearchDetails.from_kwargs({'countries': 'de,en'})
    builder = SearchBuilder(query, details)
    assignment = TokenAssignment(name=TokenRange(0, 1))

    results = list(builder.build(assignment))

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert not place_search.postcodes.values
    assert set(place_search.countries.values) == {'de', 'en'}
    assert not place_search.housenumbers.values
333
334
def make_counted_searches(name_part, name_full, address_part, address_full,
                          num_address_parts=1):
    """Build the searches for a name with `num_address_parts` address terms.

    The query gets a partial and a full-word token for the name on the
    range 0-1 and one partial/full-word token pair for each address term
    on the following single-node ranges. The four leading parameters are
    handed through as the third MyToken argument of the respective token
    (presumably the word count - confirm against Token).

    Returns the list of searches built for an assignment that uses the
    first range as name and all address nodes as a single address range.
    """
    query = QueryStruct([Phrase(PhraseType.NONE, '')])
    for _ in range(num_address_parts + 1):
        query.add_node(BreakType.WORD, PhraseType.NONE)
    query.add_node(BreakType.END, PhraseType.NONE)

    # Name tokens.
    query.add_token(TokenRange(0, 1), TokenType.PARTIAL,
                    MyToken(0.5, 1, name_part, 'name_part', True))
    query.add_token(TokenRange(0, 1), TokenType.WORD,
                    MyToken(0, 101, name_full, 'name_full', True))

    # Address tokens, one pair per address term.
    for first in range(1, num_address_parts + 1):
        query.add_token(TokenRange(first, first + 1), TokenType.PARTIAL,
                        MyToken(0.5, 2, address_part, 'address_part', True))
        query.add_token(TokenRange(first, first + 1), TokenType.WORD,
                        MyToken(0, 102, address_full, 'address_full', True))

    builder = SearchBuilder(query, SearchDetails())
    assignment = TokenAssignment(name=TokenRange(0, 1),
                                 address=[TokenRange(1, 1 + num_address_parts)])

    return list(builder.build(assignment))
356
357
def test_infrequent_partials_in_name():
    """With all counts at 1 a single PlaceSearch is built that looks up all
    name tokens and only restricts on the address.
    """
    results = make_counted_searches(1, 1, 1, 1)

    assert len(results) == 1
    place_search = results[0]

    assert isinstance(place_search, dbs.PlaceSearch)
    assert len(place_search.lookups) == 2
    assert len(place_search.rankings) == 2

    used_lookups = {(lookup.column, lookup.lookup_type)
                    for lookup in place_search.lookups}
    assert used_lookups == {('name_vector', 'lookup_all'),
                            ('nameaddress_vector', 'restrict')}
370
371
def test_frequent_partials_in_name_and_address():
    """With partial counts of 9999 two PlaceSearches are built; ordered by
    penalty, the first uses lookup_any on the name and the second
    lookup_all on both vectors.
    """
    results = make_counted_searches(9999, 1, 9999, 1)

    assert len(results) == 2

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    results.sort(key=lambda item: item.penalty)

    cheaper = {(lookup.column, lookup.lookup_type)
               for lookup in results[0].lookups}
    costlier = {(lookup.column, lookup.lookup_type)
                for lookup in results[1].lookups}

    assert cheaper == {('name_vector', 'lookup_any'),
                       ('nameaddress_vector', 'restrict')}
    assert costlier == {('nameaddress_vector', 'lookup_all'),
                        ('name_vector', 'lookup_all')}
384
385
def test_too_frequent_partials_in_name_and_address():
    """With partial counts of 20000 and 10000 only one PlaceSearch is built,
    using lookup_any on the name and a restriction on the address.
    """
    results = make_counted_searches(20000, 1, 10000, 1)

    assert len(results) == 1

    assert all(isinstance(item, dbs.PlaceSearch) for item in results)
    results.sort(key=lambda item: item.penalty)

    used_lookups = {(lookup.column, lookup.lookup_type)
                    for lookup in results[0].lookups}
    assert used_lookups == {('name_vector', 'lookup_any'),
                            ('nameaddress_vector', 'restrict')}