git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/api/search/test_db_search_builder.py
restrict deduplication to results from placex
[nominatim.git] / test / python / api / search / test_db_search_builder.py
index 9631850e1b284fcf760c3171b5cf2c09a78bed64..c93b8ead3c2fda0a49320726d72bda6c4282bbb1 100644 (file)
@@ -68,7 +68,7 @@ def test_country_search_with_country_restriction():
     assert set(search.countries.values) == {'en'}
 
 
-def test_country_search_with_confllicting_country_restriction():
+def test_country_search_with_conflicting_country_restriction():
     q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
 
@@ -161,7 +161,7 @@ def test_category_only(kwargs):
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
-    assert search.categories.values == [('this', 'that')]
+    assert search.qualifiers.values == [('this', 'that')]
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
@@ -332,9 +332,10 @@ def test_name_only_search_with_countries():
     assert not search.housenumbers.values
 
 
-def make_counted_searches(name_part, name_full, address_part, address_full):
+def make_counted_searches(name_part, name_full, address_part, address_full,
+                          num_address_parts=1):
     q = QueryStruct([Phrase(PhraseType.NONE, '')])
-    for i in range(2):
+    for i in range(1 + num_address_parts):
         q.add_node(BreakType.WORD, PhraseType.NONE)
     q.add_node(BreakType.END, PhraseType.NONE)
 
@@ -342,15 +343,16 @@ def make_counted_searches(name_part, name_full, address_part, address_full):
                 MyToken(0.5, 1, name_part, 'name_part', True))
     q.add_token(TokenRange(0, 1), TokenType.WORD,
                 MyToken(0, 101, name_full, 'name_full', True))
-    q.add_token(TokenRange(1, 2), TokenType.PARTIAL,
-                MyToken(0.5, 2, address_part, 'address_part', True))
-    q.add_token(TokenRange(1, 2), TokenType.WORD,
-                MyToken(0, 102, address_full, 'address_full', True))
+    for i in range(num_address_parts):
+        q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
+                    MyToken(0.5, 2, address_part, 'address_part', True))
+        q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
+                    MyToken(0, 102, address_full, 'address_full', True))
 
     builder = SearchBuilder(q, SearchDetails())
 
     return list(builder.build(TokenAssignment(name=TokenRange(0, 1),
-                                              address=[TokenRange(1, 2)])))
+                                              address=[TokenRange(1, 1 + num_address_parts)])))
 
 
 def test_infrequent_partials_in_name():
@@ -367,29 +369,27 @@ def test_infrequent_partials_in_name():
             {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')}
 
 
-def test_frequent_partials_in_name_but_not_in_address():
-    searches = make_counted_searches(10000, 1, 1, 1)
+def test_frequent_partials_in_name_and_address():
+    searches = make_counted_searches(9999, 1, 9999, 1)
 
-    assert len(searches) == 1
-    search = searches[0]
+    assert len(searches) == 2
 
-    assert isinstance(search, dbs.PlaceSearch)
-    assert len(search.lookups) == 2
-    assert len(search.rankings) == 2
+    assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
+    searches.sort(key=lambda s: s.penalty)
 
-    assert set((l.column, l.lookup_type) for l in search.lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'restrict')}
+    assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
+            {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
+    assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
+            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
 
 
-def test_frequent_partials_in_name_and_address():
-    searches = make_counted_searches(10000, 1, 10000, 1)
+def test_too_frequent_partials_in_name_and_address():
+    searches = make_counted_searches(20000, 1, 10000, 1)
 
-    assert len(searches) == 2
+    assert len(searches) == 1
 
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
     assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
             {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
-    assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}