git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/api/search/test_db_search_builder.py
prepare 4.5.0 release
[nominatim.git] / test / python / api / search / test_db_search_builder.py
index 0e5a8bfcd47d0b8ac13fa36a6290d1c383b5b858..5d984014d18113e15f6198d246db64e5b000edad 100644 (file)
@@ -9,11 +9,11 @@ Tests for creating abstract searches from token assignments.
 """
 import pytest
 
 """
 import pytest
 
-from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
-from nominatim.api.search.db_search_builder import SearchBuilder
-from nominatim.api.search.token_assignment import TokenAssignment
-from nominatim.api.types import SearchDetails
-import nominatim.api.search.db_searches as dbs
+from nominatim_api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase
+from nominatim_api.search.db_search_builder import SearchBuilder
+from nominatim_api.search.token_assignment import TokenAssignment
+from nominatim_api.types import SearchDetails
+import nominatim_api.search.db_searches as dbs
 
 class MyToken(Token):
     def get_category(self):
 
 class MyToken(Token):
     def get_category(self):
@@ -21,21 +21,20 @@ class MyToken(Token):
 
 
 def make_query(*args):
 
 
 def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(PhraseType.NONE, '')])
 
 
-    for tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(PhraseType.NONE, '')])
-        else:
-            q.add_node(BreakType.WORD, PhraseType.NONE)
+    for _ in range(max(inner[0] for tlist in args for inner in tlist)):
+        q.add_node(BreakType.WORD, PhraseType.NONE)
+    q.add_node(BreakType.END, PhraseType.NONE)
 
 
-        start = len(q.nodes) - 1
+    for start, tlist in enumerate(args):
         for end, ttype, tinfo in tlist:
             for tid, word in tinfo:
                 q.add_token(TokenRange(start, end), ttype,
         for end, ttype, tinfo in tlist:
             for tid, word in tinfo:
                 q.add_token(TokenRange(start, end), ttype,
-                            MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True))
+                            MyToken(penalty=0.5 if ttype == TokenType.PARTIAL else 0.0,
+                                    token=tid, count=1, addr_count=1,
+                                    lookup_word=word, is_indexed=True))
 
 
-    q.add_node(BreakType.END, PhraseType.NONE)
 
     return q
 
 
     return q
 
@@ -68,7 +67,7 @@ def test_country_search_with_country_restriction():
     assert set(search.countries.values) == {'en'}
 
 
     assert set(search.countries.values) == {'en'}
 
 
-def test_country_search_with_confllicting_country_restriction():
+def test_country_search_with_conflicting_country_restriction():
     q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
 
     q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
 
@@ -150,27 +149,27 @@ def test_postcode_with_address_with_full_word():
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
                                     {'near': '10,10'}])
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
                                     {'near': '10,10'}])
-def test_category_only(kwargs):
-    q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_only(kwargs):
+    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
-    searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
 
     assert len(searches) == 1
 
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
 
     assert len(searches) == 1
 
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
-    assert search.categories.values == [('this', 'that')]
+    assert search.qualifiers.values == [('this', 'that')]
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
                                     {}])
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
                                     {}])
-def test_category_skipped(kwargs):
-    q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_skipped(kwargs):
+    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
-    searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
 
     assert len(searches) == 0
 
 
     assert len(searches) == 0
 
@@ -287,13 +286,13 @@ def test_name_and_complex_address():
 
 
 def test_name_only_near_search():
 
 
 def test_name_only_near_search():
-    q = make_query([(1, TokenType.CATEGORY, [(88, 'g')])],
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                    [(2, TokenType.PARTIAL, [(1, 'a')]),
                     (2, TokenType.WORD, [(100, 'a')])])
     builder = SearchBuilder(q, SearchDetails())
 
     searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
                    [(2, TokenType.PARTIAL, [(1, 'a')]),
                     (2, TokenType.WORD, [(100, 'a')])])
     builder = SearchBuilder(q, SearchDetails())
 
     searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
-                                                  category=TokenRange(0, 1))))
+                                                  near_item=TokenRange(0, 1))))
 
     assert len(searches) == 1
     search = searches[0]
 
     assert len(searches) == 1
     search = searches[0]
@@ -312,10 +311,68 @@ def test_name_only_search_with_category():
     assert len(searches) == 1
     search = searches[0]
 
     assert len(searches) == 1
     search = searches[0]
 
+    assert isinstance(search, dbs.PlaceSearch)
+    assert search.qualifiers.values == [('foo', 'bar')]
+
+
+def test_name_with_near_item_search_with_category_mismatch():
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  near_item=TokenRange(0, 1))))
+
+    assert len(searches) == 0
+
+
+def test_name_with_near_item_search_with_category_match():
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+                                                                         ('this', 'that')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  near_item=TokenRange(0, 1))))
+
+    assert len(searches) == 1
+    search = searches[0]
+
     assert isinstance(search, dbs.NearSearch)
     assert isinstance(search.search, dbs.PlaceSearch)
 
 
     assert isinstance(search, dbs.NearSearch)
     assert isinstance(search.search, dbs.PlaceSearch)
 
 
+def test_name_with_qualifier_search_with_category_mismatch():
+    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  qualifier=TokenRange(0, 1))))
+
+    assert len(searches) == 0
+
+
+def test_name_with_qualifier_search_with_category_match():
+    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+                                                                         ('this', 'that')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  qualifier=TokenRange(0, 1))))
+
+    assert len(searches) == 1
+    search = searches[0]
+
+    assert isinstance(search, dbs.PlaceSearch)
+    assert search.qualifiers.values == [('this', 'that')]
+
+
 def test_name_only_search_with_countries():
     q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                     (1, TokenType.WORD, [(100, 'a')])])
 def test_name_only_search_with_countries():
     q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                     (1, TokenType.WORD, [(100, 'a')])])
@@ -340,14 +397,14 @@ def make_counted_searches(name_part, name_full, address_part, address_full,
     q.add_node(BreakType.END, PhraseType.NONE)
 
     q.add_token(TokenRange(0, 1), TokenType.PARTIAL,
     q.add_node(BreakType.END, PhraseType.NONE)
 
     q.add_token(TokenRange(0, 1), TokenType.PARTIAL,
-                MyToken(0.5, 1, name_part, 'name_part', True))
+                MyToken(0.5, 1, name_part, 1, 'name_part', True))
     q.add_token(TokenRange(0, 1), TokenType.WORD,
     q.add_token(TokenRange(0, 1), TokenType.WORD,
-                MyToken(0, 101, name_full, 'name_full', True))
+                MyToken(0, 101, name_full, 1, 'name_full', True))
     for i in range(num_address_parts):
         q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
     for i in range(num_address_parts):
         q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
-                    MyToken(0.5, 2, address_part, 'address_part', True))
+                    MyToken(0.5, 2, address_part, 1, 'address_part', True))
         q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
         q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
-                    MyToken(0, 102, address_full, 'address_full', True))
+                    MyToken(0, 102, address_full, 1, 'address_full', True))
 
     builder = SearchBuilder(q, SearchDetails())
 
 
     builder = SearchBuilder(q, SearchDetails())
 
@@ -365,22 +422,8 @@ def test_infrequent_partials_in_name():
     assert len(search.lookups) == 2
     assert len(search.rankings) == 2
 
     assert len(search.lookups) == 2
     assert len(search.rankings) == 2
 
-    assert set((l.column, l.lookup_type) for l in search.lookups) == \
-            {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')}
-
-
-def test_frequent_partials_in_name_but_not_in_address():
-    searches = make_counted_searches(10000, 1, 1, 1, num_address_parts=4)
-
-    assert len(searches) == 1
-    search = searches[0]
-
-    assert isinstance(search, dbs.PlaceSearch)
-    assert len(search.lookups) == 2
-    assert len(search.rankings) == 2
-
-    assert set((l.column, l.lookup_type) for l in search.lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'restrict')}
+    assert set((l.column, l.lookup_type.__name__) for l in search.lookups) == \
+            {('name_vector', 'LookupAll'), ('nameaddress_vector', 'Restrict')}
 
 
 def test_frequent_partials_in_name_and_address():
 
 
 def test_frequent_partials_in_name_and_address():
@@ -391,19 +434,19 @@ def test_frequent_partials_in_name_and_address():
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
-    assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
-            {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
-    assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
+    assert set((l.column, l.lookup_type.__name__) for l in searches[0].lookups) == \
+            {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')}
+    assert set((l.column, l.lookup_type.__name__) for l in searches[1].lookups) == \
+            {('nameaddress_vector', 'LookupAll'), ('name_vector', 'LookupAll')}
 
 
 def test_too_frequent_partials_in_name_and_address():
 
 
 def test_too_frequent_partials_in_name_and_address():
-    searches = make_counted_searches(10000, 1, 10000, 1)
+    searches = make_counted_searches(20000, 1, 10000, 1)
 
     assert len(searches) == 1
 
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
 
     assert len(searches) == 1
 
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
-    assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
-            {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
+    assert set((l.column, l.lookup_type.__name__) for l in searches[0].lookups) == \
+            {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')}