X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/56f9535aa9cf3f9f500471603e5876cd3e0ddb0b..2aba0ad4bc0a924337a2541e983d61a26cf67ce6:/test/python/api/search/test_db_search_builder.py diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py index c93b8ead..49d5f303 100644 --- a/test/python/api/search/test_db_search_builder.py +++ b/test/python/api/search/test_db_search_builder.py @@ -9,11 +9,12 @@ Tests for creating abstract searches from token assignments. """ import pytest -from nominatim.api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase -from nominatim.api.search.db_search_builder import SearchBuilder -from nominatim.api.search.token_assignment import TokenAssignment -from nominatim.api.types import SearchDetails -import nominatim.api.search.db_searches as dbs +from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase +import nominatim_api.search.query as qmod +from nominatim_api.search.db_search_builder import SearchBuilder +from nominatim_api.search.token_assignment import TokenAssignment +from nominatim_api.types import SearchDetails +import nominatim_api.search.db_searches as dbs class MyToken(Token): def get_category(self): @@ -21,27 +22,26 @@ class MyToken(Token): def make_query(*args): - q = None + q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')]) - for tlist in args: - if q is None: - q = QueryStruct([Phrase(PhraseType.NONE, '')]) - else: - q.add_node(BreakType.WORD, PhraseType.NONE) + for _ in range(max(inner[0] for tlist in args for inner in tlist)): + q.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) - start = len(q.nodes) - 1 + for start, tlist in enumerate(args): for end, ttype, tinfo in tlist: for tid, word in tinfo: q.add_token(TokenRange(start, end), ttype, - MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True)) + MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0, + token=tid, count=1, addr_count=1, + lookup_word=word)) - q.add_node(BreakType.END, PhraseType.NONE) return q def test_country_search(): - q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) + q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) @@ -55,7 +55,7 @@ def test_country_search(): def test_country_search_with_country_restriction(): - q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) + q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'})) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) @@ -69,7 +69,7 @@ def test_country_search_with_country_restriction(): def test_country_search_with_conflicting_country_restriction(): - q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) + q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'})) searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1)))) @@ -78,7 +78,7 @@ def test_country_search_with_conflicting_country_restriction(): def test_postcode_search_simple(): - q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])]) + q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1)))) @@ -94,8 +94,8 @@ def test_postcode_search_simple(): def test_postcode_with_country(): - q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], - [(2, TokenType.COUNTRY, [(1, 'xx')])]) + q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])], + [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), @@ -112,8 +112,8 @@ def test_postcode_with_country(): def test_postcode_with_address(): - q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], - [(2, TokenType.PARTIAL, [(100, 'word')])]) + q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])], + [(2, qmod.TOKEN_PARTIAL, [(100, 'word')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), @@ -130,9 +130,9 @@ def test_postcode_with_address(): def test_postcode_with_address_with_full_word(): - q = make_query([(1, TokenType.POSTCODE, [(34, '2367')])], - [(2, TokenType.PARTIAL, [(100, 'word')]), - (2, TokenType.WORD, [(1, 'full')])]) + q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])], + [(2, qmod.TOKEN_PARTIAL, [(100, 'word')]), + (2, qmod.TOKEN_WORD, [(1, 'full')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1), @@ -150,11 +150,11 @@ def test_postcode_with_address_with_full_word(): @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True}, {'near': '10,10'}]) -def test_category_only(kwargs): - q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])]) +def test_near_item_only(kwargs): + q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) - searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1)))) + searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1)))) assert len(searches) == 1 @@ -166,18 +166,18 @@ def test_category_only(kwargs): @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'}, {}]) -def test_category_skipped(kwargs): - q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])]) +def test_near_item_skipped(kwargs): + q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(2, 'foo')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs)) - searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1)))) + searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1)))) assert len(searches) == 0 def test_name_only_search(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])]) + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) @@ -195,9 +195,9 @@ def test_name_only_search(): def test_name_with_qualifier(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])], - [(2, TokenType.QUALIFIER, [(55, 'hotel')])]) + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])], + [(2, qmod.TOKEN_QUALIFIER, [(55, 'hotel')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1), @@ -216,9 +216,9 @@ def test_name_with_qualifier(): def test_name_with_housenumber_search(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])], - [(2, TokenType.HOUSENUMBER, [(66, '66')])]) + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])], + [(2, qmod.TOKEN_HOUSENUMBER, [(66, '66')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1), @@ -236,12 +236,12 @@ def test_name_with_housenumber_search(): def test_name_and_address(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])], - [(2, TokenType.PARTIAL, [(2, 'b')]), - (2, TokenType.WORD, [(101, 'b')])], - [(3, TokenType.PARTIAL, [(3, 'c')]), - (3, TokenType.WORD, [(102, 'c')])] + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])], + [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]), + (2, qmod.TOKEN_WORD, [(101, 'b')])], + [(3, qmod.TOKEN_PARTIAL, [(3, 'c')]), + (3, qmod.TOKEN_WORD, [(102, 'c')])] ) builder = SearchBuilder(q, SearchDetails()) @@ -261,13 +261,13 @@ def test_name_and_address(): def test_name_and_complex_address(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])], - [(2, TokenType.PARTIAL, [(2, 'b')]), - (3, TokenType.WORD, [(101, 'bc')])], - [(3, TokenType.PARTIAL, [(3, 'c')])], - [(4, TokenType.PARTIAL, [(4, 'd')]), - (4, TokenType.WORD, [(103, 'd')])] + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])], + [(2, qmod.TOKEN_PARTIAL, [(2, 'b')]), + (3, qmod.TOKEN_WORD, [(101, 'bc')])], + [(3, qmod.TOKEN_PARTIAL, [(3, 'c')])], + [(4, qmod.TOKEN_PARTIAL, [(4, 'd')]), + (4, qmod.TOKEN_WORD, [(103, 'd')])] ) builder = SearchBuilder(q, SearchDetails()) @@ -287,13 +287,13 @@ def test_name_and_complex_address(): def test_name_only_near_search(): - q = make_query([(1, TokenType.CATEGORY, [(88, 'g')])], - [(2, TokenType.PARTIAL, [(1, 'a')]), - (2, TokenType.WORD, [(100, 'a')])]) + q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])], + [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (2, qmod.TOKEN_WORD, [(100, 'a')])]) builder = SearchBuilder(q, SearchDetails()) searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), - category=TokenRange(0, 1)))) + near_item=TokenRange(0, 1)))) assert len(searches) == 1 search = searches[0] @@ -303,8 +303,8 @@ def test_name_only_near_search(): def test_name_only_search_with_category(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])]) + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) @@ -312,13 +312,71 @@ def test_name_only_search_with_category(): assert len(searches) == 1 search = searches[0] + assert isinstance(search, dbs.PlaceSearch) + assert search.qualifiers.values == [('foo', 'bar')] + + +def test_name_with_near_item_search_with_category_mismatch(): + q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])], + [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (2, qmod.TOKEN_WORD, [(100, 'a')])]) + builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) + + searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), + near_item=TokenRange(0, 1)))) + + assert len(searches) == 0 + + +def test_name_with_near_item_search_with_category_match(): + q = make_query([(1, qmod.TOKEN_NEAR_ITEM, [(88, 'g')])], + [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (2, qmod.TOKEN_WORD, [(100, 'a')])]) + builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'), + ('this', 'that')]})) + + searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), + near_item=TokenRange(0, 1)))) + + assert len(searches) == 1 + search = searches[0] + assert isinstance(search, dbs.NearSearch) assert isinstance(search.search, dbs.PlaceSearch) +def test_name_with_qualifier_search_with_category_mismatch(): + q = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])], + [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (2, qmod.TOKEN_WORD, [(100, 'a')])]) + builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]})) + + searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), + qualifier=TokenRange(0, 1)))) + + assert len(searches) == 0 + + +def test_name_with_qualifier_search_with_category_match(): + q = make_query([(1, qmod.TOKEN_QUALIFIER, [(88, 'g')])], + [(2, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (2, qmod.TOKEN_WORD, [(100, 'a')])]) + builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'), + ('this', 'that')]})) + + searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2), + qualifier=TokenRange(0, 1)))) + + assert len(searches) == 1 + search = searches[0] + + assert isinstance(search, dbs.PlaceSearch) + assert search.qualifiers.values == [('this', 'that')] + + def test_name_only_search_with_countries(): - q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]), - (1, TokenType.WORD, [(100, 'a')])]) + q = make_query([(1, qmod.TOKEN_PARTIAL, [(1, 'a')]), + (1, qmod.TOKEN_WORD, [(100, 'a')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'de,en'})) searches = list(builder.build(TokenAssignment(name=TokenRange(0, 1)))) @@ -334,20 +392,20 @@ def test_name_only_search_with_countries(): def make_counted_searches(name_part, name_full, address_part, address_full, num_address_parts=1): - q = QueryStruct([Phrase(PhraseType.NONE, '')]) + q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')]) for i in range(1 + num_address_parts): - q.add_node(BreakType.WORD, PhraseType.NONE) - q.add_node(BreakType.END, PhraseType.NONE) + q.add_node(qmod.BREAK_WORD, qmod.PHRASE_ANY) + q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY) - q.add_token(TokenRange(0, 1), TokenType.PARTIAL, - MyToken(0.5, 1, name_part, 'name_part', True)) - q.add_token(TokenRange(0, 1), TokenType.WORD, - MyToken(0, 101, name_full, 'name_full', True)) + q.add_token(TokenRange(0, 1), qmod.TOKEN_PARTIAL, + MyToken(0.5, 1, name_part, 1, 'name_part')) + q.add_token(TokenRange(0, 1), qmod.TOKEN_WORD, + MyToken(0, 101, name_full, 1, 'name_full')) for i in range(num_address_parts): - q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL, - MyToken(0.5, 2, address_part, 'address_part', True)) - q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD, - MyToken(0, 102, address_full, 'address_full', True)) + q.add_token(TokenRange(i + 1, i + 2), qmod.TOKEN_PARTIAL, + MyToken(0.5, 2, address_part, 1, 'address_part')) + q.add_token(TokenRange(i + 1, i + 2), qmod.TOKEN_WORD, + MyToken(0, 102, address_full, 1, 'address_full')) builder = SearchBuilder(q, SearchDetails()) @@ -365,8 +423,8 @@ def test_infrequent_partials_in_name(): assert len(search.lookups) == 2 assert len(search.rankings) == 2 - assert set((l.column, l.lookup_type) for l in search.lookups) == \ - {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')} + assert set((l.column, l.lookup_type.__name__) for l in search.lookups) == \ + {('name_vector', 'LookupAll'), ('nameaddress_vector', 'Restrict')} def test_frequent_partials_in_name_and_address(): @@ -377,10 +435,10 @@ def test_frequent_partials_in_name_and_address(): assert all(isinstance(s, dbs.PlaceSearch) for s in searches) searches.sort(key=lambda s: s.penalty) - assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \ - {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')} - assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \ - {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')} + assert set((l.column, l.lookup_type.__name__) for l in searches[0].lookups) == \ + {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')} + assert set((l.column, l.lookup_type.__name__) for l in searches[1].lookups) == \ + {('nameaddress_vector', 'LookupAll'), ('name_vector', 'LookupAll')} def test_too_frequent_partials_in_name_and_address(): @@ -391,5 +449,5 @@ def test_too_frequent_partials_in_name_and_address(): assert all(isinstance(s, dbs.PlaceSearch) for s in searches) searches.sort(key=lambda s: s.penalty) - assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \ - {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')} + assert set((l.column, l.lookup_type.__name__) for l in searches[0].lookups) == \ + {('name_vector', 'LookupAny'), ('nameaddress_vector', 'Restrict')}