From 78648f1fafd66d851681991a2050595e77850bd7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 6 Aug 2023 19:20:10 +0200 Subject: [PATCH] remove lookup by address only There are too many lookups where the address is very frequent, even when many address parts are present. --- nominatim/api/search/db_search_builder.py | 7 ------- test/python/api/search/test_db_search_builder.py | 16 +--------------- 2 files changed, 1 insertion(+), 22 deletions(-) diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py index 7c6d13f0..8dd435d0 100644 --- a/nominatim/api/search/db_search_builder.py +++ b/nominatim/api/search/db_search_builder.py @@ -212,13 +212,6 @@ class SearchBuilder: exp_count = min(exp_count, min(t.count for t in addr_partials)) \ if addr_partials else exp_count - if exp_count < 1000 and len(addr_tokens) > 3 and partials_indexed: - # Lookup by address partials and restrict results through name terms. - # Give this a small penalty because lookups in the address index are - # more expensive - yield penalty + exp_count/5000, exp_count,\ - dbf.lookup_by_addr(name_tokens, addr_tokens) - return # Partial term to frequent. Try looking up by rare full names first. name_fulls = self.query.get_tokens(name, TokenType.WORD) diff --git a/test/python/api/search/test_db_search_builder.py b/test/python/api/search/test_db_search_builder.py index 0e5a8bfc..d1ad320b 100644 --- a/test/python/api/search/test_db_search_builder.py +++ b/test/python/api/search/test_db_search_builder.py @@ -68,7 +68,7 @@ def test_country_search_with_country_restriction(): assert set(search.countries.values) == {'en'} -def test_country_search_with_confllicting_country_restriction(): +def test_country_search_with_conflicting_country_restriction(): q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])]) builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'})) @@ -369,20 +369,6 @@ def test_infrequent_partials_in_name(): {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')} -def test_frequent_partials_in_name_but_not_in_address(): - searches = make_counted_searches(10000, 1, 1, 1, num_address_parts=4) - - assert len(searches) == 1 - search = searches[0] - - assert isinstance(search, dbs.PlaceSearch) - assert len(search.lookups) == 2 - assert len(search.rankings) == 2 - - assert set((l.column, l.lookup_type) for l in search.lookups) == \ - {('nameaddress_vector', 'lookup_all'), ('name_vector', 'restrict')} - - def test_frequent_partials_in_name_and_address(): searches = make_counted_searches(9999, 1, 9999, 1) -- 2.39.5