X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/f448423727ce8f96723820b6ad1aacc2e5310843..8a1af9b56659d4ef956f45da2928687a17dea20a:/nominatim/api/search/db_search_builder.py diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py index f89d8b62..c755f2a7 100644 --- a/nominatim/api/search/db_search_builder.py +++ b/nominatim/api/search/db_search_builder.py @@ -113,7 +113,10 @@ class SearchBuilder: penalty = min(near_items.penalties) near_items.penalties = [p - penalty for p in near_items.penalties] for search in builder: - yield dbs.NearSearch(penalty + assignment.penalty, near_items, search) + search_penalty = search.penalty + search.penalty = 0.0 + yield dbs.NearSearch(penalty + assignment.penalty + search_penalty, + near_items, search) else: for search in builder: search.penalty += assignment.penalty @@ -160,11 +163,15 @@ class SearchBuilder: housenumber is the main name token. """ sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')] + expected_count = sum(t.count for t in hnrs) partials = [t for trange in address for t in self.query.get_partials_list(trange)] - if len(partials) != 1 or partials[0].count < 10000: + if expected_count < 8000: + sdata.lookups.append(dbf.FieldLookup('nameaddress_vector', + [t.token for t in partials], 'restrict')) + elif len(partials) != 1 or partials[0].count < 10000: sdata.lookups.append(dbf.FieldLookup('nameaddress_vector', [t.token for t in partials], 'lookup_all')) else: @@ -175,7 +182,7 @@ class SearchBuilder: 'lookup_any')) sdata.housenumbers = dbf.WeightedStrings([], []) - yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs)) + yield dbs.PlaceSearch(0.05, sdata, expected_count) def build_name_search(self, sdata: dbf.SearchData, @@ -216,16 +223,17 @@ class SearchBuilder: # Partial term to frequent. Try looking up by rare full names first. name_fulls = self.query.get_tokens(name, TokenType.WORD) - fulls_count = sum(t.count for t in name_fulls) - # At this point drop unindexed partials from the address. - # This might yield wrong results, nothing we can do about that. - if not partials_indexed: - addr_tokens = [t.token for t in addr_partials if t.is_indexed] - penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed) - # Any of the full names applies with all of the partials from the address - yield penalty, fulls_count / (2**len(addr_partials)),\ - dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens, - 'restrict' if fulls_count < 10000 else 'lookup_all') + if name_fulls: + fulls_count = sum(t.count for t in name_fulls) + # At this point drop unindexed partials from the address. + # This might yield wrong results, nothing we can do about that. + if not partials_indexed: + addr_tokens = [t.token for t in addr_partials if t.is_indexed] + penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed) + # Any of the full names applies with all of the partials from the address + yield penalty, fulls_count / (2**len(addr_partials)),\ + dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens, + 'restrict' if fulls_count < 10000 else 'lookup_all') # To catch remaining results, lookup by name and address # We only do this if there is a reasonable number of results expected.