From 4ce13f5c1fa59160a17e7db33805d48ba9a04ef5 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 6 Feb 2024 20:29:48 +0100 Subject: [PATCH] prefilter bad results before adding details and reranking Move the first cutting of the result list before reranking by result match. This means that results with significantly lower importance are removed early, regardless of how well they match the original query. Fixes #3266. --- nominatim/api/search/geocoder.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py index 27e4d91e..711f8383 100644 --- a/nominatim/api/search/geocoder.py +++ b/nominatim/api/search/geocoder.py @@ -104,19 +104,27 @@ class ForwardGeocoder: return SearchResults(results.values()) + def pre_filter_results(self, results: SearchResults) -> SearchResults: + """ Remove results that are significantly worse than the + best match. + """ + if results: + max_ranking = min(r.ranking for r in results) + 0.5 + results = SearchResults(r for r in results if r.ranking < max_ranking) + + return results + + def sort_and_cut_results(self, results: SearchResults) -> SearchResults: """ Remove badly matching results, sort by ranking and limit to the configured number of results. 
""" if results: - min_ranking = min(r.ranking for r in results) - results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5) results.sort(key=lambda r: r.ranking) - - if results: min_rank = results[0].rank_search + min_ranking = results[0].ranking results = SearchResults(r for r in results - if r.ranking + 0.05 * (r.rank_search - min_rank) + if r.ranking + 0.03 * (r.rank_search - min_rank) < min_ranking + 0.5) results = SearchResults(results[:self.limit]) @@ -174,6 +182,7 @@ class ForwardGeocoder: if query: searches = [wrap_near_search(categories, s) for s in searches[:50]] results = await self.execute_searches(query, searches) + results = self.pre_filter_results(results) await add_result_details(self.conn, results, self.params) log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results)) results = self.sort_and_cut_results(results) @@ -203,6 +212,7 @@ class ForwardGeocoder: if searches: # Execute SQL until an appropriate result is found. results = await self.execute_searches(query, searches[:50]) + results = self.pre_filter_results(results) await add_result_details(self.conn, results, self.params) log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results)) self.rerank_by_query(query, results) -- 2.39.5