From: Sarah Hoffmann Date: Tue, 6 Feb 2024 19:29:48 +0000 (+0100) Subject: prefilter bad results before adding details and reranking X-Git-Tag: v4.4.0~17^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/4ce13f5c1fa59160a17e7db33805d48ba9a04ef5?ds=inline;hp=-c prefilter bad results before adding details and reranking Move the first cutting of the result list before reranking by result match. This means that results with significantly lower importance are removed early, regardless of how well they match the original query. Fixes #3266. --- 4ce13f5c1fa59160a17e7db33805d48ba9a04ef5 diff --git a/nominatim/api/search/geocoder.py b/nominatim/api/search/geocoder.py index 27e4d91e..711f8383 100644 --- a/nominatim/api/search/geocoder.py +++ b/nominatim/api/search/geocoder.py @@ -104,19 +104,27 @@ class ForwardGeocoder: return SearchResults(results.values()) + def pre_filter_results(self, results: SearchResults) -> SearchResults: + """ Remove results that are significantly worse than the + best match. + """ + if results: + max_ranking = min(r.ranking for r in results) + 0.5 + results = SearchResults(r for r in results if r.ranking < max_ranking) + + return results + + def sort_and_cut_results(self, results: SearchResults) -> SearchResults: """ Remove badly matching results, sort by ranking and limit to the configured number of results. 
""" if results: - min_ranking = min(r.ranking for r in results) - results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5) results.sort(key=lambda r: r.ranking) - - if results: min_rank = results[0].rank_search + min_ranking = results[0].ranking results = SearchResults(r for r in results - if r.ranking + 0.05 * (r.rank_search - min_rank) + if r.ranking + 0.03 * (r.rank_search - min_rank) < min_ranking + 0.5) results = SearchResults(results[:self.limit]) @@ -174,6 +182,7 @@ class ForwardGeocoder: if query: searches = [wrap_near_search(categories, s) for s in searches[:50]] results = await self.execute_searches(query, searches) + results = self.pre_filter_results(results) await add_result_details(self.conn, results, self.params) log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results)) results = self.sort_and_cut_results(results) @@ -203,6 +212,7 @@ class ForwardGeocoder: if searches: # Execute SQL until an appropriate result is found. results = await self.execute_searches(query, searches[:50]) + results = self.pre_filter_results(results) await add_result_details(self.conn, results, self.params) log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results)) self.rerank_by_query(query, results)