]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/search/geocoder.py
Merge pull request #3346 from lonvia/reduce-artificial-importance
[nominatim.git] / nominatim / api / search / geocoder.py
index 6db7c9cefcfa98b4269249a23a1bdcee44c55c4a..711f83833f9408ff980c29f5eeca046b8baa28d6 100644 (file)
@@ -85,6 +85,7 @@ class ForwardGeocoder:
             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                 break
             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                 break
             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
+            log().var_dump('Params', self.params)
             lookup_results = await search.lookup(self.conn, self.params)
             for result in lookup_results:
                 rhash = (result.source_table, result.place_id,
             lookup_results = await search.lookup(self.conn, self.params)
             for result in lookup_results:
                 rhash = (result.source_table, result.place_id,
@@ -103,19 +104,27 @@ class ForwardGeocoder:
         return SearchResults(results.values())
 
 
         return SearchResults(results.values())
 
 
+    def pre_filter_results(self, results: SearchResults) -> SearchResults:
+        """ Remove results that are significantly worse than the
+            best match.
+        """
+        if results:
+            max_ranking = min(r.ranking for r in results) + 0.5
+            results = SearchResults(r for r in results if r.ranking < max_ranking)
+
+        return results
+
+
     def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
         """ Remove badly matching results, sort by ranking and
             limit to the configured number of results.
         """
         if results:
     def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
         """ Remove badly matching results, sort by ranking and
             limit to the configured number of results.
         """
         if results:
-            min_ranking = min(r.ranking for r in results)
-            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
             results.sort(key=lambda r: r.ranking)
             results.sort(key=lambda r: r.ranking)
-
-        if results:
             min_rank = results[0].rank_search
             min_rank = results[0].rank_search
+            min_ranking = results[0].ranking
             results = SearchResults(r for r in results
             results = SearchResults(r for r in results
-                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
+                                    if r.ranking + 0.03 * (r.rank_search - min_rank)
                                        < min_ranking + 0.5)
 
             results = SearchResults(results[:self.limit])
                                        < min_ranking + 0.5)
 
             results = SearchResults(results[:self.limit])
@@ -140,7 +149,8 @@ class ForwardGeocoder:
                or (result.importance is not None and result.importance < 0):
                 continue
             distance = 0.0
                or (result.importance is not None and result.importance < 0):
                 continue
             distance = 0.0
-            norm = self.query_analyzer.normalize_text(result.display_name)
+            norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
+                                                                result.country_code or '')))
             words = set((w for w in norm.split(' ') if w))
             if not words:
                 continue
             words = set((w for w in norm.split(' ') if w))
             if not words:
                 continue
@@ -172,6 +182,7 @@ class ForwardGeocoder:
             if query:
                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
                 results = await self.execute_searches(query, searches)
             if query:
                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
                 results = await self.execute_searches(query, searches)
+                results = self.pre_filter_results(results)
                 await add_result_details(self.conn, results, self.params)
                 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                 results = self.sort_and_cut_results(results)
                 await add_result_details(self.conn, results, self.params)
                 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                 results = self.sort_and_cut_results(results)
@@ -201,6 +212,7 @@ class ForwardGeocoder:
         if searches:
             # Execute SQL until an appropriate result is found.
             results = await self.execute_searches(query, searches[:50])
         if searches:
             # Execute SQL until an appropriate result is found.
             results = await self.execute_searches(query, searches[:50])
+            results = self.pre_filter_results(results)
             await add_result_details(self.conn, results, self.params)
             log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
             self.rerank_by_query(query, results)
             await add_result_details(self.conn, results, self.params)
             log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
             self.rerank_by_query(query, results)