]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/geocoder.py
Merge pull request #3201 from lonvia/tweak-expected-count
[nominatim.git] / nominatim / api / search / geocoder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Public interface to the search code.
9 """
from typing import List, Any, Optional, Iterator, Tuple
import itertools
import datetime as dt
import time

from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from nominatim.api.search.query import Phrase, QueryStruct
from nominatim.api.logging import log
23
class ForwardGeocoder:
    """ Main class responsible for place search.

        Converts query phrases into a list of abstract database searches
        and executes them over the given connection until results are
        found or the time budget is used up.
    """

    def __init__(self, conn: SearchConnection,
                 params: SearchDetails, timeout: Optional[int]) -> None:
        """ Create a geocoder that searches over `conn` with the search
            parameters `params`. `timeout` is the time budget in seconds
            for executing database searches. None or 0 are replaced by
            1000000 seconds, which is effectively no limit.
        """
        self.conn = conn
        self.params = params
        self.timeout = dt.timedelta(seconds=timeout or 1000000)
        # Created lazily on the first call to build_searches().
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None


    @property
    def limit(self) -> int:
        """ Return the configured maximum number of search results.
        """
        return self.params.max_results


    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Analyse the query and return the tokenized query and list of
            possible searches over it.

            The returned searches are sorted by increasing penalty. The
            list is empty when the analysed query has no token slots.
        """
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() > 0:
            # 2. Compute all possible search interpretations
            log().section('Compute abstract searches')
            search_builder = SearchBuilder(query, self.params)
            # Number of searches already logged, so that each assignment
            # only dumps the searches it has newly contributed.
            num_searches = 0
            for assignment in yield_token_assignments(query):
                searches.extend(search_builder.build(assignment))
                if num_searches < len(searches):
                    log().table_dump('Searches for assignment',
                                     _dump_searches(searches, query, num_searches))
                num_searches = len(searches)
            searches.sort(key=lambda s: s.penalty)

        return query, searches


    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Run the abstract searches against the database until a result
            is found.

            The searches are processed in the given order. Processing
            stops early when the penalty of the next search has increased
            and either exceeds the cut-off derived from the results found
            so far or more than 20 searches have already been looked at.
            It also stops as soon as the timeout has expired after a
            search has finished. The collected results are then filtered
            by ranking, sorted and cut to the configured limit.
        """
        log().section('Execute database searches')
        results = SearchResults()
        # Measure the deadline on the monotonic clock. Naive local time
        # jumps on DST changes and system clock adjustments, which would
        # make the search loop stop too early or overrun its budget.
        end_time = time.monotonic() + self.timeout.total_seconds()

        num_results = 0
        min_ranking = 1000.0
        prev_penalty = 0.0
        for i, search in enumerate(searches):
            # Only stop between groups of searches with equal penalty.
            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                break
            log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
            for result in await search.lookup(self.conn, self.params):
                results.append(result)
                # Every result found tightens the penalty cut-off above.
                min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
            # Log only the results that were added by this search.
            log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
            num_results = len(results)
            prev_penalty = search.penalty
            if time.monotonic() >= end_time:
                break

        if results:
            # Drop everything that ranks clearly worse than the best result.
            min_ranking = min(r.ranking for r in results)
            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)

        if results:
            # Filter again with an extra handicap of 0.05 for each
            # search rank above the lowest one among the results.
            min_rank = min(r.rank_search for r in results)

            results = SearchResults(r for r in results
                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
                                       < min_ranking + 0.5)

            results.sort(key=lambda r: r.accuracy - r.calculated_importance())
            results = SearchResults(results[:self.limit])

        return results


    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Look up places by category. If phrase is given, a place search
            over the phrase will be executed first and places close to the
            results returned.

            Without phrases, a plain category search restricted to the
            countries of the search parameters is run instead.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)

        if phrases:
            query, searches = await self.build_searches(phrases)

            if query:
                # Wrap at most the 50 searches with the lowest penalty.
                searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, searches)
            else:
                results = SearchResults()
        else:
            search = build_poi_search(categories, self.params.countries)
            results = await search.lookup(self.conn, self.params)

        await add_result_details(self.conn, results, self.params)
        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Look up a single free-text query.

            Returns an empty result list without touching the database
            when the search parameters are marked as impossible or when
            no searches could be built for the phrases.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)
        results = SearchResults()

        if self.params.is_impossible():
            return results

        query, searches = await self.build_searches(phrases)

        if searches:
            # Execute SQL until an appropriate result is found.
            # Only the 50 searches with the lowest penalty are tried.
            results = await self.execute_searches(query, searches[:50])
            await add_result_details(self.conn, results, self.params)
            log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results
156
157
158 # pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                   start: int = 0) -> Iterator[Optional[List[Any]]]:
    """ Yield the rows of a debug table describing the given searches.

        The first row is the table header. Each search from index `start`
        onwards then produces one or more rows of formatted strings,
        followed by a single None. `query` is used to resolve token ids
        to their lookup words.
    """
    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
           'Qualifier', 'Category', 'Rankings']

    def tk(tl: List[int]) -> str:
        # Format a token list as '[word(id),word(id),...]'.
        tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]

        return f"[{','.join(tstr)}]"

    def fmt_ranking(f: Any) -> str:
        if not f:
            return ''
        ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
        # Keep the table readable when there are many rankings.
        if len(ranks) > 100:
            ranks = ranks[:100] + '...'
        return f"{f.column}({ranks},def={f.default:.3g})"

    def fmt_lookup(lookup: Any) -> str:
        if not lookup:
            return ''

        return f"{lookup.lookup_type}({lookup.column}{tk(lookup.tokens)})"

    def fmt_cstr(c: Any) -> str:
        # Format a (value, penalty) pair; padding cells stay empty.
        if not c:
            return ''

        return f'{c[0]}^{c[1]}'

    fields = ('lookups', 'rankings', 'countries', 'housenumbers',
              'postcodes', 'qualifiers')

    for search in searches[start:]:
        if hasattr(search, 'search'):
            # A wrapping search: the field lists live on the inner search,
            # the categories on the wrapper itself.
            source = search.search
            categories = getattr(search, 'categories', [])
        else:
            source = search
            categories = []

        # One table row per list position. The penalty appears only in the
        # first row and shorter columns are padded with empty strings.
        iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                      *(getattr(source, attr, []) for attr in fields),
                                      categories,
                                      fillvalue='')
        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
            yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
        yield None