git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/geocoder.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / api / search / geocoder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Public interface to the search code.
9 """
10 from typing import List, Any, Optional, Iterator, Tuple
11 import itertools
12
13 from nominatim.api.connection import SearchConnection
14 from nominatim.api.types import SearchDetails
15 from nominatim.api.results import SearchResults, add_result_details
16 from nominatim.api.search.token_assignment import yield_token_assignments
17 from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
18 from nominatim.api.search.db_searches import AbstractSearch
19 from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
20 from nominatim.api.search.query import Phrase, QueryStruct
21 from nominatim.api.logging import log
22
class ForwardGeocoder:
    """ Forward geocoding front end: analyses phrases, builds the abstract
        searches for them and runs those searches on the connection.
    """

    def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
        self.conn = conn
        self.params = params
        # Filled in by build_searches() the first time it is called.
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None


    @property
    def limit(self) -> int:
        """ Upper bound on the number of results, taken from the
            `max_results` field of the search parameters.
        """
        return self.params.max_results


    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Tokenize the phrases and derive the candidate searches.

            Returns the analysed query together with the abstract searches
            built from all token assignments, ordered by ascending penalty.
            The list is empty when the query has no token slots.
        """
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() <= 0:
            return query, searches

        log().section('Compute abstract searches')
        builder = SearchBuilder(query, self.params)
        for assignment in yield_token_assignments(query):
            already_logged = len(searches)
            searches.extend(builder.build(assignment))
            # Only dump a table when this assignment produced new searches.
            if len(searches) > already_logged:
                log().table_dump('Searches for assignment',
                                 _dump_searches(searches, query, already_logged))
        searches.sort(key=lambda s: s.penalty)

        return query, searches


    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Execute the given searches in order and collect their results.

            Execution stops at the first search whose penalty is higher
            than that of the previously executed search and which either
            exceeds the current ranking cut-off or sits past position 20
            in the list. The collected results are then filtered by
            ranking, sorted and cut down to `limit` entries.
        """
        log().section('Execute database searches')
        results = SearchResults()

        logged_count = 0
        cutoff = 1000.0
        last_penalty = 0.0
        for pos, search in enumerate(searches):
            if search.penalty > last_penalty and (search.penalty > cutoff or pos > 20):
                break
            log().table_dump(f"{pos + 1}. Search", _dump_searches([search], query))
            for result in await search.lookup(self.conn, self.params):
                results.append(result)
                # Every hit may tighten the cut-off for later searches.
                cutoff = min(cutoff, result.ranking + 0.5, search.penalty + 0.3)
            log().result_dump('Results', ((r.accuracy, r) for r in results[logged_count:]))
            logged_count = len(results)
            last_penalty = search.penalty

        if not results:
            return results

        # First pass: keep results whose ranking is within 0.5 of the best.
        threshold = min(r.ranking for r in results) + 0.5
        results = SearchResults(r for r in results if r.ranking < threshold)
        if not results:
            return results

        # Second pass: same threshold, but each result is additionally
        # charged 0.05 per search-rank step above the lowest rank present.
        lowest_rank = min(r.rank_search for r in results)
        results = SearchResults(r for r in results
                                if r.ranking + 0.05 * (r.rank_search - lowest_rank)
                                   < threshold)

        results.sort(key=lambda r: r.accuracy - r.calculated_importance())

        return SearchResults(results[:self.limit])


    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Search for places of the given categories.

            With phrases, the (at most 50) searches built for the phrases
            are wrapped into near searches for the categories and executed.
            Without phrases, a plain POI search restricted to the countries
            from the search parameters is run instead.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)

        if not phrases:
            poi_search = build_poi_search(categories, self.params.countries)
            results = await poi_search.lookup(self.conn, self.params)
        else:
            query, searches = await self.build_searches(phrases)

            if query:
                near_searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, near_searches)
            else:
                results = SearchResults()

        await add_result_details(self.conn, results, self.params)
        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Run a free-text search over the given phrases.

            Returns an empty result set without analysing the phrases when
            the search parameters report themselves as impossible.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)
        results = SearchResults()

        if not self.params.is_impossible():
            query, searches = await self.build_searches(phrases)

            if searches:
                # Only the 50 searches with the lowest penalty are executed.
                results = await self.execute_searches(query, searches[:50])
                await add_result_details(self.conn, results, self.params)
                log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results
150
151
152 # pylint: disable=invalid-name,too-many-locals
153 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
154                    start: int = 0) -> Iterator[Optional[List[Any]]]:
155     yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
156            'Qualifier', 'Catgeory', 'Rankings']
157
158     def tk(tl: List[int]) -> str:
159         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
160
161         return f"[{','.join(tstr)}]"
162
163     def fmt_ranking(f: Any) -> str:
164         if not f:
165             return ''
166         ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
167         if len(ranks) > 100:
168             ranks = ranks[:100] + '...'
169         return f"{f.column}({ranks},def={f.default:.3g})"
170
171     def fmt_lookup(l: Any) -> str:
172         if not l:
173             return ''
174
175         return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
176
177
178     def fmt_cstr(c: Any) -> str:
179         if not c:
180             return ''
181
182         return f'{c[0]}^{c[1]}'
183
184     for search in searches[start:]:
185         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
186                   'postcodes', 'qualifiers')
187         if hasattr(search, 'search'):
188             iters = itertools.zip_longest([f"{search.penalty:.3g}"],
189                                           *(getattr(search.search, attr, []) for attr in fields),
190                                           getattr(search, 'categories', []),
191                                           fillvalue='')
192         else:
193             iters = itertools.zip_longest([f"{search.penalty:.3g}"],
194                                           *(getattr(search, attr, []) for attr in fields),
195                                           [],
196                                           fillvalue='')
197         for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
198             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
199                    fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
200         yield None