]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/api/search/geocoder.py
These days the OSM wikipedia tab no longer contains URLs
[nominatim.git] / nominatim / api / search / geocoder.py
1 # SPDX-License-Identifier: GPL-3.0-or-later
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Public interface to the search code.
9 """
10 from typing import List, Any, Optional, Iterator, Tuple
11 import itertools
12
13 from nominatim.api.connection import SearchConnection
14 from nominatim.api.types import SearchDetails
15 from nominatim.api.results import SearchResults, add_result_details
16 from nominatim.api.search.token_assignment import yield_token_assignments
17 from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
18 from nominatim.api.search.db_searches import AbstractSearch
19 from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
20 from nominatim.api.search.query import Phrase, QueryStruct
21 from nominatim.api.logging import log
22
class ForwardGeocoder:
    """ Entry point for forward (place name) searches.

        An instance bundles the database connection with the search
        parameters and keeps the query analyzer once it has been created.
    """

    def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
        self.conn = conn
        self.params = params
        # Created on demand by build_searches().
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None


    @property
    def limit(self) -> int:
        """ Maximum number of results to return, as set in the
            search parameters.
        """
        return self.params.max_results


    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Tokenize the phrases and derive the candidate searches.

            Returns the analysed query together with the abstract searches
            built from its token assignments, sorted by ascending penalty.
            The list of searches is empty when the query has no token slots.
        """
        analyzer = self.query_analyzer
        if analyzer is None:
            analyzer = await make_query_analyzer(self.conn)
            self.query_analyzer = analyzer

        query = await analyzer.analyze_query(phrases)
        searches: List[AbstractSearch] = []

        if query.num_token_slots() <= 0:
            return query, searches

        # Compute all possible search interpretations.
        log().section('Compute abstract searches')
        builder = SearchBuilder(query, self.params)
        for assignment in yield_token_assignments(query):
            # Only the searches added for this assignment get dumped.
            already_dumped = len(searches)
            searches.extend(builder.build(assignment))
            log().table_dump('Searches for assignment',
                             _dump_searches(searches, query, already_dumped))
        searches.sort(key=lambda s: s.penalty)

        return query, searches


    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Run the abstract searches against the database in the given
            order and return the filtered and sorted results.

            Execution stops at the first search whose penalty is higher than
            that of its predecessor and that either exceeds the penalty
            cut-off derived from the results found so far or comes after
            more than 20 searches.
        """
        log().section('Execute database searches')
        results = SearchResults()

        cutoff = 1000.0
        last_penalty = 0.0
        for idx, search in enumerate(searches):
            penalty = search.penalty
            if penalty > last_penalty and (penalty > cutoff or idx > 20):
                break

            log().table_dump(f"{idx + 1}. Search", _dump_searches([search], query))
            first_new = len(results)
            for result in await search.lookup(self.conn, self.params):
                results.append(result)
                cutoff = min(cutoff, result.ranking + 0.5, penalty + 0.3)
            log().result_dump('Results', ((r.accuracy, r) for r in results[first_new:]))
            last_penalty = penalty

        if not results:
            return results

        # Drop everything that ranks clearly worse than the best result.
        max_ranking = min(r.ranking for r in results) + 0.5
        results = SearchResults(r for r in results if r.ranking < max_ranking)

        if not results:
            return results

        # Filter again, this time adding a small penalty per search rank
        # above the lowest search rank found.
        lowest_rank = min(r.rank_search for r in results)
        results = SearchResults(r for r in results
                                if r.ranking + 0.05 * (r.rank_search - lowest_rank)
                                   < max_ranking)

        results.sort(key=lambda r: r.accuracy - r.calculated_importance())

        return SearchResults(results[:self.limit])


    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Search for places of the given categories.

            When phrases are given, a place search over the phrases is
            executed first and places close to its results are returned.
            Without phrases a plain category search is run.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)

        if not phrases:
            poi_search = build_poi_search(categories, self.params.countries)
            results = await poi_search.lookup(self.conn, self.params)
        else:
            query, searches = await self.build_searches(phrases)

            if not query:
                results = SearchResults()
            else:
                near_searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, near_searches)

        await add_result_details(self.conn, results, self.params)
        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Run a free-text search over the given phrases.

            Returns an empty result set when the search parameters can
            never match or when no search could be built from the phrases.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)

        if self.params.is_impossible():
            return SearchResults()

        query, searches = await self.build_searches(phrases)

        if not searches:
            return SearchResults()

        # Execute SQL until an appropriate result is found.
        results = await self.execute_searches(query, searches[:50])
        await add_result_details(self.conn, results, self.params)
        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results
149
150
151 # pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                   start: int = 0) -> Iterator[Optional[List[Any]]]:
    """ Produce the rows of a debug table describing the given searches.

        The first row yielded is the table header. Each search in
        `searches[start:]` then contributes one or more rows followed by
        a `None` row. Only the first row of a search carries its penalty,
        the remaining cells are filled column by column from the attributes
        of the search. Attributes missing on a search yield empty cells.
    """
    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']

    def token_list(token_ids: List[int]) -> str:
        # Render each token as "word(id)" using the word lookup of the query.
        words = ','.join(f"{query.find_lookup_word_by_id(tid)}({tid})" for tid in token_ids)
        return f"[{words}]"

    def ranking_cell(field: Any) -> str:
        if not field:
            return ''
        ranks = ','.join(f"{token_list(r.tokens)}^{r.penalty:.3g}" for r in field.rankings)
        # Keep the cell readable by cutting over-long ranking lists.
        shown = ranks if len(ranks) <= 100 else ranks[:100] + '...'
        return f"{field.column}({shown},def={field.default:.3g})"

    def lookup_cell(lookup: Any) -> str:
        if lookup:
            return f"{lookup.lookup_type}({lookup.column}{token_list(lookup.tokens)})"
        return ''

    def weighted_cell(item: Any) -> str:
        return f'{item[0]}^{item[1]}' if item else ''

    attr_names = ('lookups', 'rankings', 'countries', 'housenumbers',
                  'postcodes', 'qualifier')

    for search in searches[start:]:
        penalty_column = [f"{search.penalty:.3g}"]
        columns = [getattr(search, name, []) for name in attr_names]
        rows = itertools.zip_longest(penalty_column, *columns, fillvalue='')
        for penalty, lookup, ranking, country, housenr, postcode, qualifier in rows:
            yield [penalty, lookup_cell(lookup), weighted_cell(housenr),
                   weighted_cell(postcode), weighted_cell(country),
                   weighted_cell(qualifier), ranking_cell(ranking)]
        yield None