1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Public interface to the search code.
"""
from typing import List, Any, Optional, Iterator, Tuple
import itertools
import re
import datetime as dt
import difflib

from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from nominatim.api.search.query import Phrase, QueryStruct
from nominatim.api.logging import log
class ForwardGeocoder:
    """ Main class responsible for place search.
    """

    def __init__(self, conn: SearchConnection,
                 params: SearchDetails, timeout: Optional[int]) -> None:
        self.conn = conn
        self.params = params
        # No timeout given means effectively unlimited run time.
        self.timeout = dt.timedelta(seconds=timeout or 1000000)
        # Created lazily on first use because setup needs a database roundtrip.
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None


    @property
    def limit(self) -> int:
        """ Return the configured maximum number of search results.
        """
        return self.params.max_results


    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Analyse the query and return the tokenized query and list of
            possible searches over it.
        """
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() > 0:
            # 2. Compute all possible search interpretations
            log().section('Compute abstract searches')
            search_builder = SearchBuilder(query, self.params)
            num_searches = 0
            for assignment in yield_token_assignments(query):
                searches.extend(search_builder.build(assignment))
                # Only dump the searches newly added for this assignment.
                if num_searches < len(searches):
                    log().table_dump('Searches for assignment',
                                     _dump_searches(searches, query, num_searches))
                num_searches = len(searches)
            searches.sort(key=lambda s: s.penalty)

        return query, searches


    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Run the abstract searches against the database until a result
            is found or the timeout expires.
        """
        log().section('Execute database searches')
        results = SearchResults()
        end_time = dt.datetime.now() + self.timeout

        num_results = 0
        # Ranking cut-off starts effectively unbounded and tightens as
        # results come in. (Exact original sentinel not recoverable here;
        # any sufficiently large value preserves the cut-off logic.)
        min_ranking = 10000.0
        prev_penalty = 0.0
        for i, search in enumerate(searches):
            # Stop once penalties rise above the best results found so far
            # or more than 20 increasingly penalized searches were run.
            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                break
            log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
            for result in await search.lookup(self.conn, self.params):
                results.append(result)
                min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
            log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
            num_results = len(results)
            prev_penalty = search.penalty
            if dt.datetime.now() >= end_time:
                break

        return results


    def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
        """ Remove badly matching results, sort by ranking and
            limit to the configured number of results.
        """
        if results:
            min_ranking = min(r.ranking for r in results)
            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
            results.sort(key=lambda r: r.ranking)

        if results:
            # Also penalize results of much lower address rank than the best.
            min_rank = results[0].rank_search
            results = SearchResults(r for r in results
                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
                                       < min_ranking + 0.5)

            results = SearchResults(results[:self.limit])

        return results


    def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
        """ Adjust the accuracy of the localized result according to how well
            they match the original query.
        """
        assert self.query_analyzer is not None
        qwords = [word for phrase in query.source
                       for word in re.split('[, ]+', phrase.text) if word]
        if not qwords:
            return

        for result in results:
            if not result.display_name:
                continue
            distance = 0.0
            norm = self.query_analyzer.normalize_text(result.display_name)
            words = set((w for w in norm.split(' ') if w))
            if not words:
                continue
            for qword in qwords:
                # Best fuzzy match of the query word against any result word.
                wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
                if wdist < 0.5:
                    # Too different to count as a match: full-length penalty.
                    distance += len(qword)
                else:
                    distance += (1.0 - wdist) * len(qword)
            result.accuracy += distance * 0.5 / sum(len(w) for w in qwords)


    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Look up places by category. If phrase is given, a place search
            over the phrase will be executed first and places close to the
            results are returned.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)

        if phrases:
            query, searches = await self.build_searches(phrases)

            if query:
                # Turn every search into a near search around its results.
                searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, searches)
                await add_result_details(self.conn, results, self.params)
                log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                results = self.sort_and_cut_results(results)
            else:
                results = SearchResults()
        else:
            # No phrase: plain category search over the configured countries.
            search = build_poi_search(categories, self.params.countries)
            results = await search.lookup(self.conn, self.params)
            await add_result_details(self.conn, results, self.params)

        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Look up a single free-text query.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)
        results = SearchResults()

        if self.params.is_impossible():
            return results

        query, searches = await self.build_searches(phrases)

        if searches:
            # Execute SQL until an appropriate result is found.
            results = await self.execute_searches(query, searches[:50])
            await add_result_details(self.conn, results, self.params)
            log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
            self.rerank_by_query(query, results)
            log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
            results = self.sort_and_cut_results(results)
            log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                   start: int = 0) -> Iterator[Optional[List[Any]]]:
    """ Yield the rows of a debug table describing the given abstract
        searches, starting at index `start`. The first row is the header;
        each search contributes one row per zipped field tuple followed by
        a `None` separator row.
    """
    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
           'Qualifier', 'Category', 'Rankings']

    def tk(tl: List[int]) -> str:
        # Render a token id list as "word(id)" entries.
        tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]

        return f"[{','.join(tstr)}]"

    def fmt_ranking(f: Any) -> str:
        if not f:
            return ''
        ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
        if len(ranks) > 100:
            # Truncate to keep the debug table readable.
            ranks = ranks[:100] + '...'
        return f"{f.column}({ranks},def={f.default:.3g})"

    def fmt_lookup(l: Any) -> str:
        if not l:
            return ''

        return f"{l.lookup_type}({l.column}{tk(l.tokens)})"


    def fmt_cstr(c: Any) -> str:
        if not c:
            return ''

        return f'{c[0]}^{c[1]}'

    for search in searches[start:]:
        fields = ('lookups', 'rankings', 'countries', 'housenumbers',
                  'postcodes', 'qualifiers')
        if hasattr(search, 'search'):
            # Wrapped (near) searches keep the inner search in 'search'.
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search.search, attr, []) for attr in fields),
                                          getattr(search, 'categories', []),
                                          fillvalue='')
        else:
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search, attr, []) for attr in fields),
                                          [],
                                          fillvalue='')
        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
            yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
        # Separator row between searches for the table dumper.
        yield None