1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Public interface to the search code.
10 from typing import List, Any, Optional, Iterator, Tuple, Dict
16 from nominatim.api.connection import SearchConnection
17 from nominatim.api.types import SearchDetails
18 from nominatim.api.results import SearchResult, SearchResults, add_result_details
19 from nominatim.api.search.token_assignment import yield_token_assignments
20 from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
21 from nominatim.api.search.db_searches import AbstractSearch
22 from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
23 from nominatim.api.search.query import Phrase, QueryStruct
24 from nominatim.api.logging import log
26 class ForwardGeocoder:
# Main driver for free-text ("forward") search: analyses the incoming
# phrases into a tokenized query, derives abstract database searches from
# token assignments, executes them under a time budget, and post-processes
# the results (dedup, rerank against the original query, sort and cut).
#
# NOTE(review): this chunk has interior lines elided (the embedded original
# line numbers jump). Missing are, among others: docstring terminators, the
# `self.conn`/`self.params` assignments in __init__, the initialisers for
# `num_searches`, `prev_penalty`, `min_ranking` and `distance`, several
# branch bodies and early exits. Comments below describe only what the
# visible lines establish; elided behaviour is marked as an assumption.
 27 """ Main class responsible for place search.
# Constructor. `conn` and `params` are presumably stored on self on elided
# lines (methods below read self.conn and self.params) — TODO confirm.
 30 def __init__(self, conn: SearchConnection,
 31 params: SearchDetails, timeout: Optional[int]) -> None:
# A falsy timeout (None or 0) falls back to ~1e6 seconds, i.e. effectively
# no time limit on search execution.
 34 self.timeout = dt.timedelta(seconds=timeout or 1000000)
# Query analyzer is created lazily on first use (see build_searches).
 35 self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
# Accessor for the configured result cap; presumably decorated with
# @property on an elided line (it is used as `self.limit` in
# sort_and_cut_results) — TODO confirm.
 39 def limit(self) -> int:
 40 """ Return the configured maximum number of search results.
 42 return self.params.max_results
# Tokenize the phrases and enumerate all abstract searches that could
# answer them. Returns the tokenized query plus the searches sorted by
# ascending penalty (lower penalty = more likely interpretation).
 45 async def build_searches(self,
 46 phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
 47 """ Analyse the query and return the tokenized query and list of
 48 possible searches over it.
# Lazily instantiate the analyzer on the first call and cache it.
 50 if self.query_analyzer is None:
 51 self.query_analyzer = await make_query_analyzer(self.conn)
 53 query = await self.query_analyzer.analyze_query(phrases)
 55 searches: List[AbstractSearch] = []
# Only build searches when the analyzer actually produced tokens.
 56 if query.num_token_slots() > 0:
 57 # 2. Compute all possible search interpretations
 58 log().section('Compute abstract searches')
 59 search_builder = SearchBuilder(query, self.params)
# Each token assignment can expand into several concrete searches.
# NOTE(review): `num_searches` is initialised on an elided line
# (presumably to 0); it tracks how many searches were already dumped so
# that each log entry only shows the newly added ones.
 61 for assignment in yield_token_assignments(query):
 62 searches.extend(search_builder.build(assignment))
 63 if num_searches < len(searches):
 64 log().table_dump('Searches for assignment',
 65 _dump_searches(searches, query, num_searches))
 66 num_searches = len(searches)
# Cheapest (lowest-penalty) interpretations first.
 67 searches.sort(key=lambda s: s.penalty)
 69 return query, searches
# Run the abstract searches against the database, deduplicating results,
# until the time budget runs out or (per the elided guard on the penalty
# check below — TODO confirm) further searches cannot beat what was found.
 72 async def execute_searches(self, query: QueryStruct,
 73 searches: List[AbstractSearch]) -> SearchResults:
 74 """ Run the abstract searches against the database until a result
 77 log().section('Execute database searches')
# Keyed by a dedup hash so the same place found by two searches is merged.
 78 results: Dict[Any, SearchResult] = {}
# Absolute deadline for the whole loop, derived from the constructor's
# timeout.
 80 end_time = dt.datetime.now() + self.timeout
# NOTE(review): `prev_penalty` and `min_ranking` initialisers are elided;
# the body of the early-exit condition below is also missing (presumably
# a `break`) — TODO confirm.
 84 for i, search in enumerate(searches):
 85 if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
 87 log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
 88 lookup_results = await search.lookup(self.conn, self.params)
 89 for result in lookup_results:
# Dedup key: same table row + housenumber + country counts as the same
# place across different searches.
 90 rhash = (result.source_table, result.place_id,
 91 result.housenumber, result.country_code)
 92 prevresult = results.get(rhash)
# On a duplicate, keep the better (lower) accuracy; the branch structure
# around these two lines is partially elided.
 94 prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
 96 results[rhash] = result
# Tighten the bar that later searches must beat.
 97 min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
 98 log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
 99 prev_penalty = search.penalty
# Stop issuing further searches once the deadline has passed.
 100 if dt.datetime.now() >= end_time:
 103 return SearchResults(results.values())
# Post-process: drop results ranking far behind the best one, sort, and
# truncate to the configured limit.
 106 def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
 107 """ Remove badly matching results, sort by ranking and
 108 limit to the configured number of results.
# Keep only results within 0.5 of the best ranking.
 111 min_ranking = min(r.ranking for r in results)
 112 results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
 113 results.sort(key=lambda r: r.ranking)
# Second filter penalises results whose rank_search is far above the best
# result's; the comparison's right-hand side is on an elided line —
# TODO confirm the threshold.
 116 min_rank = results[0].rank_search
 117 results = SearchResults(r for r in results
 118 if r.ranking + 0.05 * (r.rank_search - min_rank)
# Cut to the configured maximum (see `limit` above).
 121 results = SearchResults(results[:self.limit])
# Adjust result accuracy by fuzzy-matching the result's display name
# against the words of the original query (in-place; returns None).
 126 def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
 127 """ Adjust the accuracy of the localized result according to how well
 128 they match the original query.
# build_searches must have run first, so the analyzer exists.
 130 assert self.query_analyzer is not None
# Query words, split on commas/spaces, empty tokens dropped.
 131 qwords = [word for phrase in query.source
 132 for word in re.split('[, ]+', phrase.text) if word]
 136 for result in results:
# Results without a display name are skipped (body of this guard is on
# an elided line — presumably `continue`).
 137 if not result.display_name:
 140 norm = self.query_analyzer.normalize_text(result.display_name)
 141 words = set((w for w in norm.split(' ') if w))
# Best fuzzy similarity of this query word against any display-name word.
# NOTE(review): the loop over `qwords`, the `distance` initialiser and
# the branch choosing between the two `distance +=` lines are elided.
 145 wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
 147 distance += len(qword)
 149 distance += (1.0 - wdist) * len(qword)
 150 # Compensate for the fact that country names do not get a
 151 # match penalty yet by the tokenizer.
 152 # Temporary hack that needs to be removed!
 153 if result.rank_address == 4:
# Normalise the accumulated distance by total query length and fold it
# into the accuracy (higher = worse match).
 155 result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
# Category ("POI") search: optionally anchored near the results of a
# regular phrase search; falls back to a plain category lookup restricted
# by country when no phrase search applies (branching partially elided).
 158 async def lookup_pois(self, categories: List[Tuple[str, str]],
 159 phrases: List[Phrase]) -> SearchResults:
 160 """ Look up places by category. If phrase is given, a place search
 161 over the phrase will be executed first and places close to the
 164 log().function('forward_lookup_pois', categories=categories, params=self.params)
 167 query, searches = await self.build_searches(phrases)
# Wrap the 50 cheapest phrase searches into near-searches for the
# requested categories.
 170 searches = [wrap_near_search(categories, s) for s in searches[:50]]
 171 results = await self.execute_searches(query, searches)
 172 await add_result_details(self.conn, results, self.params)
 173 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
 174 results = self.sort_and_cut_results(results)
# Fallback path (under elided branches): plain POI search by category.
 176 results = SearchResults()
 178 search = build_poi_search(categories, self.params.countries)
 179 results = await search.lookup(self.conn, self.params)
 180 await add_result_details(self.conn, results, self.params)
 182 log().result_dump('Final Results', ((r.accuracy, r) for r in results))
# Main entry point for a free-text query: build searches, execute the 50
# cheapest, enrich, rerank against the query text, then sort and cut.
 187 async def lookup(self, phrases: List[Phrase]) -> SearchResults:
 188 """ Look up a single free-text query.
 190 log().function('forward_lookup', phrases=phrases, params=self.params)
 191 results = SearchResults()
# Contradictory parameters short-circuit to an empty result (guard body
# elided — presumably skips the search).
 193 if self.params.is_impossible():
 196 query, searches = await self.build_searches(phrases)
 199 # Execute SQL until an appropriate result is found.
 200 results = await self.execute_searches(query, searches[:50])
 201 await add_result_details(self.conn, results, self.params)
 202 log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
 203 self.rerank_by_query(query, results)
 204 log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
 205 results = self.sort_and_cut_results(results)
 206 log().result_dump('Final Results', ((r.accuracy, r) for r in results))
211 # pylint: disable=invalid-name,too-many-locals
# Debug helper: render abstract searches as rows for log().table_dump().
# Yields a header row first, then one row per search starting at `start`.
# NOTE(review): interior lines are elided here too (guard clauses of the
# fmt_* helpers, the `else:` for the zip_longest branch and its closing
# fillvalue arguments) — comments describe only the visible lines.
 212 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 213 start: int = 0) -> Iterator[Optional[List[Any]]]:
# NOTE(review): 'Catgeory' is a typo ('Category') in this runtime header
# string; it only affects debug-log output, but fixing it is a behaviour
# change and is out of scope for a doc-only edit.
 214 yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
 215 'Qualifier', 'Catgeory', 'Rankings']
# Format a token-id list as "[word(id),word(id),...]" using the query's
# lookup-word table.
 217 def tk(tl: List[int]) -> str:
 218 tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
 220 return f"[{','.join(tstr)}]"
# Format a ranking field: its token rankings with penalties, truncated to
# 100 chars (the guard deciding truncation is on an elided line), plus the
# column name and default penalty.
 222 def fmt_ranking(f: Any) -> str:
 225 ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
 227 ranks = ranks[:100] + '...'
 228 return f"{f.column}({ranks},def={f.default:.3g})"
# Format a lookup as "type(column[tokens])"; a falsy-guard is presumably
# elided above the return — TODO confirm.
 230 def fmt_lookup(l: Any) -> str:
 234 return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
# Format a (value, penalty) pair as "value^penalty".
 237 def fmt_cstr(c: Any) -> str:
 241 return f'{c[0]}^{c[1]}'
 243 for search in searches[start:]:
 244 fields = ('lookups', 'rankings', 'countries', 'housenumbers',
 245 'postcodes', 'qualifiers')
# Near-searches wrap an inner search in a `.search` attribute and add
# their own `.categories`; plain searches carry the fields directly
# (the `else:` introducing the second zip_longest is elided).
 246 if hasattr(search, 'search'):
 247 iters = itertools.zip_longest([f"{search.penalty:.3g}"],
 248 *(getattr(search.search, attr, []) for attr in fields),
 249 getattr(search, 'categories', []),
 252 iters = itertools.zip_longest([f"{search.penalty:.3g}"],
 253 *(getattr(search, attr, []) for attr in fields),
# zip_longest pads the shorter columns so every row has all eight cells.
 256 for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
 257 yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
 258 fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]