# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Public interface to the search code.
"""
from typing import List, Any, Optional, Iterator, Tuple, Dict
import itertools
import re
import datetime as dt
import difflib

from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResult, SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from nominatim.api.search.query import Phrase, QueryStruct
from nominatim.api.logging import log


class ForwardGeocoder:
    """ Main class responsible for place search.
    """

    def __init__(self, conn: SearchConnection,
                 params: SearchDetails, timeout: Optional[int]) -> None:
        self.conn = conn
        self.params = params
        self.timeout = dt.timedelta(seconds=timeout or 1000000)
        self.query_analyzer: Optional[AbstractQueryAnalyzer] = None

    @property
    def limit(self) -> int:
        """ Return the configured maximum number of search results.
        """
        return self.params.max_results

    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Analyse the query and return the tokenized query and list of
            possible searches over it.
        """
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() > 0:
            # 2. Compute all possible search interpretations
            log().section('Compute abstract searches')
            search_builder = SearchBuilder(query, self.params)
            num_searches = 0
            for assignment in yield_token_assignments(query):
                searches.extend(search_builder.build(assignment))
                if num_searches < len(searches):
                    log().table_dump('Searches for assignment',
                                     _dump_searches(searches, query, num_searches))
                num_searches = len(searches)
            searches.sort(key=lambda s: s.penalty)

        return query, searches

    async def execute_searches(self, query: QueryStruct,
                               searches: List[AbstractSearch]) -> SearchResults:
        """ Run the abstract searches against the database until a result
            is found.
        """
        log().section('Execute database searches')
        results: Dict[Any, SearchResult] = {}

        end_time = dt.datetime.now() + self.timeout

        min_ranking = searches[0].penalty + 2.0
        prev_penalty = 0.0
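        # Searches run in order of increasing penalty. The loop breaks off
        # as soon as the penalty goes up and either exceeds the ranking
        # threshold derived from the best results found so far or more than
        # 20 searches have already been executed. A wall-clock timeout caps
        # the total runtime as well.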
        for i, search in enumerate(searches):
            if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                break
            log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
            lookup_results = await search.lookup(self.conn, self.params)
            for result in lookup_results:
                rhash = (result.source_table, result.place_id,
                         result.housenumber, result.country_code)
                prevresult = results.get(rhash)
                if prevresult:
                    prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
                else:
                    results[rhash] = result
                min_ranking = min(min_ranking, result.accuracy * 1.2)
            log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
            prev_penalty = search.penalty
            if dt.datetime.now() >= end_time:
                break

        return SearchResults(results.values())

    def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
        """ Remove badly matching results, sort by ranking and
            limit to the configured number of results.
        """
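        # First drop results whose ranking is far off the best match, then
        # additionally penalise more specific objects (higher search rank)
        # before cutting the list down to the configured limit.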
        if results:
            min_ranking = min(r.ranking for r in results)
            results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
            results.sort(key=lambda r: r.ranking)

        if results:
            min_rank = results[0].rank_search
            results = SearchResults(r for r in results
                                    if r.ranking + 0.05 * (r.rank_search - min_rank)
                                       < min_ranking + 0.5)

            results = SearchResults(results[:self.limit])

        return results

    def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
        """ Adjust the accuracy of the localized results according to how
            well they match the original query.
        """
        assert self.query_analyzer is not None
        qwords = [word for phrase in query.source
                  for word in re.split('[, ]+', phrase.text) if word]
        if not qwords:
            return
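        # For each result, compute a fuzzy word distance between the query
        # words and the words of the normalized display name and add it as
        # a penalty to the result accuracy.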
        for result in results:
            # Negative importance indicates ordering by distance, which is
            # more important than word matching.
            if not result.display_name\
               or (result.importance is not None and result.importance < 0):
                continue
            distance = 0.0
            norm = self.query_analyzer.normalize_text(result.display_name)
            words = set((w for w in norm.split(' ') if w))
            if not words:
                continue
            for qword in qwords:
                wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
                if wdist < 0.5:
                    distance += len(qword)
                else:
                    distance += (1.0 - wdist) * len(qword)
            # Compensate for the fact that country names do not get a
            # match penalty yet by the tokenizer.
            # Temporary hack that needs to be removed!
            if result.rank_address == 4:
                distance *= 2
            result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)

    async def lookup_pois(self, categories: List[Tuple[str, str]],
                          phrases: List[Phrase]) -> SearchResults:
        """ Look up places by category. If phrase is given, a place search
            over the phrase will be executed first and places close to the
            results are returned.
        """
        log().function('forward_lookup_pois', categories=categories, params=self.params)
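        # With a phrase, first run a regular place search and look for the
        # requested categories near its results. Without a phrase, run a
        # plain category search restricted to the configured countries.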
        if phrases:
            query, searches = await self.build_searches(phrases)

            if query:
                searches = [wrap_near_search(categories, s) for s in searches[:50]]
                results = await self.execute_searches(query, searches)
                await add_result_details(self.conn, results, self.params)
                log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
                results = self.sort_and_cut_results(results)
            else:
                results = SearchResults()
        else:
            search = build_poi_search(categories, self.params.countries)
            results = await search.lookup(self.conn, self.params)
            await add_result_details(self.conn, results, self.params)

        log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results

    async def lookup(self, phrases: List[Phrase]) -> SearchResults:
        """ Look up a single free-text query.
        """
        log().function('forward_lookup', phrases=phrases, params=self.params)
        results = SearchResults()

        if self.params.is_impossible():
            return results

        query, searches = await self.build_searches(phrases)

        if searches:
            # Execute SQL until an appropriate result is found.
            results = await self.execute_searches(query, searches[:50])
            await add_result_details(self.conn, results, self.params)
            log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
            self.rerank_by_query(query, results)
            log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
            results = self.sort_and_cut_results(results)
            log().result_dump('Final Results', ((r.accuracy, r) for r in results))

        return results


# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                   start: int = 0) -> Iterator[Optional[List[Any]]]:
    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
           'Qualifier', 'Category', 'Rankings']

    def tk(tl: List[int]) -> str:
        tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]

        return f"[{','.join(tstr)}]"

    def fmt_ranking(f: Any) -> str:
        if not f:
            return ''
        ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
        if len(ranks) > 100:
            ranks = ranks[:100] + '...'
        return f"{f.column}({ranks},def={f.default:.3g})"

    def fmt_lookup(l: Any) -> str:
        if not l:
            return ''
        return f"{l.lookup_type}({l.column}{tk(l.tokens)})"

    def fmt_cstr(c: Any) -> str:
        if not c:
            return ''
        return f'{c[0]}^{c[1]}'
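
    # Each search yields one row per zipped entry; fields with multiple
    # constraints produce additional rows, with zip_longest padding the
    # missing cells with empty strings so every row has the same width.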
    for search in searches[start:]:
        fields = ('lookups', 'rankings', 'countries', 'housenumbers',
                  'postcodes', 'qualifiers')
        if hasattr(search, 'search'):
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search.search, attr, []) for attr in fields),
                                          getattr(search, 'categories', []),
                                          fillvalue='')
        else:
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search, attr, []) for attr in fields),
                                          [],
                                          fillvalue='')
        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
            yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]