1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Dataclasses for search results and helper functions to fill them.
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
import enum
import dataclasses
import datetime as dt

import sqlalchemy as sa

from nominatim.typing import SaSelect, SaRow, SaColumn
from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.localization import Locales
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
    """ Mix-in names from linked places, so that they show up
        as standard names where necessary.

        Keys starting with '_place_' are stored under the key without
        that prefix, unless 'names' already contains the unprefixed key.
        In that case the prefixed key is kept as it is. All other entries
        are copied unchanged.

        Returns None when 'names' is None or empty, otherwise a new
        dictionary. The input dictionary is never modified.
    """
    if not names:
        return None

    prefix = '_place_'
    out: Dict[str, str] = {}
    for k, v in names.items():
        if k.startswith(prefix):
            outkey = k[len(prefix):]
            # Never overwrite a name that the place has set itself.
            out[k if outkey in names else outkey] = v
        else:
            out[k] = v

    return out
class SourceTable(enum.Enum):
    """ Enumeration of kinds of results.

        The member tells which kind of database row a result
        was created from.
    """
    # Result built by create_from_placex_row().
    PLACEX = 1
    # Result built by create_from_osmline_row().
    OSMLINE = 2
    # Result built by create_from_tiger_row().
    TIGER = 3
    # Result built by create_from_postcode_row().
    POSTCODE = 4
    # Result built by create_from_country_row().
    COUNTRY = 5
@dataclasses.dataclass
class AddressLine:
    """ Detailed information about a related place.
    """
    # Identification of the related place. 'osm_object' is a tuple of
    # OSM type and OSM id and may be None.
    place_id: Optional[int]
    osm_object: Optional[Tuple[str, int]]
    # Tuple of class and type of the place.
    category: Tuple[str, str]
    names: Dict[str, str]
    extratags: Optional[Dict[str, str]]

    admin_level: Optional[int]
    fromarea: bool
    # Only lines with this flag set are used when building the display name.
    isaddress: bool
    rank_address: int
    distance: float

    # Filled in by AddressLines.localize().
    local_name: Optional[str] = None
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Set the local name of address parts according to the chosen
            locale. Return the list of local names without duplications.

            Only address parts that are marked as isaddress are localized
            and returned. Parts without names are skipped as well.
        """
        label_parts: List[str] = []

        for line in self:
            if line.isaddress and line.names:
                line.local_name = locales.display_name(line.names)
                # Only directly consecutive duplicates are dropped.
                if not label_parts or label_parts[-1] != line.local_name:
                    label_parts.append(line.local_name)

        return label_parts
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a search term.

        The fields are in the order of the columns selected from the
        word table: word_id, word_token, word.
    """
    word_id: int
    word_token: str
    word: Optional[str] = None


WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    # Mandatory fields, every result needs to supply them.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id : Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    # Both are filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Optional details, they remain None until they are explicitly
    # requested via add_result_details().
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the value or an artificial value computed from the place's
            search rank.

            Note that a stored importance of 0 is treated like a
            missing importance.
        """
        return self.importance or (0.7500001 - (self.rank_search/40.0))


    def localize(self, locales: Locales) -> None:
        """ Fill the locale_name and the display_name field for the
            place and, if available, its address information.

            Without address information the display name is the same
            as the locale name.
        """
        self.locale_name = locales.display_name(self.names)
        if self.address_rows:
            self.display_name = ', '.join(self.address_rows.localize(locales))
        else:
            self.display_name = self.locale_name


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.

        All additional fields are optional and come after the fields
        of BaseResult.
    """
    # Place IDs of related places, None when not set.
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.

        Extends the common result fields by an optional distance
        and an optional bounding box.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.

        The class is a plain list of ReverseResult and does not add
        any functionality of its own.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            The value is the accuracy minus the calculated importance.
            An accuracy of None counts as 1.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Apply the given locales to all results.

            This calls localize() on every result in the list and
            thus changes the results in place.
        """
        for result in self:
            result.localize(locales)
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect the values of all columns of the row whose name starts
        with 'geometry_'. The returned dictionary uses the column name
        without this prefix as key.
    """
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith('geometry_'):
            # 9 is the length of the prefix 'geometry_'.
            geometries[column[9:]] = value

    return geometries
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return. Returns None if the row is None.

        Names from linked places are mixed into the names of the result.
        All columns starting with 'geometry_' end up in the geometry field.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      names=_mingle_name_tags(row.name),
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    # The housenumber column is optional in the row.
    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        'osm_type' and 'osm_id' take precedence over the columns of the
        same name in the row. The columns are only read when the
        respective parameter is None (or otherwise false).

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    # The housenumber column is optional in the row.
    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The result always has the category ('place', 'postcode') and
        carries the postcode as its only name under the key 'ref'.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      category=('place', 'postcode'),
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The result always has the category ('place', 'country') and
        a search and address rank of 4. The place_id is left unset.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      names=row.name,
                      rank_address=4, rank_search=4,
                      country_code=row.country_code)
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        The results are changed in place. Nothing is done (and nothing
        is logged) when the list of results is empty.
    """
    if results:
        log().section('Query details for result')
        if details.address_details:
            log().comment('Query address details')
            # Address details are queried for all results at once.
            await complete_address_details(conn, results)
        if details.linked_places:
            log().comment('Query linked places')
            for result in results:
                await complete_linked_places(conn, result)
        if details.parented_places:
            log().comment('Query parent places')
            for result in results:
                await complete_parented_places(conn, result)
        if details.keywords:
            log().comment('Query keywords')
            for result in results:
                await complete_keywords(conn, result)
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A non-empty place type is added to the
        extratags under the key 'place'. A housenumber is added to the
        names under the key 'housenumber'. When the row has no
        'isaddress' column, the line counts as part of the address.
    """
    extratags: Optional[Dict[str, str]] = getattr(row, 'extratags', {})
    place_type = getattr(row, 'place_type', None)
    if place_type:
        # The extratags column may exist but be NULL. Make sure there
        # is a dictionary to put the place type into.
        if extratags is None:
            extratags = {}
        extratags['place'] = place_type

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a keyword, so it cannot be accessed as attribute.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Collect the place IDs and house numbers needed to look up the
        address details of the given results.

        Returns two lists of the same length: the place IDs and the
        matching house numbers. Results without a place ID are left out.
        The house number is -1 except for results from the Tiger and
        osmline tables. For those the house number of the result is
        used or, if there is none, the 'startnumber' from the extratags.
    """
    places: List[int] = []
    hnrs: List[int] = []
    for result in results:
        if result.place_id:
            housenumber = -1
            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
                if result.housenumber is not None:
                    housenumber = int(result.housenumber)
                elif result.extratags is not None and 'startnumber' in result.extratags:
                    # details requests do not come with a specific house number
                    housenumber = int(result.extratags['startnumber'])
            places.append(result.place_id)
            hnrs.append(housenumber)

    return places, hnrs
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address lines are saved in the 'address_rows' field of the
        results. Results without a place ID are not touched.
    """
    places, hnrs = _get_housenumber_details(results)

    if not places:
        return

    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
        # Call of the SQL function get_addressdata() as a table with typed columns.
        return sa.func.get_addressdata(place_id, hnr)\
                    .table_valued( # type: ignore[no-untyped-call]
                        sa.column('place_id', type_=sa.Integer),
                        'osm_type',
                        sa.column('osm_id', type_=sa.BigInteger),
                        sa.column('name', type_=conn.t.types.Composite),
                        'class', 'type', 'place_type',
                        sa.column('admin_level', type_=sa.Integer),
                        sa.column('fromarea', type_=sa.Boolean),
                        sa.column('isaddress', type_=sa.Boolean),
                        sa.column('rank_address', type_=sa.SmallInteger),
                        sa.column('distance', type_=sa.Float),
                        joins_implicitly=True)


    if len(places) == 1:
        # Optimized case for exactly one result (reverse)
        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
                .order_by(sa.column('rank_address').desc(),
                          sa.column('isaddress').desc())

        alines = AddressLines()
        for row in await conn.execute(sql):
            alines.append(_result_row_to_address_row(row))

        for result in results:
            if result.place_id == places[0]:
                result.address_rows = alines
                return


    # General case: get the address data for all places with a single query.
    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
               .table_valued( # type: ignore[no-untyped-call]
                    sa.column('place_id', type_= sa.Integer),
                    sa.column('housenumber', type_= sa.Integer)
               ).render_derived()

    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)

    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
            .order_by(darray.c.place_id,
                      sa.column('rank_address').desc(),
                      sa.column('isaddress').desc())

    # The rows are sorted by place ID, so all rows of one result come
    # in one go. Switch to the next result when the place ID changes.
    current_result = None
    for row in await conn.execute(sql):
        if current_result is None or row.result_place_id != current_result.place_id:
            for result in results:
                if result.place_id == row.result_place_id:
                    current_result = result
                    break
            else:
                # Every returned place ID was taken from one of the results.
                assert False
            current_result.address_rows = AddressLines()
        current_result.address_rows.append(_result_row_to_address_row(row))
507 # pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the basic select statement on the placex table that
        returns the columns needed to build an AddressLine from it.

        'fromarea' is set for places with a polygon or multipolygon
        geometry. 'distance' is the distance of the geometry to the
        given centroid. The two coordinates of the centroid are written
        into the SQL as plain numbers with '%f'.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     sa.literal_column(
                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        The 'linked_rows' field of the result is always set. It remains
        an empty list when the result does not come from the placex table.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        The 'name_keywords' and 'address_keywords' fields of the result
        are replaced with new lists of WordInfo.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    name_keywords: List[WordInfo] = []
    address_keywords: List[WordInfo] = []
    result.name_keywords = name_keywords
    result.address_keywords = address_keywords

    words = conn.t.meta.tables['word']
    word_sql = sa.select(words.c.word_id, words.c.word_token, words.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        # Name tokens are looked up first, then the address tokens.
        for tokens, keywords in ((name_tokens, name_keywords),
                                 (address_tokens, address_keywords)):
            matching = word_sql.where(words.c.word_id == sa.any_(tokens))
            for row in await conn.execute(matching):
                keywords.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that the result provides the
        address for.

        The 'parented_rows' field of the result is always set. It remains
        an empty list when the result does not come from the placex table.
        Only places with a search rank of 30 are taken into account.
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.parent_place_id == result.place_id)\
            .where(conn.t.placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        result.parented_rows.append(_result_row_to_address_row(row))