1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Dataclasses for search results and helper functions to fill them.
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
14 from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
19 import sqlalchemy as sa
21 from nominatim.typing import SaSelect, SaRow, SaColumn
22 from nominatim.api.types import Point, Bbox, LookupDetails
23 from nominatim.api.connection import SearchConnection
24 from nominatim.api.logging import log
25 from nominatim.api.localization import Locales
27 # This file defines complex result data classes.
28 # pylint: disable=too-many-instance-attributes
30 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
31 """ Mix-in names from linked places, so that they show up
32 as standard names where necessary.
38 for k, v in names.items():
39 if k.startswith('_place_'):
41 out[k if outkey in names else outkey] = v
class SourceTable(enum.Enum):
    """ Enumeration of the database tables a result may originate from.
    """
    PLACEX = 1
    """ The placex table is the main source for results.
    """
    OSMLINE = 2
    """ The osmline table holds address interpolations from OSM data.
        Addresses from interpolations are always approximate. The OSM id
        in the result refers to the OSM way with the interpolation line
        object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are interpolations, too. They always refer to
        a street from OSM data and the OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table holds artificial centroids for postcodes,
        which are computed from the postcodes available with address
        points. Results are always approximate.
    """
    COUNTRY = 5
    """ The country table serves as a fallback when country data is
        missing.
    """
@dataclasses.dataclass
class AddressLine:
    """ Description of a place related to a result and of the role it plays
        as a part of the result's address. Most fields are optional;
        whether they are set depends on the kind and function of the
        address part.
    """
    place_id: Optional[int]
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]]
    """ OSM type and ID of the place, if such an object exists.
    """
    category: Tuple[str, str]
    """ Main category of the place, given as a key-value pair.
    """
    names: Dict[str, str]
    """ All names available for the place, including references,
        alternative names and translations.
    """
    extratags: Optional[Dict[str, str]]
    """ Any additional information available about the place. This is
        a dictionary that usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int]
    """ Administrative level of a boundary as tagged in the input data.
        The field is only meaningful for places of the category
        (boundary, administrative).
    """
    fromarea: bool
    """ True when the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess whether an
        address belongs to one place or another.
    """
    isaddress: bool
    """ True when this place should be considered for the final address
        display. Nominatim sometimes lists more than one candidate for
        the address when it cannot reliably determine where the place
        belongs. The names of all candidates are considered when
        searching, but only the most likely candidate should be shown
        when displaying the result.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """

    local_name: Optional[str] = None
    """ Placeholder for the localized name of this address part. See
        [Localization](#localization) below.
    """
class AddressLines(List[AddressLine]):
    """ List of address lines, sorted by their rank in descending order.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Fill in the local name of the address parts for the given
            locales and return the resulting list of local names.

            Only lines that are flagged as isaddress and that have names
            are localized and returned. A name that repeats the one
            directly before it in the list is left out.
        """
        labels: List[str] = []

        for part in self:
            if not (part.isaddress and part.names):
                continue

            part.local_name = locales.display_name(part.names)
            repeats_previous = bool(labels) and labels[-1] == part.local_name
            if not repeats_previous:
                labels.append(part.local_name)

        return labels
@dataclasses.dataclass
class WordInfo:
    """ Detailed information about a single entry in the list of
        search terms of a result.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This is the form that is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form of the word, if available.
    """


WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Collection of the information that all kinds of search results
        have in common.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id : Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None

    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Latitude (the y coordinate) of the center point of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Longitude (the x coordinate) of the center point of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Return an importance value that is always valid: the stored
            importance when there is a non-zero one, otherwise an
            artificial value derived from the search rank of the place.
        """
        if self.importance:
            return self.importance

        return 0.7500001 - (self.rank_search/40.0)


    def localize(self, locales: Locales) -> None:
        """ Set locale_name and display_name of the place for the given
            locales. When address rows are available, they are localized
            as well and the display name is composed from them.
        """
        self.locale_name = locales.display_name(self.names)
        self.display_name = (', '.join(self.address_rows.localize(locales))
                             if self.address_rows else self.locale_name)


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that carries additional internal information
        from the database.
    """
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding request.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ List of reverse lookup results, ordered by distance.
        The list may be empty when nothing was found.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ Result of a forward geocoding request.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Combined measure of accuracy and importance: the accuracy
            (1 when no accuracy is set) minus the calculated importance.
        """
        accuracy = 1 if self.accuracy is None else self.accuracy
        return accuracy - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ List of forward lookup results, ordered by relevance.
        The list may be empty when nothing was found.
    """

    def localize(self, locales: Locales) -> None:
        """ Localize every result in the list with the given locales.
        """
        for entry in self:
            entry.localize(locales)
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'
        into a dictionary, keyed by the column name without that prefix.
    """
    prefix = 'geometry_'
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items(): # pylint: disable=W0212
        if column.startswith(prefix):
            geometries[column[len(prefix):]] = value

    return geometries
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a new result of type 'class_type' and fill it with the
        data from a result row of the placex table.

        Names of linked places are mixed into the names of the result.
        Returns None if the row is None.
    """
    if row is None:
        return None

    osm_object = (row.osm_type, row.osm_id)
    category = (row.class_, row.type)

    return class_type(source_table=SourceTable.PLACEX,
                      category=category,
                      centroid=Point.from_wkb(row.centroid),
                      place_id=row.place_id,
                      osm_object=osm_object,
                      names=_mingle_name_tags(row.name),
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a new result of type 'class_type' and fill it with the
        data from a result row of the address interpolation table
        osmline. Returns None if the row is None.

        When the row comes with a housenumber, the housenumber of the
        result is set and the category is (place, house). Otherwise the
        category is (place, houses) and the extratags of the result
        describe the interpolation (startnumber, endnumber, step).
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_housenumber = hnr is not None

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'house' if has_housenumber else 'houses'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if has_housenumber:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Build a new result of type 'class_type' and fill it with the
        data from a result row of the Tiger interpolation table.
        Returns None if the row is None.

        'osm_type' and 'osm_id' take precedence over the respective
        columns of the row when they are given (and not empty or 0).

        When the row comes with a housenumber, the housenumber of the
        result is set and the category is (place, house). Otherwise the
        category is (place, houses) and the extratags of the result
        describe the interpolation (startnumber, endnumber, step).
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)
    has_housenumber = hnr is not None

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'house' if has_housenumber else 'houses'),
                     postcode=row.postcode,
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if has_housenumber:
        res.housenumber = str(hnr)
    else:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}

    return res
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a new result of type 'class_type' and fill it with the
        data from a result row of the postcode table. The postcode
        becomes the 'ref' name of the result.

        Returns None if the row is None.
    """
    if row is None:
        return None

    postcode_names = {'ref': row.postcode}

    return class_type(source_table=SourceTable.POSTCODE,
                      category=('place', 'postcode'),
                      centroid=Point.from_wkb(row.centroid),
                      place_id=row.place_id,
                      names=postcode_names,
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Build a new result of type 'class_type' and fill it with the
        data from a result row of the fallback country tables. Address
        and search rank of such a result are fixed to 4.

        Returns None if the row is None.
    """
    if row is None:
        return None

    country_rank = 4

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      names=row.name,
                      rank_address=country_rank, rank_search=country_rank,
                      country_code=row.country_code)
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Load additional information for the results from the database.
        The flags in 'details' determine which kinds of information are
        retrieved. Nothing is done for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')

    if details.address_details:
        log().comment('Query address details')
        # Address details are retrieved for all results at once.
        await complete_address_details(conn, results)

    if details.linked_places:
        log().comment('Query linked places')
        for entry in results:
            await complete_linked_places(conn, entry)

    if details.parented_places:
        log().comment('Query parent places')
        for entry in results:
            await complete_parented_places(conn, entry)

    if details.keywords:
        log().comment('Query keywords')
        for entry in results:
            await complete_keywords(conn, entry)
def _result_row_to_address_row(row: SaRow) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        The columns 'extratags', 'place_type', 'housenumber' and
        'isaddress' are optional. A non-empty place type is saved in the
        extratags under the key 'place', a housenumber is added to the
        names under the key 'housenumber'. Rows without an 'isaddress'
        column are considered to be part of the address.
    """
    # The extratags column may be missing or NULL. Work on a copy, so that
    # adding the place type never changes the data of the row itself.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    if hasattr(row, 'place_type') and row.place_type:
        extratags['place'] = row.place_type

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a reserved word, so it needs getattr().
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=getattr(row, 'isaddress', True),
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Collect the place IDs of all results that have one, together with
        the housenumber to use when looking up their address.

        Returns two lists of equal length: the place IDs and the
        housenumbers. The housenumber is -1 except for results from the
        Tiger and osmline tables, where it is taken from the housenumber
        of the result or, failing that, from the 'startnumber' extratag.
    """
    place_ids: List[int] = []
    housenumbers: List[int] = []
    interpolation_tables = (SourceTable.TIGER, SourceTable.OSMLINE)

    for entry in results:
        if not entry.place_id:
            continue

        hnr = -1
        if entry.source_table in interpolation_tables:
            if entry.housenumber is not None:
                hnr = int(entry.housenumber)
            elif entry.extratags is not None and 'startnumber' in entry.extratags:
                # details requests do not come with a specific house number
                hnr = int(entry.extratags['startnumber'])

        place_ids.append(entry.place_id)
        housenumbers.append(hnr)

    return place_ids, housenumbers
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Load the places that make up the address of each result and save
        them in the 'address_rows' of the result.

        Only results with a place ID are processed. The address lines of
        a result are ordered by descending address rank, lines that are
        part of the address first within the same rank.
    """
    place_ids, housenumbers = _get_housenumber_details(results)

    if not place_ids:
        return

    def _addressdata_for(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
        # Typed description of the table returned by the SQL function
        # get_addressdata().
        return sa.func.get_addressdata(place_id, hnr)\
                      .table_valued( # type: ignore[no-untyped-call]
                         sa.column('place_id', type_=sa.Integer),
                         'osm_type',
                         sa.column('osm_id', type_=sa.BigInteger),
                         sa.column('name', type_=conn.t.types.Composite),
                         'class', 'type', 'place_type',
                         sa.column('admin_level', type_=sa.Integer),
                         sa.column('fromarea', type_=sa.Boolean),
                         sa.column('isaddress', type_=sa.Boolean),
                         sa.column('rank_address', type_=sa.SmallInteger),
                         sa.column('distance', type_=sa.Float),
                         joins_implicitly=True)


    if len(place_ids) == 1:
        # Optimized case for exactly one result (reverse)
        single_sql = sa.select(_addressdata_for(place_ids[0], housenumbers[0]))\
                       .order_by(sa.column('rank_address').desc(),
                                 sa.column('isaddress').desc())

        lines = AddressLines()
        for row in await conn.execute(single_sql):
            lines.append(_result_row_to_address_row(row))

        for entry in results:
            if entry.place_id == place_ids[0]:
                entry.address_rows = lines
                return


    # General case: turn the place IDs and housenumbers into a derived
    # table and get the address data for all of its rows with one query.
    lookup = sa.func.unnest(conn.t.types.to_array(place_ids),
                            conn.t.types.to_array(housenumbers))\
               .table_valued( # type: ignore[no-untyped-call]
                    sa.column('place_id', type_= sa.Integer),
                    sa.column('housenumber', type_= sa.Integer)
               ).render_derived()

    addressdata = _addressdata_for(lookup.c.place_id, lookup.c.housenumber)

    multi_sql = sa.select(lookup.c.place_id.label('result_place_id'), addressdata)\
                  .order_by(lookup.c.place_id,
                            sa.column('rank_address').desc(),
                            sa.column('isaddress').desc())

    current = None
    for row in await conn.execute(multi_sql):
        # Rows are grouped by place ID. Whenever the ID changes, switch to
        # the first result with the new ID and start a new list for it.
        if current is None or row.result_place_id != current.place_id:
            for entry in results:
                if entry.place_id == row.result_place_id:
                    current = entry
                    break
            else:
                assert False
            current.address_rows = AddressLines()
        current.address_rows.append(_result_row_to_address_row(row))
# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select on the placex table that returns the columns
        needed for creating an address line. 'fromarea' is true for
        polygon and multipolygon geometries, 'distance' is the spheroid
        distance between the geometry of the place and 'centroid'.
    """
    placex = conn.t.placex

    fromarea = sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea')
    # The centroid supplies the two values for the '%f' placeholders.
    distance = sa.literal_column(
        """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """ % centroid).label('distance')

    return sa.select(placex.c.place_id, placex.c.osm_type, placex.c.osm_id, placex.c.name,
                     placex.c.class_.label('class'), placex.c.type,
                     placex.c.admin_level, placex.c.housenumber,
                     fromarea,
                     placex.c.rank_address,
                     distance)
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that link to the result and save them in the
        'linked_rows' of the result. The list remains empty for results
        that do not come from the placex table.
    """
    linked = AddressLines()
    result.linked_rows = linked

    if result.source_table == SourceTable.PLACEX:
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(conn.t.placex.c.linked_place_id == result.place_id)

        for row in await conn.execute(sql):
            linked.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the search terms used for the place and save them in the
        'name_keywords' and 'address_keywords' of the result.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    name_words: List[WordInfo] = []
    address_words: List[WordInfo] = []
    result.name_keywords = name_words
    result.address_keywords = address_words

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        # Resolve the token IDs of each vector to their word information.
        for row in await conn.execute(word_sql.where(word.c.word_id == sa.any_(name_tokens))):
            name_words.append(WordInfo(*row))

        for row in await conn.execute(word_sql.where(word.c.word_id == sa.any_(address_tokens))):
            address_words.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Load the places that have the result as their parent and save
        them in the 'parented_rows' of the result. Only places with a
        search rank of 30 are taken into account. The list remains empty
        for results that do not come from the placex table.
    """
    children = AddressLines()
    result.parented_rows = children

    if result.source_table == SourceTable.PLACEX:
        placex = conn.t.placex
        sql = _placex_select_address_row(conn, result.centroid)\
                .where(placex.c.parent_place_id == result.place_id)\
                .where(placex.c.rank_search == 30)

        for row in await conn.execute(sql):
            children.append(_result_row_to_address_row(row))