1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2024 by the Nominatim developer community.
6 # For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.

Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
import enum
import dataclasses
import datetime as dt

import sqlalchemy as sa

from .typing import SaSelect, SaRow
from .sql.sqlalchemy_types import Geometry
from .types import Point, Bbox, LookupDetails
from .connection import SearchConnection
from .logging import log
from .localization import Locales
28 # This file defines complex result data classes.
31 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
32 """ Mix-in names from linked places, so that they show up
33 as standard names where necessary.
39 for k, v in names.items():
40 if k.startswith('_place_'):
42 out[k if outkey in names else outkey] = v
class SourceTable(enum.Enum):
    """ The `SourceTable` type lists the possible sources a result can have.
    """
    # NOTE(review): the member names are taken from their uses in this
    # module; the numeric values are not visible here and only need to be
    # distinct - confirm against the original definition.
    PLACEX = 1
    """ The placex table is the main source for results, usually containing
        OSM data.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback, when country data is missing
        in the OSM data.
    """
@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """
    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """
    local_name: Optional[str] = None
    """ Place holder for localization of this address part. See
        [Localization](Result-Handling.md#localization) below.
    """
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Set the local name of address parts according to the chosen
            locale. Return the list of local names without duplicates.

            Only address parts that are marked as isaddress and that have
            names are localized and returned. A name is only left out of
            the returned list when it equals the name directly before it.
        """
        label_parts: List[str] = []

        for line in self:
            if line.isaddress and line.names:
                line.local_name = locales.display_name(line.names)
                if not label_parts or label_parts[-1] != line.local_name:
                    label_parts.append(line.local_name)

        return label_parts
@dataclasses.dataclass
class WordInfo:
    """ Each entry in the list of search terms contains the
        following detailed information.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """


WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]

    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]

    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the place or an artificial value computed from the place's
            search rank.
        """
        # A missing (or zero) importance falls back to the rank-based value.
        return self.importance or (0.40001 - (self.rank_search/75.0))

    def localize(self, locales: Locales) -> None:
        """ Fill the locale_name and the display_name field for the
            place and, if available, its address information.
        """
        self.locale_name = locales.display_name(self.names)
        if self.address_rows:
            self.display_name = ', '.join(self.address_rows.localize(locales))
        else:
            # Without address information the name is all there is to show.
            self.display_name = self.locale_name


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ Search result that carries additional internal information
        from the database.
    """
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ Result of a reverse geocoding lookup.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ List of reverse lookup results, ordered by distance.
        The list may be empty when nothing was found.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0

    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.

            The value is the accuracy minus the calculated importance.
            An accuracy of None counts as 1.
        """
        return (self.accuracy if self.accuracy is not None else 1) \
               - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ List of forward lookup results, ordered by relevance.
        The list may be empty when nothing was found.
    """
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect the extra geometry columns of a result row.

        Every column whose name starts with 'geometry_' is returned
        under its name with that prefix removed.
    """
    geometries: Dict[str, str] = {}
    for column, value in row._mapping.items():
        if column.startswith('geometry_'):
            # 9 == len('geometry_')
            geometries[column[9:]] = value

    return geometries
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return. Returns None if the row is None.

        'linked_place_id' and 'admin_level' are optional columns of the
        row. When missing, they default to None and 15 respectively.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      parent_place_id=row.parent_place_id,
                      linked_place_id=getattr(row, 'linked_place_id', None),
                      admin_level=getattr(row, 'admin_level', 15),
                      names=_mingle_name_tags(row.name),
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No single house: describe the whole interpolation instead.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        'osm_type' and 'osm_id', when given, take precedence over the
        columns of the same name in the row.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     # NOTE(review): constant country for TIGER (US-only
                     # data set) - confirm against the original argument.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No single house: describe the whole interpolation instead.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The postcode itself becomes the only name of the result, saved
        under the key 'ref'.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      parent_place_id=row.parent_place_id,
                      category=('place', 'postcode'),
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        Country results always get address and search rank 4.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      # NOTE(review): assumes the row has a 'name' column
                      # with the country names - confirm against the query.
                      names=row.name,
                      rank_address=4, rank_search=4,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        The results are extended in place. Afterwards every result is
        localized with the locales given in 'details'. Nothing is done
        for an empty result list.
    """
    if not results:
        return

    log().section('Query details for result')
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)
    if details.linked_places:
        log().comment('Query linked places')
        for result in results:
            await complete_linked_places(conn, result)
    if details.parented_places:
        log().comment('Query parent places')
        for result in results:
            await complete_parented_places(conn, result)
    if details.keywords:
        log().comment('Query keywords')
        for result in results:
            await complete_keywords(conn, result)
    # Localization comes last because it uses the address rows
    # that may have been added above.
    for result in results:
        result.localize(details.locales)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        'isaddress' overrides the value from the row. When it is None,
        the 'isaddress' column of the row is used and, if the row has
        no such column, the line is marked as an address part.
    """
    # Note: this may be the dictionary of the row itself, which is
    # then extended in place.
    extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
    if 'linked_place' in extratags:
        extratags['place'] = extratags['linked_place']

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the id of the place whose address lines describe
        the address of the result.

        Placex results with a search rank of at most 27 use their linked
        place, all other results their parent place. The place id of the
        result itself is the fallback when that id is not set.
    """
    assert result.place_id

    uses_linked_place = (result.source_table == SourceTable.PLACEX
                         and result.rank_search <= 27)
    if uses_linked_place:
        preferred_id = result.linked_place_id
    else:
        preferred_id = result.parent_place_id

    return preferred_id or result.place_id
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the artificial address lines for postcode, country and
        country code to the address rows of the result.

        The address rows of the result must already have been created.
    """
    assert result.address_rows is not None
    # Results that are postcodes themselves get no extra postcode line.
    if result.category[0] not in ('boundary', 'place')\
       or result.category[1] not in ('postal_code', 'postcode'):
        postcode = result.postcode
        if not postcode and result.address:
            postcode = result.address.get('postcode')
        # Skip values that contain a list of postcodes.
        if postcode and ',' not in postcode and ';' not in postcode:
            result.address_rows.append(AddressLine(
                category=('place', 'postcode'),
                names={'ref': postcode},
                fromarea=False, isaddress=True, rank_address=5,
                distance=0.0))
    if result.country_code:
        async def _get_country_names() -> Optional[Dict[str, str]]:
            t = conn.t.country_name
            sql = sa.select(t.c.name, t.c.derived_name)\
                    .where(t.c.country_code == result.country_code)
            # Only the first matching row is used.
            for cres in await conn.execute(sql):
                names = cast(Dict[str, str], cres[0])
                if cres[1]:
                    names.update(cast(Dict[str, str], cres[1]))
                return names
            return None

        country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                    result.country_code,
                                                    _get_country_names)
        if country_names:
            result.address_rows.append(AddressLine(
                category=('place', 'country'),
                names=country_names,
                fromarea=False, isaddress=True, rank_address=4,
                distance=0.0))
        result.address_rows.append(AddressLine(
            category=('place', 'country_code'),
            names={'ref': result.country_code}, extratags={},
            fromarea=True, isaddress=False, rank_address=4,
            distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
    """ Create the address rows of the result and fill in the lines that
        come from the result itself: the place, its house number and an
        unlisted place from its address tags.
    """
    result.address_rows = AddressLines()
    if result.names:
        result.address_rows.append(AddressLine(
            place_id=result.place_id,
            osm_object=result.osm_object,
            category=result.category,
            names=result.names,
            extratags=result.extratags or {},
            admin_level=result.admin_level,
            fromarea=True, isaddress=True,
            rank_address=result.rank_address, distance=0.0))
    if result.source_table == SourceTable.PLACEX and result.address:
        housenumber = result.address.get('housenumber')\
                      or result.address.get('streetnumber')\
                      or result.address.get('conscriptionnumber')
    elif result.housenumber:
        housenumber = result.housenumber
    else:
        housenumber = None
    if housenumber:
        result.address_rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))
    if result.address and '_unlisted_place' in result.address:
        result.address_rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address rows of all results are recreated and filled in place.
        Results without a place id only get the lines derived from the
        result itself.
    """
    for result in results:
        _setup_address_details(result)

    # Lookup entries from place_address line

    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
                  for r in results if r.place_id]

    if not lookup_ids:
        return

    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))

    t = conn.t.placex
    taddr = conn.t.addressline

    # NOTE(review): the tail of the ordering case expression
    # ('else_=-1).desc()') was not readable in the reviewed copy and has
    # been reconstructed - confirm against the original.
    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())

    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        # Rows arrive grouped by source place, see the first order_by.
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each address rank is used as address part.
        location_isaddress = row.rank_address != current_rank_address

        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                location_isaddress = False
            else:
                # The postcode is shown through this address line.
                # Clearing it avoids a second line in _finalize_entry().
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)

    # Finally add the record for the parent entry where necessary.

    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON))
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                place_id=row.place_id,
                osm_object=(row.osm_type, row.osm_id),
                category=(row.class_, row.type),
                names=row.name, extratags=row.extratags or {},
                admin_level=row.admin_level,
                fromarea=True, isaddress=True,
                rank_address=row.rank_address, distance=0.0))

    # Now sort everything
    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
        # The line of the result itself first, then descending address
        # rank and, within one rank, lines that are not address parts first.
        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)

    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=mk_sort_key(result.place_id))
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Build the base query over the placex table that returns the
        columns needed to create an AddressLine from a row.

        'centroid' is bound as a parameter and used to compute the
        'distance' column. Callers add their own WHERE conditions.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     t.c.geometry.is_area().label('fromarea'),
                     t.c.rank_address,
                     t.c.geometry.distance_spheroid(
                       sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        The linked rows of the result are always reset to an empty list.
        They are only filled for results from the placex table.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Look up the search terms that are saved for the place.

        The name and address keyword lists of the result are reset
        and then filled from the word table.
    """
    search_name = conn.t.search_name
    token_sql = sa.select(search_name.c.name_vector, search_name.c.nameaddress_vector)\
                  .where(search_name.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    word = conn.t.meta.tables['word']
    word_sql = sa.select(word.c.word_id, word.c.word_token, word.c.word)

    for name_tokens, address_tokens in await conn.execute(token_sql):
        name_sql = word_sql.where(word.c.word_id.in_(name_tokens))
        for word_row in await conn.execute(name_sql):
            result.name_keywords.append(WordInfo(*word_row))

        address_sql = word_sql.where(word.c.word_id.in_(address_tokens))
        for word_row in await conn.execute(address_sql):
            result.address_keywords.append(WordInfo(*word_row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the places with search rank 30
        that have the result as their parent place.

        The parented rows of the result are always reset to an empty list.
        They are only filled for results from the placex table.
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.parent_place_id == result.place_id)\
            .where(conn.t.placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        result.parented_rows.append(_result_row_to_address_row(row))