1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2023 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Dataclasses for search results and helper functions to fill them.
10 Data classes are part of the public API while the functions are for
11 internal use only. That's why they are implemented as free-standing functions
12 instead of member functions.
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast
import enum
import dataclasses
import datetime as dt

import sqlalchemy as sa

from nominatim.typing import SaSelect, SaRow
from nominatim.db.sqlalchemy_functions import CrosscheckNames
from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.localization import Locales
28 # This file defines complex result data classes.
29 # pylint: disable=too-many-instance-attributes
31 def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
32 """ Mix-in names from linked places, so that they show up
33 as standard names where necessary.
39 for k, v in names.items():
40 if k.startswith('_place_'):
42 out[k if outkey in names else outkey] = v
class SourceTable(enum.Enum):
    """ The `SourceTable` type lists the possible sources a result can have.
    """
    # NOTE(review): the member definitions were not visible in the reviewed
    # text. The names are the ones used by the create_from_*_row() functions
    # of this module; the numeric values are assumed - confirm.
    PLACEX = 1
    """ The placex table is the main source for result usually containing
        OSM data.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER address are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback, when country data is missing
        in the OSM data.
    """
@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """

    local_name: Optional[str] = None
    """ Place holder for localization of this address part. See
        [Localization](#localization) below.
    """
class AddressLines(List[AddressLine]):
    """ Sequence of address lines ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Set the local name of address parts according to the chosen
            locale. Return the list of local names without duplicates.

            Only address parts that are marked as isaddress are localized
            and returned. A name is left out of the returned list when it
            equals the name added directly before it.
        """
        label_parts: List[str] = []

        for line in self:
            if line.isaddress and line.names:
                line.local_name = locales.display_name(line.names)
                # Only directly consecutive repetitions are suppressed.
                if not label_parts or label_parts[-1] != line.local_name:
                    label_parts.append(line.local_name)

        return label_parts
@dataclasses.dataclass
class WordInfo:
    """ Each entry in the list of search terms contains the
        following detailed information.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """


WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    place_id: Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the value or an artificial value computed from the place's
            search rank.
        """
        # A stored importance of 0 (or None) falls through to the
        # rank-based estimate.
        return self.importance or (0.7500001 - (self.rank_search/40.0))


    def localize(self, locales: Locales) -> None:
        """ Fill the locale_name and the display_name field for the
            place and, if available, its address information.
        """
        self.locale_name = locales.display_name(self.names)
        if self.address_rows:
            self.display_name = ', '.join(self.address_rows.localize(locales))
        else:
            # Without address lines the display name is just the place name.
            self.display_name = self.locale_name


BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.
    """
    indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found.
    """
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    # NOTE(review): the decorator line was not visible in the reviewed
    # text; 'ranking' is assumed to be a property - confirm against callers.
    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.
        """
        # A missing accuracy counts as 1; the calculated importance is
        # subtracted from it.
        return (self.accuracy if self.accuracy is not None else 1) \
               - self.calculated_importance()
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found.
    """
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all columns of the row whose name starts with 'geometry_'
        and return them as a dictionary keyed by the column name without
        that prefix.
    """
    prefix = 'geometry_'
    return {k[len(prefix):]: v for k, v in row._mapping.items() # pylint: disable=W0212
            if k.startswith(prefix)}
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return. Returns None if the row is None.

        'linked_place_id' and 'admin_level' are optional columns. When
        they are missing from the row, None and 15 are used respectively.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      parent_place_id=row.parent_place_id,
                      linked_place_id=getattr(row, 'linked_place_id', None),
                      admin_level=getattr(row, 'admin_level', 15),
                      names=_mingle_name_tags(row.name),
                      # NOTE(review): this argument was not visible in the
                      # reviewed text and is reconstructed - confirm.
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     # NOTE(review): this argument was not visible in the
                     # reviewed text and is reconstructed - confirm.
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No specific house number: describe the whole interpolation.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        'osm_type' and 'osm_id', when given, take precedence over the
        values of the same name in the row.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     parent_place_id=row.parent_place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     # NOTE(review): this argument was not visible in the
                     # reviewed text; presumably the fixed country code for
                     # the US-only TIGER data - confirm.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # No specific house number: describe the whole interpolation.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        The postcode itself becomes the 'ref' name of the result.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      parent_place_id=row.parent_place_id,
                      category=('place', 'postcode'),
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        Search and address rank of the result are always set to 4.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      # NOTE(review): this argument was not visible in the
                      # reviewed text and is reconstructed - confirm.
                      names=row.name,
                      rank_address=4, rank_search=4,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        The results are modified in place. After the requested details
        have been added, every result is localized with 'details.locales'.
    """
    # NOTE(review): the guard for an empty result list was not visible in
    # the reviewed text and is reconstructed - confirm.
    if not results:
        return

    log().section('Query details for result')
    if details.address_details:
        log().comment('Query address details')
        await complete_address_details(conn, results)
    if details.linked_places:
        log().comment('Query linked places')
        for result in results:
            await complete_linked_places(conn, result)
    if details.parented_places:
        log().comment('Query parent places')
        for result in results:
            await complete_parented_places(conn, result)
    # NOTE(review): the condition below was not visible in the reviewed
    # text; the attribute name is assumed by analogy - confirm.
    if details.keywords:
        log().comment('Query keywords')
        for result in results:
            await complete_keywords(conn, result)
    for result in results:
        result.localize(details.locales)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        When 'isaddress' is None, the value is taken from the row's
        'isaddress' column and defaults to True if the row has none.
        A 'linked_place' extratag is exposed as 'place' and a housenumber
        in the row is added to the names under the key 'housenumber'.
    """
    # Work on a copy, so that the dictionary owned by the row stays untouched.
    extratags: Dict[str, str] = dict(getattr(row, 'extratags', None) or {})
    if 'linked_place' in extratags:
        extratags['place'] = extratags['linked_place']

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
    """ Return two parallel lists with the place IDs of the results and
        the house number to use for each of them.

        A house number is only determined for results from the TIGER and
        osmline tables: either the house number of the result or, failing
        that, the 'startnumber' from its extratags.
    """
    places: List[int] = []
    hnrs: List[int] = []
    for result in results:
        # NOTE(review): the guard and the default value below were not
        # visible in the reviewed text and are reconstructed - confirm.
        if result.place_id:
            housenumber = -1
            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
                if result.housenumber is not None:
                    housenumber = int(result.housenumber)
                elif result.extratags is not None and 'startnumber' in result.extratags:
                    # details requests do not come with a specific house number
                    housenumber = int(result.extratags['startnumber'])
            places.append(result.place_id)
            hnrs.append(housenumber)

    return places, hnrs
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Return the ID of the place whose address lines are used for
        the result.

        Results that do not come from placex or have a search rank
        above 27 use their parent place. All others use their linked
        place. The place ID of the result itself is the fallback in
        both cases.
    """
    assert result.place_id
    if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
        return result.parent_place_id or result.place_id

    return result.linked_place_id or result.place_id
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the address lines for postcode, country and country code
        to the address rows of the result.

        The postcode comes from the result itself or, failing that, from
        its address tags. It is skipped when it contains ',' or ';'.
        The country lines are only added for results with a country code.
    """
    # An empty list is valid here, only a missing one is an error.
    assert result.address_rows is not None
    postcode = result.postcode
    if not postcode and result.address:
        postcode = result.address.get('postcode')
    if postcode and ',' not in postcode and ';' not in postcode:
        result.address_rows.append(AddressLine(
            category=('place', 'postcode'),
            names={'ref': postcode},
            fromarea=False, isaddress=True, rank_address=5,
            distance=0.0))
    if result.country_code:
        async def _get_country_names() -> Optional[Dict[str, str]]:
            t = conn.t.country_name
            sql = sa.select(t.c.name, t.c.derived_name)\
                    .where(t.c.country_code == result.country_code)
            for cres in await conn.execute(sql):
                names = cast(Dict[str, str], cres[0])
                # NOTE(review): the guard and the two return statements
                # were not visible in the reviewed text - confirm.
                if cres[1]:
                    names.update(cast(Dict[str, str], cres[1]))
                return names
            return None

        # NOTE(review): only the first argument of this call was visible
        # in the reviewed text; the other two are reconstructed - confirm
        # against SearchConnection.get_cached_value().
        country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                    result.country_code,
                                                    _get_country_names)
        if country_names:
            result.address_rows.append(AddressLine(
                category=('place', 'country'),
                names=country_names,
                fromarea=False, isaddress=True, rank_address=4,
                distance=0.0))
        result.address_rows.append(AddressLine(
            category=('place', 'country_code'),
            names={'ref': result.country_code}, extratags={},
            fromarea=True, isaddress=False, rank_address=4,
            distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
    """ Retrieve information about places that make up the address of the result.

        This creates a fresh address row list for the result and fills it
        with the parts that can be derived from the result itself: the
        place, its house number and an '_unlisted_place' address tag.
    """
    result.address_rows = AddressLines()
    # NOTE(review): the condition below was not visible in the reviewed
    # text and is reconstructed - confirm.
    if result.names:
        result.address_rows.append(AddressLine(
            place_id=result.place_id,
            osm_object=result.osm_object,
            category=result.category,
            names=result.names,
            extratags=result.extratags or {},
            admin_level=result.admin_level,
            fromarea=True, isaddress=True,
            rank_address=result.rank_address, distance=0.0))
    if result.source_table == SourceTable.PLACEX and result.address:
        housenumber = result.address.get('housenumber')\
                      or result.address.get('streetnumber')\
                      or result.address.get('conscriptionnumber')
    elif result.housenumber:
        housenumber = result.housenumber
    else:
        housenumber = None

    if housenumber:
        result.address_rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))
    if result.address and '_unlisted_place' in result.address:
        result.address_rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        The address rows of all results are filled in place: first with the
        parts derived from the result itself, then with the rows found
        through the addressline table, then with postcode and country
        lines and finally with the parent place where one is used.
        Each list is sorted by descending address rank at the end.
    """
    for result in results:
        _setup_address_details(result)

    ### Lookup entries from place_address line

    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else ''}
                  for r in results if r.place_id]

    # NOTE(review): this early return was not visible in the reviewed
    # text and is reconstructed - confirm.
    if not lookup_ids:
        return

    # The lookup data is handed to the database as one JSON array, so that
    # a single query can serve all results.
    ltab = sa.func.json_array_elements(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]

    t = conn.t.placex
    taddr = conn.t.addressline

    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              # NOTE(review): the fallback value and the sort
                              # direction of this case expression were not
                              # visible in the reviewed text - confirm.
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())

    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        # Rows arrive grouped by source place; switch the result whenever
        # the source place changes.
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each address rank is marked as address part.
        location_isaddress = row.rank_address != current_rank_address

        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                location_isaddress = False
            else:
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)

    ### Finally add the record for the parent entry where necessary.

    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.json_array_elements(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
        # NOTE(review): the last two selected columns were not visible in
        # the reviewed text; they are the ones read from the row below.
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                place_id=row.place_id,
                osm_object=(row.osm_type, row.osm_id),
                category=(row.class_, row.type),
                names=row.name, extratags=row.extratags or {},
                admin_level=row.admin_level,
                fromarea=True, isaddress=True,
                rank_address=row.rank_address, distance=0.0))

    ### Now sort everything
    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=lambda a: (-a.rank_address, a.isaddress))
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create the base query on the placex table that yields the columns
        needed to build an address line. The 'distance' column is computed
        in the database between the geometry of the place and 'centroid'.

        Callers add the WHERE conditions that select the places.
    """
    t = conn.t.placex
    # NOTE(review): the 'rank_address' column and the opening of the second
    # literal_column() were not visible in the reviewed text; the row
    # converter reads 'rank_address' from the result - confirm.
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     sa.literal_column("""ST_GeometryType(geometry) in
                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
                     t.c.rank_address,
                     sa.literal_column(
                         f"""ST_DistanceSpheroid(geometry,
                                                 'SRID=4326;{centroid.to_wkt()}'::geometry,
                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
                         """).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        The linked rows of the result are always reset to an empty list.
        They are only filled for results from the placex table.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.linked_place_id == result.place_id)

    for row in await conn.execute(sql):
        result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        Requires that the query analyzer was initialised to get access to
        the word table.

        The keyword lists of the result are reset and then filled with
        one WordInfo per word found for the name and address tokens.
    """
    t = conn.t.search_name
    sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
            .where(t.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    # The word table is looked up in the metadata of the connection.
    t = conn.t.meta.tables['word']
    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)

    for name_tokens, address_tokens in await conn.execute(sql):
        for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
            result.name_keywords.append(WordInfo(*row))

        for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))):
            result.address_keywords.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that the result provides the
        address for.

        The parented rows of the result are always reset to an empty list.
        They are only filled for results from the placex table and only
        with places of search rank 30.
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    sql = _placex_select_address_row(conn, result.centroid)\
            .where(conn.t.placex.c.parent_place_id == result.place_id)\
            .where(conn.t.placex.c.rank_search == 30)

    for row in await conn.execute(sql):
        result.parented_rows.append(_result_row_to_address_row(row))