X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/70f6f9a711727150532a9b958f279435901d4805..e1303fb5921a81947e55243a78b93be5f4ebbe37:/nominatim/api/results.py diff --git a/nominatim/api/results.py b/nominatim/api/results.py index 7839859f..0183f5b9 100644 --- a/nominatim/api/results.py +++ b/nominatim/api/results.py @@ -11,75 +11,182 @@ Data classes are part of the public API while the functions are for internal use only. That's why they are implemented as free-standing functions instead of member functions. """ -from typing import Optional, Tuple, Dict, Sequence +from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union import enum import dataclasses import datetime as dt import sqlalchemy as sa -from nominatim.typing import SaSelect, SaRow -from nominatim.api.types import Point, LookupDetails +from nominatim.typing import SaSelect, SaRow, SaColumn +from nominatim.api.types import Point, Bbox, LookupDetails from nominatim.api.connection import SearchConnection +from nominatim.api.logging import log +from nominatim.api.localization import Locales # This file defines complex result data classes. # pylint: disable=too-many-instance-attributes +def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]: + """ Mix-in names from linked places, so that they show up + as standard names where necessary. + """ + if not names: + return None + + out = {} + for k, v in names.items(): + if k.startswith('_place_'): + outkey = k[7:] + out[k if outkey in names else outkey] = v + else: + out[k] = v + + return out + + class SourceTable(enum.Enum): - """ Enumeration of kinds of results. + """ The `SourceTable` type lists the possible sources a result can have. """ PLACEX = 1 + """ The placex table is the main source for result usually containing + OSM data. + """ OSMLINE = 2 + """ The osmline table contains address interpolations from OSM data. + Interpolation addresses are always approximate. The OSM id in the + result refers to the OSM way with the interpolation line object. + """ TIGER = 3 + """ TIGER address data contains US addresses imported on the side, + see [Installing TIGER data](../customize/Tiger.md). + TIGER address are also interpolations. The addresses always refer + to a street from OSM data. The OSM id in the result refers to + that street. + """ POSTCODE = 4 + """ The postcode table contains artificial centroids for postcodes, + computed from the postcodes available with address points. Results + are always approximate. + """ COUNTRY = 5 + """ The country table provides a fallback, when country data is missing + in the OSM data. + """ @dataclasses.dataclass class AddressLine: - """ Detailed information about a related place. + """ The `AddressLine` may contain the following fields about a related place + and its function as an address object. Most fields are optional. + Their presence depends on the kind and function of the address part. """ place_id: Optional[int] + """ Internal ID of the place. + """ osm_object: Optional[Tuple[str, int]] + """ OSM type and ID of the place, if such an object exists. + """ category: Tuple[str, str] + """ Main category of the place, described by a key-value pair. + """ names: Dict[str, str] + """ All available names for the place including references, alternative + names and translations. + """ extratags: Optional[Dict[str, str]] + """ Any extra information available about the place. This is a dictionary + that usually contains OSM tag key-value pairs. + """ admin_level: Optional[int] + """ The administrative level of a boundary as tagged in the input data. + This field is only meaningful for places of the category + (boundary, administrative). + """ fromarea: bool + """ If true, then the exact area of the place is known. Without area + information, Nominatim has to make an educated guess if an address + belongs to one place or another. + """ isaddress: bool + """ If true, this place should be considered for the final address display. + Nominatim will sometimes include more than one candidate for + the address in the list when it cannot reliably determine where the + place belongs. It will consider names of all candidates when searching + but when displaying the result, only the most likely candidate should + be shown. + """ rank_address: int + """ [Address rank](../customize/Ranking.md#address-rank) of the place. + """ distance: float + """ Distance in degrees between the result place and this address part. + """ + + local_name: Optional[str] = None + """ Place holder for localization of this address part. See + [Localization](#localization) below. + """ -AddressLines = Sequence[AddressLine] +class AddressLines(List[AddressLine]): + """ Sequence of address lines order in descending order by their rank. + """ + + def localize(self, locales: Locales) -> List[str]: + """ Set the local name of address parts according to the chosen + locale. Return the list of local names without duplicates. + + Only address parts that are marked as isaddress are localized + and returned. + """ + label_parts: List[str] = [] + + for line in self: + if line.isaddress and line.names: + line.local_name = locales.display_name(line.names) + if not label_parts or label_parts[-1] != line.local_name: + label_parts.append(line.local_name) + + return label_parts + @dataclasses.dataclass class WordInfo: - """ Detailed information about a search term. + """ Each entry in the list of search terms contains the + following detailed information. """ word_id: int + """ Internal identifier for the word. + """ word_token: str + """ Normalised and transliterated form of the word. + This form is used for searching. + """ word: Optional[str] = None + """ Untransliterated form, if available. + """ WordInfos = Sequence[WordInfo] @dataclasses.dataclass -class SearchResult: - """ Data class collecting all available information about a search result. +class BaseResult: + """ Data class collecting information common to all + types of search results. """ source_table: SourceTable category: Tuple[str, str] centroid: Point place_id : Optional[int] = None - parent_place_id: Optional[int] = None - linked_place_id: Optional[int] = None osm_object: Optional[Tuple[str, int]] = None - admin_level: int = 15 + + locale_name: Optional[str] = None + display_name: Optional[str] = None names: Optional[Dict[str, str]] = None address: Optional[Dict[str, str]] = None @@ -95,8 +202,6 @@ class SearchResult: country_code: Optional[str] = None - indexed_date: Optional[dt.datetime] = None - address_rows: Optional[AddressLines] = None linked_rows: Optional[AddressLines] = None parented_rows: Optional[AddressLines] = None @@ -105,7 +210,6 @@ class SearchResult: geometry: Dict[str, str] = dataclasses.field(default_factory=dict) - @property def lat(self) -> float: """ Get the latitude (or y) of the center point of the place. @@ -128,11 +232,71 @@ class SearchResult: return self.importance or (0.7500001 - (self.rank_search/40.0)) - # pylint: disable=consider-using-f-string - def centroid_as_geojson(self) -> str: - """ Get the centroid in GeoJSON format. + def localize(self, locales: Locales) -> None: + """ Fill the locale_name and the display_name field for the + place and, if available, its address information. + """ + self.locale_name = locales.display_name(self.names) + if self.address_rows: + self.display_name = ', '.join(self.address_rows.localize(locales)) + else: + self.display_name = self.locale_name + + + +BaseResultT = TypeVar('BaseResultT', bound=BaseResult) + +@dataclasses.dataclass +class DetailedResult(BaseResult): + """ A search result with more internal information from the database + added. + """ + parent_place_id: Optional[int] = None + linked_place_id: Optional[int] = None + admin_level: int = 15 + indexed_date: Optional[dt.datetime] = None + + +@dataclasses.dataclass +class ReverseResult(BaseResult): + """ A search result for reverse geocoding. + """ + distance: Optional[float] = None + bbox: Optional[Bbox] = None + + +class ReverseResults(List[ReverseResult]): + """ Sequence of reverse lookup results ordered by distance. + May be empty when no result was found. + """ + + +@dataclasses.dataclass +class SearchResult(BaseResult): + """ A search result for forward geocoding. + """ + bbox: Optional[Bbox] = None + accuracy: float = 0.0 + + + @property + def ranking(self) -> float: + """ Return the ranking, a combined measure of accuracy and importance. + """ + return (self.accuracy if self.accuracy is not None else 1) \ + - self.calculated_importance() + + +class SearchResults(List[SearchResult]): + """ Sequence of forward lookup results ordered by relevance. + May be empty when no result was found. + """ + + def localize(self, locales: Locales) -> None: + """ Apply the given locales to all results. """ - return '{"type": "Point","coordinates": [%f, %f]}' % self.centroid + for result in self: + result.localize(locales) def _filter_geometries(row: SaRow) -> Dict[str, str]: @@ -140,95 +304,172 @@ def _filter_geometries(row: SaRow) -> Dict[str, str]: if k.startswith('geometry_')} -def create_from_placex_row(row: SaRow) -> SearchResult: - """ Construct a new SearchResult and add the data from the result row - from the placex table. - """ - return SearchResult(source_table=SourceTable.PLACEX, - place_id=row.place_id, - parent_place_id=row.parent_place_id, - linked_place_id=row.linked_place_id, - osm_object=(row.osm_type, row.osm_id), - category=(row.class_, row.type), - admin_level=row.admin_level, - names=row.name, - address=row.address, - extratags=row.extratags, - housenumber=row.housenumber, - postcode=row.postcode, - wikipedia=row.wikipedia, - rank_address=row.rank_address, - rank_search=row.rank_search, - importance=row.importance, - country_code=row.country_code, - indexed_date=getattr(row, 'indexed_date'), - centroid=Point(row.x, row.y), - geometry=_filter_geometries(row)) - - -def create_from_osmline_row(row: SaRow) -> SearchResult: - """ Construct a new SearchResult and add the data from the result row - from the osmline table. - """ - return SearchResult(source_table=SourceTable.OSMLINE, - place_id=row.place_id, - parent_place_id=row.parent_place_id, - osm_object=('W', row.osm_id), - category=('place', 'houses'), - address=row.address, - postcode=row.postcode, - extratags={'startnumber': str(row.startnumber), - 'endnumber': str(row.endnumber), - 'step': str(row.step)}, - country_code=row.country_code, - indexed_date=getattr(row, 'indexed_date'), - centroid=Point(row.x, row.y), - geometry=_filter_geometries(row)) - - -def create_from_tiger_row(row: SaRow) -> SearchResult: - """ Construct a new SearchResult and add the data from the result row - from the Tiger table. - """ - return SearchResult(source_table=SourceTable.TIGER, - place_id=row.place_id, - parent_place_id=row.parent_place_id, - category=('place', 'houses'), - postcode=row.postcode, - extratags={'startnumber': str(row.startnumber), - 'endnumber': str(row.endnumber), - 'step': str(row.step)}, - country_code='us', - centroid=Point(row.x, row.y), - geometry=_filter_geometries(row)) - - -async def add_result_details(conn: SearchConnection, result: SearchResult, +def create_from_placex_row(row: Optional[SaRow], + class_type: Type[BaseResultT]) -> Optional[BaseResultT]: + """ Construct a new result and add the data from the result row + from the placex table. 'class_type' defines the type of result + to return. Returns None if the row is None. + """ + if row is None: + return None + + return class_type(source_table=SourceTable.PLACEX, + place_id=row.place_id, + osm_object=(row.osm_type, row.osm_id), + category=(row.class_, row.type), + names=_mingle_name_tags(row.name), + address=row.address, + extratags=row.extratags, + housenumber=row.housenumber, + postcode=row.postcode, + wikipedia=row.wikipedia, + rank_address=row.rank_address, + rank_search=row.rank_search, + importance=row.importance, + country_code=row.country_code, + centroid=Point.from_wkb(row.centroid), + geometry=_filter_geometries(row)) + + +def create_from_osmline_row(row: Optional[SaRow], + class_type: Type[BaseResultT]) -> Optional[BaseResultT]: + """ Construct a new result and add the data from the result row + from the address interpolation table osmline. 'class_type' defines + the type of result to return. Returns None if the row is None. + + If the row contains a housenumber, then the housenumber is filled out. + Otherwise the result contains the interpolation information in extratags. + """ + if row is None: + return None + + hnr = getattr(row, 'housenumber', None) + + res = class_type(source_table=SourceTable.OSMLINE, + place_id=row.place_id, + osm_object=('W', row.osm_id), + category=('place', 'houses' if hnr is None else 'house'), + address=row.address, + postcode=row.postcode, + country_code=row.country_code, + centroid=Point.from_wkb(row.centroid), + geometry=_filter_geometries(row)) + + if hnr is None: + res.extratags = {'startnumber': str(row.startnumber), + 'endnumber': str(row.endnumber), + 'step': str(row.step)} + else: + res.housenumber = str(hnr) + + return res + + +def create_from_tiger_row(row: Optional[SaRow], + class_type: Type[BaseResultT], + osm_type: Optional[str] = None, + osm_id: Optional[int] = None) -> Optional[BaseResultT]: + """ Construct a new result and add the data from the result row + from the Tiger data interpolation table. 'class_type' defines + the type of result to return. Returns None if the row is None. + + If the row contains a housenumber, then the housenumber is filled out. + Otherwise the result contains the interpolation information in extratags. + """ + if row is None: + return None + + hnr = getattr(row, 'housenumber', None) + + res = class_type(source_table=SourceTable.TIGER, + place_id=row.place_id, + osm_object=(osm_type or row.osm_type, osm_id or row.osm_id), + category=('place', 'houses' if hnr is None else 'house'), + postcode=row.postcode, + country_code='us', + centroid=Point.from_wkb(row.centroid), + geometry=_filter_geometries(row)) + + if hnr is None: + res.extratags = {'startnumber': str(row.startnumber), + 'endnumber': str(row.endnumber), + 'step': str(row.step)} + else: + res.housenumber = str(hnr) + + return res + + +def create_from_postcode_row(row: Optional[SaRow], + class_type: Type[BaseResultT]) -> Optional[BaseResultT]: + """ Construct a new result and add the data from the result row + from the postcode table. 'class_type' defines + the type of result to return. Returns None if the row is None. + """ + if row is None: + return None + + return class_type(source_table=SourceTable.POSTCODE, + place_id=row.place_id, + category=('place', 'postcode'), + names={'ref': row.postcode}, + rank_search=row.rank_search, + rank_address=row.rank_address, + country_code=row.country_code, + centroid=Point.from_wkb(row.centroid), + geometry=_filter_geometries(row)) + + +def create_from_country_row(row: Optional[SaRow], + class_type: Type[BaseResultT]) -> Optional[BaseResultT]: + """ Construct a new result and add the data from the result row + from the fallback country tables. 'class_type' defines + the type of result to return. Returns None if the row is None. + """ + if row is None: + return None + + return class_type(source_table=SourceTable.COUNTRY, + category=('place', 'country'), + centroid=Point.from_wkb(row.centroid), + names=row.name, + rank_address=4, rank_search=4, + country_code=row.country_code) + + +async def add_result_details(conn: SearchConnection, results: List[BaseResultT], details: LookupDetails) -> None: """ Retrieve more details from the database according to the parameters specified in 'details'. """ - if details.address_details: - await complete_address_details(conn, result) - if details.linked_places: - await complete_linked_places(conn, result) - if details.parented_places: - await complete_parented_places(conn, result) - if details.keywords: - await complete_keywords(conn, result) + if results: + log().section('Query details for result') + if details.address_details: + log().comment('Query address details') + await complete_address_details(conn, results) + if details.linked_places: + log().comment('Query linked places') + for result in results: + await complete_linked_places(conn, result) + if details.parented_places: + log().comment('Query parent places') + for result in results: + await complete_parented_places(conn, result) + if details.keywords: + log().comment('Query keywords') + for result in results: + await complete_keywords(conn, result) def _result_row_to_address_row(row: SaRow) -> AddressLine: """ Create a new AddressLine from the results of a datbase query. """ extratags: Dict[str, str] = getattr(row, 'extratags', {}) - if 'place_type' in row: - extratags['place_type'] = row.place_type + if hasattr(row, 'place_type') and row.place_type: + extratags['place'] = row.place_type - names = row.name + names = _mingle_name_tags(row.name) or {} if getattr(row, 'housenumber', None) is not None: - if names is None: - names = {} names['housenumber'] = row.housenumber return AddressLine(place_id=row.place_id, @@ -243,35 +484,89 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine: distance=row.distance) -async def complete_address_details(conn: SearchConnection, result: SearchResult) -> None: +def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]: + places = [] + hnrs = [] + for result in results: + if result.place_id: + housenumber = -1 + if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE): + if result.housenumber is not None: + housenumber = int(result.housenumber) + elif result.extratags is not None and 'startnumber' in result.extratags: + # details requests do not come with a specific house number + housenumber = int(result.extratags['startnumber']) + places.append(result.place_id) + hnrs.append(housenumber) + + return places, hnrs + + +async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None: """ Retrieve information about places that make up the address of the result. """ - housenumber = -1 - if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE): - if result.housenumber is not None: - housenumber = int(result.housenumber) - elif result.extratags is not None and 'startnumber' in result.extratags: - # details requests do not come with a specific house number - housenumber = int(result.extratags['startnumber']) - - sfn = sa.func.get_addressdata(result.place_id, housenumber)\ - .table_valued( # type: ignore[no-untyped-call] - sa.column('place_id', type_=sa.Integer), - 'osm_type', - sa.column('osm_id', type_=sa.BigInteger), - sa.column('name', type_=conn.t.types.Composite), - 'class', 'type', 'place_type', - sa.column('admin_level', type_=sa.Integer), - sa.column('fromarea', type_=sa.Boolean), - sa.column('isaddress', type_=sa.Boolean), - sa.column('rank_address', type_=sa.SmallInteger), - sa.column('distance', type_=sa.Float)) - sql = sa.select(sfn).order_by(sa.column('rank_address').desc(), - sa.column('isaddress').desc()) - - result.address_rows = [] + places, hnrs = _get_housenumber_details(results) + + if not places: + return + + def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any: + return sa.func.get_addressdata(place_id, hnr)\ + .table_valued( # type: ignore[no-untyped-call] + sa.column('place_id', type_=sa.Integer), + 'osm_type', + sa.column('osm_id', type_=sa.BigInteger), + sa.column('name', type_=conn.t.types.Composite), + 'class', 'type', 'place_type', + sa.column('admin_level', type_=sa.Integer), + sa.column('fromarea', type_=sa.Boolean), + sa.column('isaddress', type_=sa.Boolean), + sa.column('rank_address', type_=sa.SmallInteger), + sa.column('distance', type_=sa.Float), + joins_implicitly=True) + + + if len(places) == 1: + # Optimized case for exactly one result (reverse) + sql = sa.select(_get_addressdata(places[0], hnrs[0]))\ + .order_by(sa.column('rank_address').desc(), + sa.column('isaddress').desc()) + + alines = AddressLines() + for row in await conn.execute(sql): + alines.append(_result_row_to_address_row(row)) + + for result in results: + if result.place_id == places[0]: + result.address_rows = alines + return + + + darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\ + .table_valued( # type: ignore[no-untyped-call] + sa.column('place_id', type_= sa.Integer), + sa.column('housenumber', type_= sa.Integer) + ).render_derived() + + sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber) + + sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\ + .order_by(darray.c.place_id, + sa.column('rank_address').desc(), + sa.column('isaddress').desc()) + + current_result = None for row in await conn.execute(sql): - result.address_rows.append(_result_row_to_address_row(row)) + if current_result is None or row.result_place_id != current_result.place_id: + for result in results: + if result.place_id == row.result_place_id: + current_result = result + break + else: + assert False + current_result.address_rows = AddressLines() + current_result.address_rows.append(_result_row_to_address_row(row)) + # pylint: disable=consider-using-f-string def _placex_select_address_row(conn: SearchConnection, @@ -289,10 +584,10 @@ def _placex_select_address_row(conn: SearchConnection, """ % centroid).label('distance')) -async def complete_linked_places(conn: SearchConnection, result: SearchResult) -> None: +async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None: """ Retrieve information about places that link to the result. """ - result.linked_rows = [] + result.linked_rows = AddressLines() if result.source_table != SourceTable.PLACEX: return @@ -303,8 +598,11 @@ async def complete_linked_places(conn: SearchConnection, result: SearchResult) - result.linked_rows.append(_result_row_to_address_row(row)) -async def complete_keywords(conn: SearchConnection, result: SearchResult) -> None: +async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None: """ Retrieve information about the search terms used for this place. + + Requires that the query analyzer was initialised to get access to + the word table. """ t = conn.t.search_name sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\ @@ -312,10 +610,11 @@ async def complete_keywords(conn: SearchConnection, result: SearchResult) -> Non result.name_keywords = [] result.address_keywords = [] - for name_tokens, address_tokens in await conn.execute(sql): - t = conn.t.word - sel = sa.select(t.c.word_id, t.c.word_token, t.c.word) + t = conn.t.meta.tables['word'] + sel = sa.select(t.c.word_id, t.c.word_token, t.c.word) + + for name_tokens, address_tokens in await conn.execute(sql): for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))): result.name_keywords.append(WordInfo(*row)) @@ -323,11 +622,11 @@ async def complete_keywords(conn: SearchConnection, result: SearchResult) -> Non result.address_keywords.append(WordInfo(*row)) -async def complete_parented_places(conn: SearchConnection, result: SearchResult) -> None: +async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None: """ Retrieve information about places that the result provides the address for. """ - result.parented_rows = [] + result.parented_rows = AddressLines() if result.source_table != SourceTable.PLACEX: return