]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/results.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / api / results.py
index 1e77d0be5aba5b6b6c9ba7118380956adba90d7d..0183f5b9f35e1cf201773104aee7821653f78402 100644 (file)
@@ -11,14 +11,14 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
-from nominatim.typing import SaSelect, SaRow
+from nominatim.typing import SaSelect, SaRow, SaColumn
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
@@ -27,33 +27,107 @@ from nominatim.api.localization import Locales
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
+def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+    """ Mix-in names from linked places, so that they show up
+        as standard names where necessary.
+    """
+    if not names:
+        return None
+
+    out = {}
+    for k, v in names.items():
+        if k.startswith('_place_'):
+            outkey = k[7:]
+            out[k if outkey in names else outkey] = v
+        else:
+            out[k] = v
+
+    return out
+
+
 class SourceTable(enum.Enum):
 class SourceTable(enum.Enum):
-    """ Enumeration of kinds of results.
+    """ The `SourceTable` type lists the possible sources a result can have.
     """
     PLACEX = 1
     """
     PLACEX = 1
+    """ The placex table is the main source for result usually containing
+        OSM data.
+    """
     OSMLINE = 2
     OSMLINE = 2
+    """ The osmline table contains address interpolations from OSM data.
+        Interpolation addresses are always approximate. The OSM id in the
+        result refers to the OSM way with the interpolation line object.
+    """
     TIGER = 3
     TIGER = 3
+    """ TIGER address data contains US addresses imported on the side,
+        see [Installing TIGER data](../customize/Tiger.md).
+        TIGER address are also interpolations. The addresses always refer
+        to a street from OSM data. The OSM id in the result refers to
+        that street.
+    """
     POSTCODE = 4
     POSTCODE = 4
+    """ The postcode table contains artificial centroids for postcodes,
+        computed from the postcodes available with address points. Results
+        are always approximate.
+    """
     COUNTRY = 5
     COUNTRY = 5
+    """ The country table provides a fallback, when country data is missing
+        in the OSM data.
+    """
 
 
 @dataclasses.dataclass
 class AddressLine:
 
 
 @dataclasses.dataclass
 class AddressLine:
-    """ Detailed information about a related place.
+    """ The `AddressLine` may contain the following fields about a related place
+        and its function as an address object. Most fields are optional.
+        Their presence depends on the kind and function of the address part.
     """
     place_id: Optional[int]
     """
     place_id: Optional[int]
+    """ Internal ID of the place.
+    """
     osm_object: Optional[Tuple[str, int]]
     osm_object: Optional[Tuple[str, int]]
+    """ OSM type and ID of the place, if such an object exists.
+    """
     category: Tuple[str, str]
     category: Tuple[str, str]
+    """ Main category of the place, described by a key-value pair.
+    """
     names: Dict[str, str]
     names: Dict[str, str]
+    """ All available names for the place including references, alternative
+        names and translations.
+    """
     extratags: Optional[Dict[str, str]]
     extratags: Optional[Dict[str, str]]
+    """ Any extra information available about the place. This is a dictionary
+        that usually contains OSM tag key-value pairs.
+    """
 
     admin_level: Optional[int]
 
     admin_level: Optional[int]
+    """ The administrative level of a boundary as tagged in the input data.
+        This field is only meaningful for places of the category
+        (boundary, administrative).
+    """
     fromarea: bool
     fromarea: bool
+    """ If true, then the exact area of the place is known. Without area
+        information, Nominatim has to make an educated guess if an address
+        belongs to one place or another.
+    """
     isaddress: bool
     isaddress: bool
+    """ If true, this place should be considered for the final address display.
+        Nominatim will sometimes include more than one candidate for
+        the address in the list when it cannot reliably determine where the
+        place belongs. It will consider names of all candidates when searching
+        but when displaying the result, only the most likely candidate should
+        be shown.
+    """
     rank_address: int
     rank_address: int
+    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
+    """
     distance: float
     distance: float
+    """ Distance in degrees between the result place and this address part.
+    """
 
     local_name: Optional[str] = None
 
     local_name: Optional[str] = None
+    """ Place holder for localization of this address part. See
+        [Localization](#localization) below.
+    """
 
 
 class AddressLines(List[AddressLine]):
 
 
 class AddressLines(List[AddressLine]):
@@ -62,7 +136,7 @@ class AddressLines(List[AddressLine]):
 
     def localize(self, locales: Locales) -> List[str]:
         """ Set the local name of address parts according to the chosen
 
     def localize(self, locales: Locales) -> List[str]:
         """ Set the local name of address parts according to the chosen
-            locale. Return the list of local names without duplications.
+            locale. Return the list of local names without duplicates.
 
             Only address parts that are marked as isaddress are localized
             and returned.
 
             Only address parts that are marked as isaddress are localized
             and returned.
@@ -81,11 +155,19 @@ class AddressLines(List[AddressLine]):
 
 @dataclasses.dataclass
 class WordInfo:
 
 @dataclasses.dataclass
 class WordInfo:
-    """ Detailed information about a search term.
+    """ Each entry in the list of search terms contains the
+        following detailed information.
     """
     word_id: int
     """
     word_id: int
+    """ Internal identifier for the word.
+    """
     word_token: str
     word_token: str
+    """ Normalised and transliterated form of the word.
+        This form is used for searching.
+    """
     word: Optional[str] = None
     word: Optional[str] = None
+    """ Untransliterated form, if available.
+    """
 
 
 WordInfos = Sequence[WordInfo]
 
 
 WordInfos = Sequence[WordInfo]
@@ -210,6 +292,12 @@ class SearchResults(List[SearchResult]):
         May be empty when no result was found.
     """
 
         May be empty when no result was found.
     """
 
+    def localize(self, locales: Locales) -> None:
+        """ Apply the given locales to all results.
+        """
+        for result in self:
+            result.localize(locales)
+
 
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
 
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
@@ -229,7 +317,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
-                      names=row.name,
+                      names=_mingle_name_tags(row.name),
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
@@ -239,7 +327,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
@@ -264,7 +352,7 @@ def create_from_osmline_row(row: Optional[SaRow],
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -278,7 +366,9 @@ def create_from_osmline_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
-                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
@@ -293,11 +383,11 @@ def create_from_tiger_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
-                     osm_object=(row.osm_type, row.osm_id),
+                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -326,7 +416,7 @@ def create_from_postcode_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
@@ -341,7 +431,7 @@ def create_from_country_row(row: Optional[SaRow],
 
     return class_type(source_table=SourceTable.COUNTRY,
                       category=('place', 'country'),
 
     return class_type(source_table=SourceTable.COUNTRY,
                       category=('place', 'country'),
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       names=row.name,
                       rank_address=4, rank_search=4,
                       country_code=row.country_code)
                       names=row.name,
                       rank_address=4, rank_search=4,
                       country_code=row.country_code)
@@ -378,10 +468,8 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
     if hasattr(row, 'place_type') and row.place_type:
         extratags['place'] = row.place_type
 
     if hasattr(row, 'place_type') and row.place_type:
         extratags['place'] = row.place_type
 
-    names = row.name
+    names = _mingle_name_tags(row.name) or {}
     if getattr(row, 'housenumber', None) is not None:
     if getattr(row, 'housenumber', None) is not None:
-        if names is None:
-            names = {}
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
@@ -396,46 +484,74 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        distance=row.distance)
 
 
                        distance=row.distance)
 
 
+def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
+    places = []
+    hnrs = []
+    for result in results:
+        if result.place_id:
+            housenumber = -1
+            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
+                if result.housenumber is not None:
+                    housenumber = int(result.housenumber)
+                elif result.extratags is not None and 'startnumber' in result.extratags:
+                    # details requests do not come with a specific house number
+                    housenumber = int(result.extratags['startnumber'])
+            places.append(result.place_id)
+            hnrs.append(housenumber)
+
+    return places, hnrs
+
+
 async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
 async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
-    def get_hnr(result: BaseResult) -> Tuple[int, int]:
-        housenumber = -1
-        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-            if result.housenumber is not None:
-                housenumber = int(result.housenumber)
-            elif result.extratags is not None and 'startnumber' in result.extratags:
-                # details requests do not come with a specific house number
-                housenumber = int(result.extratags['startnumber'])
-        assert result.place_id
-        return result.place_id, housenumber
-
-    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]
+    places, hnrs = _get_housenumber_details(results)
 
 
-    if not data:
+    if not places:
         return
 
         return
 
-    values = sa.values(sa.column('place_id', type_=sa.Integer),
-                       sa.column('housenumber', type_=sa.Integer),
-                       name='places',
-                       literal_binds=True).data(data)
-
-    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
-                .table_valued( # type: ignore[no-untyped-call]
-                    sa.column('place_id', type_=sa.Integer),
-                    'osm_type',
-                    sa.column('osm_id', type_=sa.BigInteger),
-                    sa.column('name', type_=conn.t.types.Composite),
-                    'class', 'type', 'place_type',
-                    sa.column('admin_level', type_=sa.Integer),
-                    sa.column('fromarea', type_=sa.Boolean),
-                    sa.column('isaddress', type_=sa.Boolean),
-                    sa.column('rank_address', type_=sa.SmallInteger),
-                    sa.column('distance', type_=sa.Float),
-                    joins_implicitly=True)
-
-    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
-            .order_by(values.c.place_id,
+    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
+        return sa.func.get_addressdata(place_id, hnr)\
+                    .table_valued( # type: ignore[no-untyped-call]
+                        sa.column('place_id', type_=sa.Integer),
+                        'osm_type',
+                        sa.column('osm_id', type_=sa.BigInteger),
+                        sa.column('name', type_=conn.t.types.Composite),
+                        'class', 'type', 'place_type',
+                        sa.column('admin_level', type_=sa.Integer),
+                        sa.column('fromarea', type_=sa.Boolean),
+                        sa.column('isaddress', type_=sa.Boolean),
+                        sa.column('rank_address', type_=sa.SmallInteger),
+                        sa.column('distance', type_=sa.Float),
+                        joins_implicitly=True)
+
+
+    if len(places) == 1:
+        # Optimized case for exactly one result (reverse)
+        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
+                .order_by(sa.column('rank_address').desc(),
+                          sa.column('isaddress').desc())
+
+        alines = AddressLines()
+        for row in await conn.execute(sql):
+            alines.append(_result_row_to_address_row(row))
+
+        for result in results:
+            if result.place_id == places[0]:
+                result.address_rows = alines
+                return
+
+
+    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
+                    .table_valued( # type: ignore[no-untyped-call]
+                       sa.column('place_id', type_= sa.Integer),
+                       sa.column('housenumber', type_= sa.Integer)
+                    ).render_derived()
+
+    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
+
+    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
+            .order_by(darray.c.place_id,
                       sa.column('rank_address').desc(),
                       sa.column('isaddress').desc())
 
                       sa.column('rank_address').desc(),
                       sa.column('isaddress').desc())