git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/results.py
Merge pull request #3188 from lonvia/update-osm2pgsql
[nominatim.git] / nominatim / api / results.py
index 0e3ddeda778bea988b74ea95f5ecd5fe41f687cc..0183f5b9f35e1cf201773104aee7821653f78402 100644 (file)
@@ -11,14 +11,14 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
-from nominatim.typing import SaSelect, SaRow
+from nominatim.typing import SaSelect, SaRow, SaColumn
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
@@ -27,33 +27,107 @@ from nominatim.api.localization import Locales
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
+def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+    """ Mix-in names from linked places, so that they show up
+        as standard names where necessary.
+    """
+    if not names:
+        return None
+
+    out = {}
+    for k, v in names.items():
+        if k.startswith('_place_'):
+            outkey = k[7:]
+            out[k if outkey in names else outkey] = v
+        else:
+            out[k] = v
+
+    return out
+
+
 class SourceTable(enum.Enum):
 class SourceTable(enum.Enum):
-    """ Enumeration of kinds of results.
+    """ The `SourceTable` type lists the possible sources a result can have.
     """
     PLACEX = 1
     """
     PLACEX = 1
+    """ The placex table is the main source for results, usually containing
+        OSM data.
+    """
     OSMLINE = 2
     OSMLINE = 2
+    """ The osmline table contains address interpolations from OSM data.
+        Interpolation addresses are always approximate. The OSM id in the
+        result refers to the OSM way with the interpolation line object.
+    """
     TIGER = 3
     TIGER = 3
+    """ TIGER address data contains US addresses imported on the side,
+        see [Installing TIGER data](../customize/Tiger.md).
+        TIGER addresses are also interpolations. The addresses always refer
+        to a street from OSM data. The OSM id in the result refers to
+        that street.
+    """
     POSTCODE = 4
     POSTCODE = 4
+    """ The postcode table contains artificial centroids for postcodes,
+        computed from the postcodes available with address points. Results
+        are always approximate.
+    """
     COUNTRY = 5
     COUNTRY = 5
+    """ The country table provides a fallback, when country data is missing
+        in the OSM data.
+    """
 
 
 @dataclasses.dataclass
 class AddressLine:
 
 
 @dataclasses.dataclass
 class AddressLine:
-    """ Detailed information about a related place.
+    """ The `AddressLine` may contain the following fields about a related place
+        and its function as an address object. Most fields are optional.
+        Their presence depends on the kind and function of the address part.
     """
     place_id: Optional[int]
     """
     place_id: Optional[int]
+    """ Internal ID of the place.
+    """
     osm_object: Optional[Tuple[str, int]]
     osm_object: Optional[Tuple[str, int]]
+    """ OSM type and ID of the place, if such an object exists.
+    """
     category: Tuple[str, str]
     category: Tuple[str, str]
+    """ Main category of the place, described by a key-value pair.
+    """
     names: Dict[str, str]
     names: Dict[str, str]
+    """ All available names for the place including references, alternative
+        names and translations.
+    """
     extratags: Optional[Dict[str, str]]
     extratags: Optional[Dict[str, str]]
+    """ Any extra information available about the place. This is a dictionary
+        that usually contains OSM tag key-value pairs.
+    """
 
     admin_level: Optional[int]
 
     admin_level: Optional[int]
+    """ The administrative level of a boundary as tagged in the input data.
+        This field is only meaningful for places of the category
+        (boundary, administrative).
+    """
     fromarea: bool
     fromarea: bool
+    """ If true, then the exact area of the place is known. Without area
+        information, Nominatim has to make an educated guess if an address
+        belongs to one place or another.
+    """
     isaddress: bool
     isaddress: bool
+    """ If true, this place should be considered for the final address display.
+        Nominatim will sometimes include more than one candidate for
+        the address in the list when it cannot reliably determine where the
+        place belongs. It will consider names of all candidates when searching
+        but when displaying the result, only the most likely candidate should
+        be shown.
+    """
     rank_address: int
     rank_address: int
+    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
+    """
     distance: float
     distance: float
+    """ Distance in degrees between the result place and this address part.
+    """
 
     local_name: Optional[str] = None
 
     local_name: Optional[str] = None
+    """ Placeholder for localization of this address part. See
+        [Localization](#localization) below.
+    """
 
 
 class AddressLines(List[AddressLine]):
 
 
 class AddressLines(List[AddressLine]):
@@ -62,7 +136,7 @@ class AddressLines(List[AddressLine]):
 
     def localize(self, locales: Locales) -> List[str]:
         """ Set the local name of address parts according to the chosen
 
     def localize(self, locales: Locales) -> List[str]:
         """ Set the local name of address parts according to the chosen
-            locale. Return the list of local names without duplications.
+            locale. Return the list of local names without duplicates.
 
             Only address parts that are marked as isaddress are localized
             and returned.
 
             Only address parts that are marked as isaddress are localized
             and returned.
@@ -81,11 +155,19 @@ class AddressLines(List[AddressLine]):
 
 @dataclasses.dataclass
 class WordInfo:
 
 @dataclasses.dataclass
 class WordInfo:
-    """ Detailed information about a search term.
+    """ Each entry in the list of search terms contains the
+        following detailed information.
     """
     word_id: int
     """
     word_id: int
+    """ Internal identifier for the word.
+    """
     word_token: str
     word_token: str
+    """ Normalised and transliterated form of the word.
+        This form is used for searching.
+    """
     word: Optional[str] = None
     word: Optional[str] = None
+    """ Untransliterated form, if available.
+    """
 
 
 WordInfos = Sequence[WordInfo]
 
 
 WordInfos = Sequence[WordInfo]
@@ -103,6 +185,9 @@ class BaseResult:
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
+    locale_name: Optional[str] = None
+    display_name: Optional[str] = None
+
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
@@ -146,6 +231,19 @@ class BaseResult:
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
+
+    def localize(self, locales: Locales) -> None:
+        """ Fill the locale_name and display_name fields for the
+            place and, if available, its address information.
+        """
+        self.locale_name = locales.display_name(self.names)
+        if self.address_rows:
+            self.display_name = ', '.join(self.address_rows.localize(locales))
+        else:
+            self.display_name = self.locale_name
+
+
+
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
@@ -173,6 +271,34 @@ class ReverseResults(List[ReverseResult]):
     """
 
 
     """
 
 
+@dataclasses.dataclass
+class SearchResult(BaseResult):
+    """ A search result for forward geocoding.
+    """
+    bbox: Optional[Bbox] = None
+    accuracy: float = 0.0
+
+
+    @property
+    def ranking(self) -> float:
+        """ Return the ranking, a combined measure of accuracy and importance.
+        """
+        return (self.accuracy if self.accuracy is not None else 1) \
+               - self.calculated_importance()
+
+
+class SearchResults(List[SearchResult]):
+    """ Sequence of forward lookup results ordered by relevance.
+        May be empty when no result was found.
+    """
+
+    def localize(self, locales: Locales) -> None:
+        """ Apply the given locales to all results.
+        """
+        for result in self:
+            result.localize(locales)
+
+
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
@@ -191,7 +317,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
-                      names=row.name,
+                      names=_mingle_name_tags(row.name),
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
@@ -201,7 +327,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
@@ -226,7 +352,7 @@ def create_from_osmline_row(row: Optional[SaRow],
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -240,7 +366,9 @@ def create_from_osmline_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
-                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
@@ -255,10 +383,11 @@ def create_from_tiger_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
+                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -287,41 +416,60 @@ def create_from_postcode_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
-async def add_result_details(conn: SearchConnection, result: BaseResult,
+def create_from_country_row(row: Optional[SaRow],
+                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Construct a new result and add the data from the result row
+        from the fallback country tables. 'class_type' defines
+        the type of result to return. Returns None if the row is None.
+    """
+    if row is None:
+        return None
+
+    return class_type(source_table=SourceTable.COUNTRY,
+                      category=('place', 'country'),
+                      centroid=Point.from_wkb(row.centroid),
+                      names=row.name,
+                      rank_address=4, rank_search=4,
+                      country_code=row.country_code)
+
+
+async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
-    log().section('Query details for result')
-    if details.address_details:
-        log().comment('Query address details')
-        await complete_address_details(conn, result)
-    if details.linked_places:
-        log().comment('Query linked places')
-        await complete_linked_places(conn, result)
-    if details.parented_places:
-        log().comment('Query parent places')
-        await complete_parented_places(conn, result)
-    if details.keywords:
-        log().comment('Query keywords')
-        await complete_keywords(conn, result)
+    if results:
+        log().section('Query details for result')
+        if details.address_details:
+            log().comment('Query address details')
+            await complete_address_details(conn, results)
+        if details.linked_places:
+            log().comment('Query linked places')
+            for result in results:
+                await complete_linked_places(conn, result)
+        if details.parented_places:
+            log().comment('Query parent places')
+            for result in results:
+                await complete_parented_places(conn, result)
+        if details.keywords:
+            log().comment('Query keywords')
+            for result in results:
+                await complete_keywords(conn, result)
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
     """ Create a new AddressLine from the results of a database query.
     """
     extratags: Dict[str, str] = getattr(row, 'extratags', {})
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
     """ Create a new AddressLine from the results of a database query.
     """
     extratags: Dict[str, str] = getattr(row, 'extratags', {})
-    if 'place_type' in row:
-        extratags['place_type'] = row.place_type
+    if hasattr(row, 'place_type') and row.place_type:
+        extratags['place'] = row.place_type
 
 
-    names = row.name
+    names = _mingle_name_tags(row.name) or {}
     if getattr(row, 'housenumber', None) is not None:
     if getattr(row, 'housenumber', None) is not None:
-        if names is None:
-            names = {}
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
@@ -336,35 +484,88 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        distance=row.distance)
 
 
                        distance=row.distance)
 
 
-async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
+def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
+    places = []
+    hnrs = []
+    for result in results:
+        if result.place_id:
+            housenumber = -1
+            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
+                if result.housenumber is not None:
+                    housenumber = int(result.housenumber)
+                elif result.extratags is not None and 'startnumber' in result.extratags:
+                    # details requests do not come with a specific house number
+                    housenumber = int(result.extratags['startnumber'])
+            places.append(result.place_id)
+            hnrs.append(housenumber)
+
+    return places, hnrs
+
+
+async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
     """ Retrieve information about places that make up the address of the result.
     """
-    housenumber = -1
-    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-        if result.housenumber is not None:
-            housenumber = int(result.housenumber)
-        elif result.extratags is not None and 'startnumber' in result.extratags:
-            # details requests do not come with a specific house number
-            housenumber = int(result.extratags['startnumber'])
-
-    sfn = sa.func.get_addressdata(result.place_id, housenumber)\
-            .table_valued( # type: ignore[no-untyped-call]
-                sa.column('place_id', type_=sa.Integer),
-                'osm_type',
-                sa.column('osm_id', type_=sa.BigInteger),
-                sa.column('name', type_=conn.t.types.Composite),
-                'class', 'type', 'place_type',
-                sa.column('admin_level', type_=sa.Integer),
-                sa.column('fromarea', type_=sa.Boolean),
-                sa.column('isaddress', type_=sa.Boolean),
-                sa.column('rank_address', type_=sa.SmallInteger),
-                sa.column('distance', type_=sa.Float))
-    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
-                                  sa.column('isaddress').desc())
-
-    result.address_rows = AddressLines()
+    places, hnrs = _get_housenumber_details(results)
+
+    if not places:
+        return
+
+    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
+        return sa.func.get_addressdata(place_id, hnr)\
+                    .table_valued( # type: ignore[no-untyped-call]
+                        sa.column('place_id', type_=sa.Integer),
+                        'osm_type',
+                        sa.column('osm_id', type_=sa.BigInteger),
+                        sa.column('name', type_=conn.t.types.Composite),
+                        'class', 'type', 'place_type',
+                        sa.column('admin_level', type_=sa.Integer),
+                        sa.column('fromarea', type_=sa.Boolean),
+                        sa.column('isaddress', type_=sa.Boolean),
+                        sa.column('rank_address', type_=sa.SmallInteger),
+                        sa.column('distance', type_=sa.Float),
+                        joins_implicitly=True)
+
+
+    if len(places) == 1:
+        # Optimized case for exactly one result (reverse)
+        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
+                .order_by(sa.column('rank_address').desc(),
+                          sa.column('isaddress').desc())
+
+        alines = AddressLines()
+        for row in await conn.execute(sql):
+            alines.append(_result_row_to_address_row(row))
+
+        for result in results:
+            if result.place_id == places[0]:
+                result.address_rows = alines
+                return
+
+
+    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
+                    .table_valued( # type: ignore[no-untyped-call]
+                       sa.column('place_id', type_= sa.Integer),
+                       sa.column('housenumber', type_= sa.Integer)
+                    ).render_derived()
+
+    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
+
+    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
+            .order_by(darray.c.place_id,
+                      sa.column('rank_address').desc(),
+                      sa.column('isaddress').desc())
+
+    current_result = None
     for row in await conn.execute(sql):
     for row in await conn.execute(sql):
-        result.address_rows.append(_result_row_to_address_row(row))
+        if current_result is None or row.result_place_id != current_result.place_id:
+            for result in results:
+                if result.place_id == row.result_place_id:
+                    current_result = result
+                    break
+            else:
+                assert False
+            current_result.address_rows = AddressLines()
+        current_result.address_rows.append(_result_row_to_address_row(row))
 
 
 # pylint: disable=consider-using-f-string
 
 
 # pylint: disable=consider-using-f-string
@@ -399,6 +600,9 @@ async def complete_linked_places(conn: SearchConnection, result: BaseResult) ->
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
+
+        Requires that the query analyzer was initialised to get access to
+        the word table.
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
@@ -406,10 +610,11 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
 
     result.name_keywords = []
     result.address_keywords = []
 
     result.name_keywords = []
     result.address_keywords = []
-    for name_tokens, address_tokens in await conn.execute(sql):
-        t = conn.t.word
-        sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
 
 
+    t = conn.t.meta.tables['word']
+    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
+
+    for name_tokens, address_tokens in await conn.execute(sql):
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))
 
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))