]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/results.py
cache ICU transliterators and reuse them
[nominatim.git] / nominatim / api / results.py
index 2999b9a781c29fe2e18cdc3e46007396beb23bcc..d425ff6cbad94665984c1c086ad9c3d570cf707a 100644 (file)
@@ -11,21 +11,40 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
-from nominatim.typing import SaSelect, SaRow
+from nominatim.typing import SaSelect, SaRow, SaColumn
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
+from nominatim.api.localization import Locales
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
+def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+    """ Mix-in names from linked places, so that they show up
+        as standard names where necessary.
+    """
+    if not names:
+        return None
+
+    out = {}
+    for k, v in names.items():
+        if k.startswith('_place_'):
+            outkey = k[7:]
+            out[k if outkey in names else outkey] = v
+        else:
+            out[k] = v
+
+    return out
+
+
 class SourceTable(enum.Enum):
     """ Enumeration of kinds of results.
     """
 class SourceTable(enum.Enum):
     """ Enumeration of kinds of results.
     """
@@ -52,8 +71,30 @@ class AddressLine:
     rank_address: int
     distance: float
 
     rank_address: int
     distance: float
 
+    local_name: Optional[str] = None
+
+
+class AddressLines(List[AddressLine]):
+    """ Sequence of address lines order in descending order by their rank.
+    """
+
+    def localize(self, locales: Locales) -> List[str]:
+        """ Set the local name of address parts according to the chosen
+            locale. Return the list of local names without duplications.
+
+            Only address parts that are marked as isaddress are localized
+            and returned.
+        """
+        label_parts: List[str] = []
+
+        for line in self:
+            if line.isaddress and line.names:
+                line.local_name = locales.display_name(line.names)
+                if not label_parts or label_parts[-1] != line.local_name:
+                    label_parts.append(line.local_name)
+
+        return label_parts
 
 
-AddressLines = Sequence[AddressLine]
 
 
 @dataclasses.dataclass
 
 
 @dataclasses.dataclass
@@ -80,6 +121,9 @@ class BaseResult:
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
+    locale_name: Optional[str] = None
+    display_name: Optional[str] = None
+
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
@@ -123,6 +167,19 @@ class BaseResult:
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
+
+    def localize(self, locales: Locales) -> None:
+        """ Fill the locale_name and the display_name field for the
+            place and, if available, its address information.
+        """
+        self.locale_name = locales.display_name(self.names)
+        if self.address_rows:
+            self.display_name = ', '.join(self.address_rows.localize(locales))
+        else:
+            self.display_name = self.locale_name
+
+
+
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
@@ -144,6 +201,40 @@ class ReverseResult(BaseResult):
     bbox: Optional[Bbox] = None
 
 
     bbox: Optional[Bbox] = None
 
 
+class ReverseResults(List[ReverseResult]):
+    """ Sequence of reverse lookup results ordered by distance.
+        May be empty when no result was found.
+    """
+
+
+@dataclasses.dataclass
+class SearchResult(BaseResult):
+    """ A search result for forward geocoding.
+    """
+    bbox: Optional[Bbox] = None
+    accuracy: float = 0.0
+
+
+    @property
+    def ranking(self) -> float:
+        """ Return the ranking, a combined measure of accuracy and importance.
+        """
+        return (self.accuracy if self.accuracy is not None else 1) \
+               - self.calculated_importance()
+
+
+class SearchResults(List[SearchResult]):
+    """ Sequence of forward lookup results ordered by relevance.
+        May be empty when no result was found.
+    """
+
+    def localize(self, locales: Locales) -> None:
+        """ Apply the given locales to all results.
+        """
+        for result in self:
+            result.localize(locales)
+
+
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
@@ -162,7 +253,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
-                      names=row.name,
+                      names=_mingle_name_tags(row.name),
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
@@ -172,7 +263,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
@@ -197,7 +288,7 @@ def create_from_osmline_row(row: Optional[SaRow],
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -211,7 +302,9 @@ def create_from_osmline_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
-                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
@@ -226,10 +319,11 @@ def create_from_tiger_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
+                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -258,41 +352,60 @@ def create_from_postcode_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
-async def add_result_details(conn: SearchConnection, result: BaseResult,
+def create_from_country_row(row: Optional[SaRow],
+                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Construct a new result and add the data from the result row
+        from the fallback country tables. 'class_type' defines
+        the type of result to return. Returns None if the row is None.
+    """
+    if row is None:
+        return None
+
+    return class_type(source_table=SourceTable.COUNTRY,
+                      category=('place', 'country'),
+                      centroid=Point.from_wkb(row.centroid),
+                      names=row.name,
+                      rank_address=4, rank_search=4,
+                      country_code=row.country_code)
+
+
+async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
-    log().section('Query details for result')
-    if details.address_details:
-        log().comment('Query address details')
-        await complete_address_details(conn, result)
-    if details.linked_places:
-        log().comment('Query linked places')
-        await complete_linked_places(conn, result)
-    if details.parented_places:
-        log().comment('Query parent places')
-        await complete_parented_places(conn, result)
-    if details.keywords:
-        log().comment('Query keywords')
-        await complete_keywords(conn, result)
+    if results:
+        log().section('Query details for result')
+        if details.address_details:
+            log().comment('Query address details')
+            await complete_address_details(conn, results)
+        if details.linked_places:
+            log().comment('Query linked places')
+            for result in results:
+                await complete_linked_places(conn, result)
+        if details.parented_places:
+            log().comment('Query parent places')
+            for result in results:
+                await complete_parented_places(conn, result)
+        if details.keywords:
+            log().comment('Query keywords')
+            for result in results:
+                await complete_keywords(conn, result)
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
     """ Create a new AddressLine from the results of a datbase query.
     """
     extratags: Dict[str, str] = getattr(row, 'extratags', {})
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
     """ Create a new AddressLine from the results of a datbase query.
     """
     extratags: Dict[str, str] = getattr(row, 'extratags', {})
-    if 'place_type' in row:
-        extratags['place_type'] = row.place_type
+    if hasattr(row, 'place_type') and row.place_type:
+        extratags['place'] = row.place_type
 
 
-    names = row.name
+    names = _mingle_name_tags(row.name) or {}
     if getattr(row, 'housenumber', None) is not None:
     if getattr(row, 'housenumber', None) is not None:
-        if names is None:
-            names = {}
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
@@ -307,35 +420,88 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        distance=row.distance)
 
 
                        distance=row.distance)
 
 
-async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
+def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
+    places = []
+    hnrs = []
+    for result in results:
+        if result.place_id:
+            housenumber = -1
+            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
+                if result.housenumber is not None:
+                    housenumber = int(result.housenumber)
+                elif result.extratags is not None and 'startnumber' in result.extratags:
+                    # details requests do not come with a specific house number
+                    housenumber = int(result.extratags['startnumber'])
+            places.append(result.place_id)
+            hnrs.append(housenumber)
+
+    return places, hnrs
+
+
+async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
     """ Retrieve information about places that make up the address of the result.
     """
-    housenumber = -1
-    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-        if result.housenumber is not None:
-            housenumber = int(result.housenumber)
-        elif result.extratags is not None and 'startnumber' in result.extratags:
-            # details requests do not come with a specific house number
-            housenumber = int(result.extratags['startnumber'])
-
-    sfn = sa.func.get_addressdata(result.place_id, housenumber)\
-            .table_valued( # type: ignore[no-untyped-call]
-                sa.column('place_id', type_=sa.Integer),
-                'osm_type',
-                sa.column('osm_id', type_=sa.BigInteger),
-                sa.column('name', type_=conn.t.types.Composite),
-                'class', 'type', 'place_type',
-                sa.column('admin_level', type_=sa.Integer),
-                sa.column('fromarea', type_=sa.Boolean),
-                sa.column('isaddress', type_=sa.Boolean),
-                sa.column('rank_address', type_=sa.SmallInteger),
-                sa.column('distance', type_=sa.Float))
-    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
-                                  sa.column('isaddress').desc())
-
-    result.address_rows = []
+    places, hnrs = _get_housenumber_details(results)
+
+    if not places:
+        return
+
+    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
+        return sa.func.get_addressdata(place_id, hnr)\
+                    .table_valued( # type: ignore[no-untyped-call]
+                        sa.column('place_id', type_=sa.Integer),
+                        'osm_type',
+                        sa.column('osm_id', type_=sa.BigInteger),
+                        sa.column('name', type_=conn.t.types.Composite),
+                        'class', 'type', 'place_type',
+                        sa.column('admin_level', type_=sa.Integer),
+                        sa.column('fromarea', type_=sa.Boolean),
+                        sa.column('isaddress', type_=sa.Boolean),
+                        sa.column('rank_address', type_=sa.SmallInteger),
+                        sa.column('distance', type_=sa.Float),
+                        joins_implicitly=True)
+
+
+    if len(places) == 1:
+        # Optimized case for exactly one result (reverse)
+        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
+                .order_by(sa.column('rank_address').desc(),
+                          sa.column('isaddress').desc())
+
+        alines = AddressLines()
+        for row in await conn.execute(sql):
+            alines.append(_result_row_to_address_row(row))
+
+        for result in results:
+            if result.place_id == places[0]:
+                result.address_rows = alines
+                return
+
+
+    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
+                    .table_valued( # type: ignore[no-untyped-call]
+                       sa.column('place_id', type_= sa.Integer),
+                       sa.column('housenumber', type_= sa.Integer)
+                    ).render_derived()
+
+    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
+
+    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
+            .order_by(darray.c.place_id,
+                      sa.column('rank_address').desc(),
+                      sa.column('isaddress').desc())
+
+    current_result = None
     for row in await conn.execute(sql):
     for row in await conn.execute(sql):
-        result.address_rows.append(_result_row_to_address_row(row))
+        if current_result is None or row.result_place_id != current_result.place_id:
+            for result in results:
+                if result.place_id == row.result_place_id:
+                    current_result = result
+                    break
+            else:
+                assert False
+            current_result.address_rows = AddressLines()
+        current_result.address_rows.append(_result_row_to_address_row(row))
 
 
 # pylint: disable=consider-using-f-string
 
 
 # pylint: disable=consider-using-f-string
@@ -357,7 +523,7 @@ def _placex_select_address_row(conn: SearchConnection,
 async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about places that link to the result.
     """
 async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about places that link to the result.
     """
-    result.linked_rows = []
+    result.linked_rows = AddressLines()
     if result.source_table != SourceTable.PLACEX:
         return
 
     if result.source_table != SourceTable.PLACEX:
         return
 
@@ -370,6 +536,9 @@ async def complete_linked_places(conn: SearchConnection, result: BaseResult) ->
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
+
+        Requires that the query analyzer was initialised to get access to
+        the word table.
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
@@ -377,10 +546,11 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
 
     result.name_keywords = []
     result.address_keywords = []
 
     result.name_keywords = []
     result.address_keywords = []
-    for name_tokens, address_tokens in await conn.execute(sql):
-        t = conn.t.word
-        sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
 
 
+    t = conn.t.meta.tables['word']
+    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
+
+    for name_tokens, address_tokens in await conn.execute(sql):
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))
 
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))
 
@@ -392,7 +562,7 @@ async def complete_parented_places(conn: SearchConnection, result: BaseResult) -
     """ Retrieve information about places that the result provides the
         address for.
     """
     """ Retrieve information about places that the result provides the
         address for.
     """
-    result.parented_rows = []
+    result.parented_rows = AddressLines()
     if result.source_table != SourceTable.PLACEX:
         return
 
     if result.source_table != SourceTable.PLACEX:
         return