]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/results.py
increase threshold for full name searches
[nominatim.git] / nominatim / api / results.py
index 098851ef1b8a39da145eba2b7ba5968f45e84f34..d425ff6cbad94665984c1c086ad9c3d570cf707a 100644 (file)
@@ -11,14 +11,14 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
 import enum
 import dataclasses
 import datetime as dt
 
 import sqlalchemy as sa
 
-from nominatim.typing import SaSelect, SaRow
+from nominatim.typing import SaSelect, SaRow, SaColumn
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
@@ -27,6 +27,24 @@ from nominatim.api.localization import Locales
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
 # This file defines complex result data classes.
 # pylint: disable=too-many-instance-attributes
 
+def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
+    """ Mix-in names from linked places, so that they show up
+        as standard names where necessary.
+    """
+    if not names:
+        return None
+
+    out = {}
+    for k, v in names.items():
+        if k.startswith('_place_'):
+            outkey = k[7:]
+            out[k if outkey in names else outkey] = v
+        else:
+            out[k] = v
+
+    return out
+
+
 class SourceTable(enum.Enum):
     """ Enumeration of kinds of results.
     """
 class SourceTable(enum.Enum):
     """ Enumeration of kinds of results.
     """
@@ -103,6 +121,9 @@ class BaseResult:
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
 
+    locale_name: Optional[str] = None
+    display_name: Optional[str] = None
+
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
     names: Optional[Dict[str, str]] = None
     address: Optional[Dict[str, str]] = None
     extratags: Optional[Dict[str, str]] = None
@@ -146,6 +167,19 @@ class BaseResult:
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
         """
         return self.importance or (0.7500001 - (self.rank_search/40.0))
 
+
+    def localize(self, locales: Locales) -> None:
+        """ Fill the locale_name and the display_name field for the
+            place and, if available, its address information.
+        """
+        self.locale_name = locales.display_name(self.names)
+        if self.address_rows:
+            self.display_name = ', '.join(self.address_rows.localize(locales))
+        else:
+            self.display_name = self.locale_name
+
+
+
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
 BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
 
 @dataclasses.dataclass
@@ -173,6 +207,34 @@ class ReverseResults(List[ReverseResult]):
     """
 
 
     """
 
 
+@dataclasses.dataclass
+class SearchResult(BaseResult):
+    """ A search result for forward geocoding.
+    """
+    bbox: Optional[Bbox] = None
+    accuracy: float = 0.0
+
+
+    @property
+    def ranking(self) -> float:
+        """ Return the ranking, a combined measure of accuracy and importance.
+        """
+        return (self.accuracy if self.accuracy is not None else 1) \
+               - self.calculated_importance()
+
+
+class SearchResults(List[SearchResult]):
+    """ Sequence of forward lookup results ordered by relevance.
+        May be empty when no result was found.
+    """
+
+    def localize(self, locales: Locales) -> None:
+        """ Apply the given locales to all results.
+        """
+        for result in self:
+            result.localize(locales)
+
+
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
             if k.startswith('geometry_')}
@@ -191,7 +253,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
-                      names=row.name,
+                      names=_mingle_name_tags(row.name),
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
                       address=row.address,
                       extratags=row.extratags,
                       housenumber=row.housenumber,
@@ -201,7 +263,7 @@ def create_from_placex_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       importance=row.importance,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
@@ -226,7 +288,7 @@ def create_from_osmline_row(row: Optional[SaRow],
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
                      address=row.address,
                      postcode=row.postcode,
                      country_code=row.country_code,
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -240,7 +302,9 @@ def create_from_osmline_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
-                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
@@ -255,11 +319,11 @@ def create_from_tiger_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
-                     osm_object=(row.osm_type, row.osm_id),
+                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
-                     centroid=Point.from_wkb(row.centroid.data),
+                     centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
 
     if hnr is None:
                      geometry=_filter_geometries(row))
 
     if hnr is None:
@@ -288,28 +352,49 @@ def create_from_postcode_row(row: Optional[SaRow],
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
                       rank_search=row.rank_search,
                       rank_address=row.rank_address,
                       country_code=row.country_code,
-                      centroid=Point.from_wkb(row.centroid.data),
+                      centroid=Point.from_wkb(row.centroid),
                       geometry=_filter_geometries(row))
 
 
                       geometry=_filter_geometries(row))
 
 
-async def add_result_details(conn: SearchConnection, result: BaseResult,
+def create_from_country_row(row: Optional[SaRow],
+                        class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+    """ Construct a new result and add the data from the result row
+        from the fallback country tables. 'class_type' defines
+        the type of result to return. Returns None if the row is None.
+    """
+    if row is None:
+        return None
+
+    return class_type(source_table=SourceTable.COUNTRY,
+                      category=('place', 'country'),
+                      centroid=Point.from_wkb(row.centroid),
+                      names=row.name,
+                      rank_address=4, rank_search=4,
+                      country_code=row.country_code)
+
+
+async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
                              details: LookupDetails) -> None:
     """ Retrieve more details from the database according to the
         parameters specified in 'details'.
     """
-    log().section('Query details for result')
-    if details.address_details:
-        log().comment('Query address details')
-        await complete_address_details(conn, result)
-    if details.linked_places:
-        log().comment('Query linked places')
-        await complete_linked_places(conn, result)
-    if details.parented_places:
-        log().comment('Query parent places')
-        await complete_parented_places(conn, result)
-    if details.keywords:
-        log().comment('Query keywords')
-        await complete_keywords(conn, result)
+    if results:
+        log().section('Query details for result')
+        if details.address_details:
+            log().comment('Query address details')
+            await complete_address_details(conn, results)
+        if details.linked_places:
+            log().comment('Query linked places')
+            for result in results:
+                await complete_linked_places(conn, result)
+        if details.parented_places:
+            log().comment('Query parent places')
+            for result in results:
+                await complete_parented_places(conn, result)
+        if details.keywords:
+            log().comment('Query keywords')
+            for result in results:
+                await complete_keywords(conn, result)
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
 
 
 def _result_row_to_address_row(row: SaRow) -> AddressLine:
@@ -319,10 +404,8 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
     if hasattr(row, 'place_type') and row.place_type:
         extratags['place'] = row.place_type
 
     if hasattr(row, 'place_type') and row.place_type:
         extratags['place'] = row.place_type
 
-    names = row.name
+    names = _mingle_name_tags(row.name) or {}
     if getattr(row, 'housenumber', None) is not None:
     if getattr(row, 'housenumber', None) is not None:
-        if names is None:
-            names = {}
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
         names['housenumber'] = row.housenumber
 
     return AddressLine(place_id=row.place_id,
@@ -337,35 +420,88 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        distance=row.distance)
 
 
                        distance=row.distance)
 
 
-async def complete_address_details(conn: SearchConnection, result: BaseResult) -> None:
+def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
+    places = []
+    hnrs = []
+    for result in results:
+        if result.place_id:
+            housenumber = -1
+            if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
+                if result.housenumber is not None:
+                    housenumber = int(result.housenumber)
+                elif result.extratags is not None and 'startnumber' in result.extratags:
+                    # details requests do not come with a specific house number
+                    housenumber = int(result.extratags['startnumber'])
+            places.append(result.place_id)
+            hnrs.append(housenumber)
+
+    return places, hnrs
+
+
+async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
     """ Retrieve information about places that make up the address of the result.
     """
-    housenumber = -1
-    if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-        if result.housenumber is not None:
-            housenumber = int(result.housenumber)
-        elif result.extratags is not None and 'startnumber' in result.extratags:
-            # details requests do not come with a specific house number
-            housenumber = int(result.extratags['startnumber'])
-
-    sfn = sa.func.get_addressdata(result.place_id, housenumber)\
-            .table_valued( # type: ignore[no-untyped-call]
-                sa.column('place_id', type_=sa.Integer),
-                'osm_type',
-                sa.column('osm_id', type_=sa.BigInteger),
-                sa.column('name', type_=conn.t.types.Composite),
-                'class', 'type', 'place_type',
-                sa.column('admin_level', type_=sa.Integer),
-                sa.column('fromarea', type_=sa.Boolean),
-                sa.column('isaddress', type_=sa.Boolean),
-                sa.column('rank_address', type_=sa.SmallInteger),
-                sa.column('distance', type_=sa.Float))
-    sql = sa.select(sfn).order_by(sa.column('rank_address').desc(),
-                                  sa.column('isaddress').desc())
-
-    result.address_rows = AddressLines()
+    places, hnrs = _get_housenumber_details(results)
+
+    if not places:
+        return
+
+    def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
+        return sa.func.get_addressdata(place_id, hnr)\
+                    .table_valued( # type: ignore[no-untyped-call]
+                        sa.column('place_id', type_=sa.Integer),
+                        'osm_type',
+                        sa.column('osm_id', type_=sa.BigInteger),
+                        sa.column('name', type_=conn.t.types.Composite),
+                        'class', 'type', 'place_type',
+                        sa.column('admin_level', type_=sa.Integer),
+                        sa.column('fromarea', type_=sa.Boolean),
+                        sa.column('isaddress', type_=sa.Boolean),
+                        sa.column('rank_address', type_=sa.SmallInteger),
+                        sa.column('distance', type_=sa.Float),
+                        joins_implicitly=True)
+
+
+    if len(places) == 1:
+        # Optimized case for exactly one result (reverse)
+        sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
+                .order_by(sa.column('rank_address').desc(),
+                          sa.column('isaddress').desc())
+
+        alines = AddressLines()
+        for row in await conn.execute(sql):
+            alines.append(_result_row_to_address_row(row))
+
+        for result in results:
+            if result.place_id == places[0]:
+                result.address_rows = alines
+                return
+
+
+    darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
+                    .table_valued( # type: ignore[no-untyped-call]
+                       sa.column('place_id', type_= sa.Integer),
+                       sa.column('housenumber', type_= sa.Integer)
+                    ).render_derived()
+
+    sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
+
+    sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
+            .order_by(darray.c.place_id,
+                      sa.column('rank_address').desc(),
+                      sa.column('isaddress').desc())
+
+    current_result = None
     for row in await conn.execute(sql):
     for row in await conn.execute(sql):
-        result.address_rows.append(_result_row_to_address_row(row))
+        if current_result is None or row.result_place_id != current_result.place_id:
+            for result in results:
+                if result.place_id == row.result_place_id:
+                    current_result = result
+                    break
+            else:
+                assert False
+            current_result.address_rows = AddressLines()
+        current_result.address_rows.append(_result_row_to_address_row(row))
 
 
 # pylint: disable=consider-using-f-string
 
 
 # pylint: disable=consider-using-f-string
@@ -400,6 +536,9 @@ async def complete_linked_places(conn: SearchConnection, result: BaseResult) ->
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
 
 async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     """ Retrieve information about the search terms used for this place.
+
+        Requires that the query analyzer was initialised to get access to
+        the word table.
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
     """
     t = conn.t.search_name
     sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
@@ -407,10 +546,11 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
 
     result.name_keywords = []
     result.address_keywords = []
 
     result.name_keywords = []
     result.address_keywords = []
-    for name_tokens, address_tokens in await conn.execute(sql):
-        t = conn.t.word
-        sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
 
 
+    t = conn.t.meta.tables['word']
+    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
+
+    for name_tokens, address_tokens in await conn.execute(sql):
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))
 
         for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))