]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/reverse.py
add BDD tests for new layers parameter
[nominatim.git] / nominatim / api / reverse.py
index 053b96dd96f3a1c3db683f547eef76bde5ffc16c..1c9cd4c1708bf435c2dbd5348fb9ddba1f87204e 100644 (file)
@@ -7,26 +7,38 @@
 """
 Implementation of reverse geocoding.
 """
-from typing import Optional
+from typing import Optional, List, Callable, Type, Tuple
 
 import sqlalchemy as sa
 from geoalchemy2 import WKTElement
-from geoalchemy2.types import Geometry
 
-from nominatim.typing import SaColumn, SaSelect, SaTable, SaLabel, SaClause
+from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow
 from nominatim.api.connection import SearchConnection
 import nominatim.api.results as nres
 from nominatim.api.logging import log
-from nominatim.api.types import AnyPoint, DataLayer, LookupDetails, GeometryFormat
+from nominatim.api.types import AnyPoint, DataLayer, LookupDetails, GeometryFormat, Bbox
 
-def _select_from_placex(t: SaTable, wkt: Optional[str] = None) -> SaSelect:
+# In SQLAlchemy expression which compare with NULL need to be expressed with
+# the equal sign.
+# pylint: disable=singleton-comparison
+
+RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
+
+def _select_from_placex(t: SaFromClause, wkt: Optional[str] = None) -> SaSelect:
     """ Create a select statement with the columns relevant for reverse
         results.
     """
     if wkt is None:
         distance = t.c.distance
+        centroid = t.c.centroid
     else:
         distance = t.c.geometry.ST_Distance(wkt)
+        centroid = sa.case(
+                       (t.c.geometry.ST_GeometryType().in_(('ST_LineString',
+                                                           'ST_MultiLineString')),
+                        t.c.geometry.ST_ClosestPoint(wkt)),
+                       else_=t.c.centroid).label('centroid')
+
 
     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                      t.c.class_, t.c.type,
@@ -34,24 +46,41 @@ def _select_from_placex(t: SaTable, wkt: Optional[str] = None) -> SaSelect:
                      t.c.housenumber, t.c.postcode, t.c.country_code,
                      t.c.importance, t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
-                     t.c.centroid,
+                     centroid,
                      distance.label('distance'),
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
 
-def _interpolated_housenumber(table: SaTable) -> SaLabel:
-    # Entries with startnumber = endnumber are legacy from version < 4.1
+def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
     return sa.cast(table.c.startnumber
                     + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
                                     / table.c.step) * table.c.step,
                    sa.Integer).label('housenumber')
 
 
-def _is_address_point(table: SaTable) -> SaClause:
+def _interpolated_position(table: SaFromClause) -> SaLabel:
+    fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
+    rounded_pos = sa.func.round(table.c.position / fac) * fac
+    return sa.case(
+             (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
+              else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
+
+
+def _locate_interpolation(table: SaFromClause, wkt: WKTElement) -> SaLabel:
+    """ Given a position, locate the closest point on the line.
+    """
+    return sa.case((table.c.linegeo.ST_GeometryType() == 'ST_LineString',
+                    sa.func.ST_LineLocatePoint(table.c.linegeo, wkt)),
+                   else_=0).label('position')
+
+
+def _is_address_point(table: SaFromClause) -> SaColumn:
     return sa.and_(table.c.rank_address == 30,
                    sa.or_(table.c.housenumber != None,
                           table.c.name.has_key('housename')))
 
+def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
+    return min(rows, key=lambda row: 1000 if row is None else row.distance)
 
 class ReverseGeocoder:
     """ Class implementing the logic for looking up a place from a
@@ -65,6 +94,22 @@ class ReverseGeocoder:
         self.layer = layer
         self.details = details
 
+    def layer_enabled(self, *layer: DataLayer) -> bool:
+        """ Return true when any of the given layer types are requested.
+        """
+        return any(self.layer & l for l in layer)
+
+
+    def layer_disabled(self, *layer: DataLayer) -> bool:
+        """ Return true when none of the given layer types is requested.
+        """
+        return not any(self.layer & l for l in layer)
+
+
+    def has_feature_layers(self) -> bool:
+        """ Return true if any layer other than ADDRESS or POI is requested.
+        """
+        return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
 
     def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect:
         if not self.details.geometry_output:
@@ -87,25 +132,27 @@ class ReverseGeocoder:
         return sql.add_columns(*out)
 
 
-    def _filter_by_layer(self, table: SaTable) -> SaColumn:
-        if self.layer & DataLayer.MANMADE:
+    def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
+        if self.layer_enabled(DataLayer.MANMADE):
             exclude = []
-            if not (self.layer & DataLayer.RAILWAY):
+            if self.layer_disabled(DataLayer.RAILWAY):
                 exclude.append('railway')
-            if not (self.layer & DataLayer.NATURAL):
+            if self.layer_disabled(DataLayer.NATURAL):
                 exclude.extend(('natural', 'water', 'waterway'))
             return table.c.class_.not_in(tuple(exclude))
 
         include = []
-        if self.layer & DataLayer.RAILWAY:
+        if self.layer_enabled(DataLayer.RAILWAY):
             include.append('railway')
-        if not (self.layer & DataLayer.NATURAL):
+        if self.layer_enabled(DataLayer.NATURAL):
             include.extend(('natural', 'water', 'waterway'))
         return table.c.class_.in_(tuple(include))
 
 
-    async def _find_closest_street_or_poi(self, wkt: WKTElement) -> SaRow:
-        """ Look up the clostest rank 26+ place in the database.
+    async def _find_closest_street_or_poi(self, wkt: WKTElement,
+                                          distance: float) -> Optional[SaRow]:
+        """ Look up the closest rank 26+ place in the database, which
+            is closer than the given distance.
         """
         t = self.conn.t.placex
 
@@ -113,38 +160,39 @@ class ReverseGeocoder:
                 .where(t.c.geometry.ST_DWithin(wkt, distance))\
                 .where(t.c.indexed_status == 0)\
                 .where(t.c.linked_place_id == None)\
-                .where(sa.or_(t.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
+                .where(sa.or_(t.c.geometry.ST_GeometryType()
+                                          .not_in(('ST_Polygon', 'ST_MultiPolygon')),
                               t.c.centroid.ST_Distance(wkt) < distance))\
                 .order_by('distance')\
                 .limit(1)
 
         sql = self._add_geometry_columns(sql, t.c.geometry)
 
-        restrict = []
+        restrict: List[SaColumn] = []
 
-        if self.layer & DataLayer.ADDRESS:
+        if self.layer_enabled(DataLayer.ADDRESS):
             restrict.append(sa.and_(t.c.rank_address >= 26,
-                                    t.c.rank_address <= self.max_rank))
+                                    t.c.rank_address <= min(29, self.max_rank)))
             if self.max_rank == 30:
                 restrict.append(_is_address_point(t))
-        if self.layer & DataLayer.POI and max_rank == 30:
+        if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
             restrict.append(sa.and_(t.c.rank_search == 30,
                                     t.c.class_.not_in(('place', 'building')),
                                     t.c.geometry.ST_GeometryType() != 'ST_LineString'))
-        if self.layer & (DataLayer.RAILWAY | DataLayer.MANMADE | DataLayer.NATURAL):
-            restrict.append(sa.and_(t.c.rank_search >= 26,
-                                    tc.rank_search <= self.max_rank,
+        if self.has_feature_layers():
+            restrict.append(sa.and_(t.c.rank_search.between(26, self.max_rank),
+                                    t.c.rank_address == 0,
                                     self._filter_by_layer(t)))
 
-        if restrict:
-            sql = sql.where(sa.or_(*restrict))
+        if not restrict:
+            return None
 
-        return (await self.conn.execute(sql)).one_or_none()
+        return (await self.conn.execute(sql.where(sa.or_(*restrict)))).one_or_none()
 
 
     async def _find_housenumber_for_street(self, parent_place_id: int,
                                            wkt: WKTElement) -> Optional[SaRow]:
-        t = conn.t.placex
+        t = self.conn.t.placex
 
         sql = _select_from_placex(t, wkt)\
                 .where(t.c.geometry.ST_DWithin(wkt, 0.001))\
@@ -161,26 +209,28 @@ class ReverseGeocoder:
 
 
     async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
-                                             wkt: WKTElement) -> Optional[SaRow]:
+                                             wkt: WKTElement,
+                                             distance: float) -> Optional[SaRow]:
         t = self.conn.t.osmline
 
-        inner = sa.select(t,
-                          t.c.linegeo.ST_Distance(wkt).label('distance'),
-                          t.c.linegeo.ST_LineLocatePoint(wkt).label('position'))\
-                  .where(t.c.linegeo.ST_DWithin(wkt, distance))\
-                  .order_by('distance')\
-                  .limit(1)
+        sql = sa.select(t,
+                        t.c.linegeo.ST_Distance(wkt).label('distance'),
+                        _locate_interpolation(t, wkt))\
+                .where(t.c.linegeo.ST_DWithin(wkt, distance))\
+                .where(t.c.startnumber != None)\
+                .order_by('distance')\
+                .limit(1)
 
         if parent_place_id is not None:
-            inner = inner.where(t.c.parent_place_id == parent_place_id)
+            sql = sql.where(t.c.parent_place_id == parent_place_id)
 
-        inner = inner.subquery()
+        inner = sql.subquery()
 
         sql = sa.select(inner.c.place_id, inner.c.osm_id,
                         inner.c.parent_place_id, inner.c.address,
                         _interpolated_housenumber(inner),
+                        _interpolated_position(inner),
                         inner.c.postcode, inner.c.country_code,
-                        inner.c.linegeo.ST_LineInterpolatePoint(inner.c.position).label('centroid'),
                         inner.c.distance)
 
         if self.details.geometry_output:
@@ -191,12 +241,13 @@ class ReverseGeocoder:
 
 
     async def _find_tiger_number_for_street(self, parent_place_id: int,
+                                            parent_type: str, parent_id: int,
                                             wkt: WKTElement) -> Optional[SaRow]:
         t = self.conn.t.tiger
 
         inner = sa.select(t,
                           t.c.linegeo.ST_Distance(wkt).label('distance'),
-                          sa.func.ST_LineLocatePoint(t.c.linegeo, wkt).label('position'))\
+                          _locate_interpolation(t, wkt))\
                   .where(t.c.linegeo.ST_DWithin(wkt, 0.001))\
                   .where(t.c.parent_place_id == parent_place_id)\
                   .order_by('distance')\
@@ -205,34 +256,37 @@ class ReverseGeocoder:
 
         sql = sa.select(inner.c.place_id,
                         inner.c.parent_place_id,
+                        sa.literal(parent_type).label('osm_type'),
+                        sa.literal(parent_id).label('osm_id'),
                         _interpolated_housenumber(inner),
+                        _interpolated_position(inner),
                         inner.c.postcode,
-                        inner.c.linegeo.ST_LineInterpolatePoint(inner.c.position).label('centroid'),
                         inner.c.distance)
 
         if self.details.geometry_output:
             sub = sql.subquery()
             sql = self._add_geometry_columns(sql, sub.c.centroid)
 
-        return (await conn.execute(sql)).one_or_none()
+        return (await self.conn.execute(sql)).one_or_none()
 
 
-    async def lookup_street_poi(self, wkt: WKTElement) -> Optional[nres.ReverseResult]:
+    async def lookup_street_poi(self,
+                                wkt: WKTElement) -> Tuple[Optional[SaRow], RowFunc]:
         """ Find a street or POI/address for the given WKT point.
         """
         log().section('Reverse lookup on street/address level')
-        result = None
         distance = 0.006
         parent_place_id = None
 
-        row = await self._find_closest_street_or_poi(wkt)
+        row = await self._find_closest_street_or_poi(wkt, distance)
+        row_func: RowFunc = nres.create_from_placex_row
         log().var_dump('Result (street/building)', row)
 
         # If the closest result was a street, but an address was requested,
         # check for a housenumber nearby which is part of the street.
         if row is not None:
             if self.max_rank > 27 \
-               and self.layer & DataLayer.ADDRESS \
+               and self.layer_enabled(DataLayer.ADDRESS) \
                and row.rank_address <= 27:
                 distance = 0.001
                 parent_place_id = row.place_id
@@ -242,27 +296,34 @@ class ReverseGeocoder:
 
                 if addr_row is not None:
                     row = addr_row
+                    row_func = nres.create_from_placex_row
                     distance = addr_row.distance
                 elif row.country_code == 'us' and parent_place_id is not None:
                     log().comment('Find TIGER housenumber for street')
-                    addr_row = await self._find_tiger_number_for_street(parent_place_id, wkt)
+                    addr_row = await self._find_tiger_number_for_street(parent_place_id,
+                                                                        row.osm_type,
+                                                                        row.osm_id,
+                                                                        wkt)
                     log().var_dump('Result (street Tiger housenumber)', addr_row)
 
                     if addr_row is not None:
-                        result = nres.create_from_tiger_row(addr_row)
+                        row = addr_row
+                        row_func = nres.create_from_tiger_row
             else:
                 distance = row.distance
 
         # Check for an interpolation that is either closer than our result
         # or belongs to a close street found.
-        if self.max_rank > 27 and self.layer & DataLayer.ADDRESS:
+        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
             log().comment('Find interpolation for street')
-            addr_row = await self._find_interpolation_for_street(parent_place_id, wkt)
+            addr_row = await self._find_interpolation_for_street(parent_place_id,
+                                                                 wkt, distance)
             log().var_dump('Result (street interpolation)', addr_row)
             if addr_row is not None:
-                result = nres.create_from_osmline_row(addr_row)
+                row = addr_row
+                row_func = nres.create_from_osmline_row
 
-        return result or nres.create_from_placex_row(row)
+        return row, row_func
 
 
     async def _lookup_area_address(self, wkt: WKTElement) -> Optional[SaRow]:
@@ -296,13 +357,13 @@ class ReverseGeocoder:
         address_row = (await self.conn.execute(sql)).one_or_none()
         log().var_dump('Result (area)', address_row)
 
-        if address_row is not None and address_row.rank_search < max_rank:
+        if address_row is not None and address_row.rank_search < self.max_rank:
             log().comment('Search for better matching place nodes inside the area')
             inner = sa.select(t,
                               t.c.geometry.ST_Distance(wkt).label('distance'))\
                       .where(t.c.osm_type == 'N')\
                       .where(t.c.rank_search > address_row.rank_search)\
-                      .where(t.c.rank_search <= max_rank)\
+                      .where(t.c.rank_search <= self.max_rank)\
                       .where(t.c.rank_address.between(5, 25))\
                       .where(t.c.name != None)\
                       .where(t.c.indexed_status == 0)\
@@ -315,10 +376,10 @@ class ReverseGeocoder:
                       .limit(50)\
                       .subquery()
 
-            touter = conn.t.placex.alias('outer')
+            touter = self.conn.t.placex.alias('outer')
             sql = _select_from_placex(inner)\
+                  .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
                   .where(touter.c.place_id == address_row.place_id)\
-                  .where(touter.c.geometry.ST_Contains(inner.c.geometry))\
                   .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                   .limit(1)
@@ -335,7 +396,7 @@ class ReverseGeocoder:
 
 
     async def _lookup_area_others(self, wkt: WKTElement) -> Optional[SaRow]:
-        t = conn.t.placex
+        t = self.conn.t.placex
 
         inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\
                   .where(t.c.rank_address == 0)\
@@ -344,13 +405,16 @@ class ReverseGeocoder:
                   .where(t.c.indexed_status == 0)\
                   .where(t.c.linked_place_id == None)\
                   .where(self._filter_by_layer(t))\
-                  .where(sa.func.reverse_buffered_extent(t.c.geometry, type_=Geometry)
+                  .where(t.c.geometry
+                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
                                 .intersects(wkt))\
                   .order_by(sa.desc(t.c.rank_search))\
-                  .limit(50)
+                  .limit(50)\
+                  .subquery()
 
         sql = _select_from_placex(inner)\
-                  .where(sa._or(inner.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
+                  .where(sa.or_(inner.c.geometry.ST_GeometryType()
+                                                .not_in(('ST_Polygon', 'ST_MultiPolygon')),
                                 inner.c.geometry.ST_Contains(wkt)))\
                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                   .limit(1)
@@ -363,32 +427,25 @@ class ReverseGeocoder:
         return row
 
 
-    async def lookup_area(self, wkt: WKTElement) -> Optional[nres.ReverseResult]:
+    async def lookup_area(self, wkt: WKTElement) -> Optional[SaRow]:
         """ Lookup large areas for the given WKT point.
         """
         log().section('Reverse lookup by larger area features')
-        t = self.conn.t.placex
 
-        if self.layer & DataLayer.ADDRESS:
+        if self.layer_enabled(DataLayer.ADDRESS):
             address_row = await self._lookup_area_address(wkt)
-            address_distance = address_row.distance
         else:
             address_row = None
-            address_distance = 1000
 
-        if self.layer & (~DataLayer.ADDRESS & ~DataLayer.POI):
+        if self.has_feature_layers():
             other_row = await self._lookup_area_others(wkt)
-            other_distance = other_row.distance
         else:
             other_row = None
-            other_distance = 1000
-
-        result = address_row if address_distance <= other_distance else other_row
 
-        return nres.create_from_placex_row(result)
+        return _get_closest(address_row, other_row)
 
 
-    async def lookup_country(self, wkt: WKTElement) -> Optional[nres.ReverseResult]:
+    async def lookup_country(self, wkt: WKTElement) -> Optional[SaRow]:
         """ Lookup the country for the given WKT point.
         """
         log().section('Reverse lookup by country code')
@@ -402,10 +459,10 @@ class ReverseGeocoder:
         if not ccodes:
             return None
 
-        if self.layer & DataLayer.ADDRESS and self.max_rank > 4:
+        t = self.conn.t.placex
+        if self.max_rank > 4:
             log().comment('Search for place nodes in country')
 
-            t = conn.t.placex
             inner = sa.select(t,
                               t.c.geometry.ST_Distance(wkt).label('distance'))\
                       .where(t.c.osm_type == 'N')\
@@ -436,40 +493,8 @@ class ReverseGeocoder:
         else:
             address_row = None
 
-        if layer & (~DataLayer.ADDRESS & ~DataLayer.POI) and self.max_rank > 4:
-            log().comment('Search for non-address features inside country')
-
-            t = conn.t.placex
-            inner = sa.select(t, t.c.geometry.ST_Distance(wkt).label('distance'))\
-                      .where(t.c.rank_address == 0)\
-                      .where(t.c.rank_search.between(5, self.max_rank))\
-                      .where(t.c.name != None)\
-                      .where(t.c.indexed_status == 0)\
-                      .where(t.c.linked_place_id == None)\
-                      .where(self._filter_by_layer(t))\
-                      .where(t.c.country_code.in_(ccode))\
-                      .where(sa.func.reverse_buffered_extent(t.c.geometry, type_=Geometry)
-                                    .intersects(wkt))\
-                      .order_by(sa.desc(t.c.rank_search))\
-                      .limit(50)\
-                      .subquery()
-
-            sql = _select_from_placex(inner)\
-                      .where(sa._or(inner.c.geometry.ST_GeometryType().not_in(('ST_Polygon', 'ST_MultiPolygon')),
-                                    inner.c.geometry.ST_Contains(wkt)))\
-                      .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
-                      .limit(1)
-
-            sql = self._add_geometry_columns(sql, inner.c.geometry)
-
-            other_row = (await self.conn.execute(sql)).one_or_none()
-            log().var_dump('Result (non-address feature)', other_row)
-        else:
-            other_row = None
-
-        if layer & DataLayer.ADDRESS and address_row is None and other_row is None:
+        if address_row is None:
             # Still nothing, then return a country with the appropriate country code.
-            t = conn.t.placex
             sql = _select_from_placex(t, wkt)\
                       .where(t.c.country_code.in_(ccodes))\
                       .where(t.c.rank_address == 4)\
@@ -477,11 +502,11 @@ class ReverseGeocoder:
                       .where(t.c.linked_place_id == None)\
                       .order_by('distance')
 
-            sql = self._add_geometry_columns(sql, inner.c.geometry)
+            sql = self._add_geometry_columns(sql, t.c.geometry)
 
             address_row = (await self.conn.execute(sql)).one_or_none()
 
-        return nres.create_from_placex_row(_get_closest_row(address_row, other_row))
+        return address_row
 
 
     async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
@@ -495,15 +520,24 @@ class ReverseGeocoder:
 
         wkt = WKTElement(f'POINT({coord[0]} {coord[1]})', srid=4326)
 
-        result: Optional[ReverseResult] = None
+        row: Optional[SaRow] = None
+        row_func: RowFunc = nres.create_from_placex_row
+
+        if self.max_rank >= 26:
+            row, tmp_row_func = await self.lookup_street_poi(wkt)
+            if row is not None:
+                row_func = tmp_row_func
+        if row is None and self.max_rank > 4:
+            row = await self.lookup_area(wkt)
+        if row is None and self.layer_enabled(DataLayer.ADDRESS):
+            row = await self.lookup_country(wkt)
 
-        if max_rank >= 26:
-            result = await self.lookup_street_poi(wkt)
-        if result is None and max_rank > 4:
-            result = await self.lookup_area(wkt)
-        if result is None:
-            result = await self.lookup_country(wkt)
+        result = row_func(row, nres.ReverseResult)
         if result is not None:
+            assert row is not None
+            result.distance = row.distance
+            if hasattr(row, 'bbox'):
+                result.bbox = Bbox.from_wkb(row.bbox.data)
             await nres.add_result_details(self.conn, result, self.details)
 
         return result