git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/reverse.py
add penalty for single words that look like stop words
[nominatim.git] / nominatim / api / reverse.py
index 63836b4924f18589ddec07b52bc78ff11f942e64..e16742cfa34170f6a8afbd0da75b6c945d7635d7 100644 (file)
@@ -19,7 +19,6 @@ import nominatim.api.results as nres
 from nominatim.api.logging import log
 from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
 from nominatim.db.sqlalchemy_types import Geometry
-import nominatim.db.sqlalchemy_functions as snfn
 
 # In SQLAlchemy expression which compare with NULL need to be expressed with
 # the equal sign.
@@ -57,6 +56,7 @@ def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
                      t.c.importance, t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                      centroid,
+                     t.c.linked_place_id, t.c.admin_level,
                      distance.label('distance'),
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
@@ -84,12 +84,6 @@ def _locate_interpolation(table: SaFromClause) -> SaLabel:
                    else_=0).label('position')
 
 
-def _is_address_point(table: SaFromClause) -> SaColumn:
-    return sa.and_(table.c.rank_address == 30,
-                   sa.or_(table.c.housenumber != None,
-                          table.c.name.has_key('addr:housename')))
-
-
 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
     return min(rows, key=lambda row: 1000 if row is None else row.distance)
 
@@ -99,9 +93,11 @@ class ReverseGeocoder:
         coordinate.
     """
 
-    def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None:
+    def __init__(self, conn: SearchConnection, params: ReverseDetails,
+                 restrict_to_country_areas: bool = False) -> None:
         self.conn = conn
         self.params = params
+        self.restrict_to_country_areas = restrict_to_country_areas
 
         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
 
@@ -144,13 +140,13 @@ class ReverseGeocoder:
             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
 
         if self.params.geometry_output & GeometryFormat.GEOJSON:
-            out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+            out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
         if self.params.geometry_output & GeometryFormat.TEXT:
             out.append(sa.func.ST_AsText(col).label('geometry_text'))
         if self.params.geometry_output & GeometryFormat.KML:
-            out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+            out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
         if self.params.geometry_output & GeometryFormat.SVG:
-            out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+            out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
 
         return sql.add_columns(*out)
 
@@ -179,12 +175,12 @@ class ReverseGeocoder:
         t = self.conn.t.placex
 
         # PostgreSQL must not get the distance as a parameter because
-        # there is a danger it won't be able to proberly estimate index use
+        # there is a danger it won't be able to properly estimate index use
         # when used with prepared statements
         diststr = sa.text(f"{distance}")
 
         sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
-                .where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
+                .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
                 .where(t.c.indexed_status == 0)
                 .where(t.c.linked_place_id == None)
                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
@@ -201,7 +197,7 @@ class ReverseGeocoder:
             max_rank = min(29, self.max_rank)
             restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
             if self.max_rank == 30:
-                restrict.append(lambda: _is_address_point(t))
+                restrict.append(lambda: sa.func.IsAddressPoint(t))
         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
             restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
                                             t.c.class_.not_in(('place', 'building')),
@@ -222,17 +218,21 @@ class ReverseGeocoder:
     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
         t = self.conn.t.placex
 
-        sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
-                .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
-                .where(t.c.parent_place_id == parent_place_id)
-                .where(_is_address_point(t))
-                .where(t.c.indexed_status == 0)
-                .where(t.c.linked_place_id == None)
-                .order_by('distance')
-                .limit(1))
-
+        def _base_query() -> SaSelect:
+            return _select_from_placex(t)\
+                .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
+                .where(t.c.parent_place_id == parent_place_id)\
+                .where(sa.func.IsAddressPoint(t))\
+                .where(t.c.indexed_status == 0)\
+                .where(t.c.linked_place_id == None)\
+                .order_by('distance')\
+                .limit(1)
+
+        sql: SaLambdaSelect
         if self.has_geometries():
-            sql = self._add_geometry_columns(sql, t.c.geometry)
+            sql = self._add_geometry_columns(_base_query(), t.c.geometry)
+        else:
+            sql = sa.lambda_stmt(_base_query)
 
         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
@@ -241,30 +241,26 @@ class ReverseGeocoder:
                                              distance: float) -> Optional[SaRow]:
         t = self.conn.t.osmline
 
-        sql: Any = sa.lambda_stmt(lambda:
-                   sa.select(t,
-                             t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
-                             _locate_interpolation(t))
-                     .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
-                     .where(t.c.startnumber != None)
-                     .order_by('distance')
-                     .limit(1))
+        sql = sa.select(t,
+                        t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
+                        _locate_interpolation(t))\
+                .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
+                .where(t.c.startnumber != None)\
+                .order_by('distance')\
+                .limit(1)
 
         if parent_place_id is not None:
-            sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
+            sql = sql.where(t.c.parent_place_id == parent_place_id)
 
-        def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
-            inner = base_sql.subquery('ipol')
+        inner = sql.subquery('ipol')
 
-            return sa.select(inner.c.place_id, inner.c.osm_id,
+        sql = sa.select(inner.c.place_id, inner.c.osm_id,
                              inner.c.parent_place_id, inner.c.address,
                              _interpolated_housenumber(inner),
                              _interpolated_position(inner),
                              inner.c.postcode, inner.c.country_code,
                              inner.c.distance)
 
-        sql += _wrap_query
-
         if self.has_geometries():
             sub = sql.subquery('geom')
             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
@@ -279,7 +275,7 @@ class ReverseGeocoder:
             inner = sa.select(t,
                               t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
                               _locate_interpolation(t))\
-                      .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
+                      .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
                       .where(t.c.parent_place_id == parent_place_id)\
                       .order_by('distance')\
                       .limit(1)\
@@ -292,11 +288,12 @@ class ReverseGeocoder:
                              inner.c.postcode,
                              inner.c.distance)
 
-        sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
-
+        sql: SaLambdaSelect
         if self.has_geometries():
-            sub = sql.subquery('geom')
+            sub = _base_query().subquery('geom')
             sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
+        else:
+            sql = sa.lambda_stmt(_base_query)
 
         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
@@ -368,7 +365,7 @@ class ReverseGeocoder:
             inner = sa.select(t, sa.literal(0.0).label('distance'))\
                       .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
                       .where(t.c.geometry.intersects(WKT_PARAM))\
-                      .where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
+                      .where(sa.func.PlacexGeometryReverseLookuppolygon())\
                       .order_by(sa.desc(t.c.rank_search))\
                       .limit(50)\
                       .subquery('area')
@@ -398,10 +395,7 @@ class ReverseGeocoder:
                       .where(t.c.rank_search > address_rank)\
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
                       .where(t.c.indexed_status == 0)\
-                      .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
-                      .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                       .order_by(sa.desc(t.c.rank_search))\
                       .limit(50)\
                       .subquery('places')
@@ -410,13 +404,15 @@ class ReverseGeocoder:
                 return _select_from_placex(inner, False)\
                     .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
                     .where(touter.c.place_id == address_id)\
-                    .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
+                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                     .limit(1)
 
-            sql = sa.lambda_stmt(_place_inside_area_query)
             if self.has_geometries():
-                sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
+                sql = self._add_geometry_columns(_place_inside_area_query(),
+                                                 sa.literal_column('places.geometry'))
+            else:
+                sql = sa.lambda_stmt(_place_inside_area_query)
 
             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
             log().var_dump('Result (place node)', place_address_row)
@@ -437,10 +433,9 @@ class ReverseGeocoder:
                   .where(t.c.indexed_status == 0)\
                   .where(t.c.linked_place_id == None)\
                   .where(self._filter_by_layer(t))\
-                  .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                  .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
                   .order_by(sa.desc(t.c.rank_search))\
+                  .order_by('distance')\
                   .limit(50)\
                   .subquery()
 
@@ -477,16 +472,24 @@ class ReverseGeocoder:
         return _get_closest(address_row, other_row)
 
 
-    async def lookup_country(self) -> Optional[SaRow]:
+    async def lookup_country_codes(self) -> List[str]:
         """ Lookup the country for the current search.
         """
         log().section('Reverse lookup by country code')
         t = self.conn.t.country_grid
-        sql: SaLambdaSelect = sa.select(t.c.country_code).distinct()\
+        sql = sa.select(t.c.country_code).distinct()\
                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
 
-        ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
+        ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
         log().var_dump('Country codes', ccodes)
+        return ccodes
+
+
+    async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
+        """ Lookup the country for the current search.
+        """
+        if not ccodes:
+            ccodes = await self.lookup_country_codes()
 
         if not ccodes:
             return None
@@ -503,22 +506,22 @@ class ReverseGeocoder:
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
                       .where(t.c.indexed_status == 0)\
                       .where(t.c.country_code.in_(ccodes))\
-                      .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
-                      .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                       .order_by(sa.desc(t.c.rank_search))\
                       .limit(50)\
                       .subquery('area')
 
                 return _select_from_placex(inner, False)\
-                    .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
+                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                     .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                     .limit(1)
 
-            sql = sa.lambda_stmt(_base_query)
+            sql: SaLambdaSelect
             if self.has_geometries():
-                sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
+                sql = self._add_geometry_columns(_base_query(),
+                                                 sa.literal_column('area.geometry'))
+            else:
+                sql = sa.lambda_stmt(_base_query)
 
             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
             log().var_dump('Result (addressable place node)', address_row)
@@ -527,16 +530,19 @@ class ReverseGeocoder:
 
         if address_row is None:
             # Still nothing, then return a country with the appropriate country code.
-            sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
-                      .where(t.c.country_code.in_(ccodes))\
-                      .where(t.c.rank_address == 4)\
-                      .where(t.c.rank_search == 4)\
-                      .where(t.c.linked_place_id == None)\
-                      .order_by('distance')\
-                      .limit(1))
+            def _country_base_query() -> SaSelect:
+                return _select_from_placex(t)\
+                         .where(t.c.country_code.in_(ccodes))\
+                         .where(t.c.rank_address == 4)\
+                         .where(t.c.rank_search == 4)\
+                         .where(t.c.linked_place_id == None)\
+                         .order_by('distance')\
+                         .limit(1)
 
             if self.has_geometries():
-                sql = self._add_geometry_columns(sql, t.c.geometry)
+                sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
+            else:
+                sql = sa.lambda_stmt(_country_base_query)
 
             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
@@ -559,10 +565,19 @@ class ReverseGeocoder:
             row, tmp_row_func = await self.lookup_street_poi()
             if row is not None:
                 row_func = tmp_row_func
-        if row is None and self.max_rank > 4:
-            row = await self.lookup_area()
-        if row is None and self.layer_enabled(DataLayer.ADDRESS):
-            row = await self.lookup_country()
+
+        if row is None:
+            if self.restrict_to_country_areas:
+                ccodes = await self.lookup_country_codes()
+                if not ccodes:
+                    return None
+            else:
+                ccodes = []
+
+            if self.max_rank > 4:
+                row = await self.lookup_area()
+            if row is None and self.layer_enabled(DataLayer.ADDRESS):
+                row = await self.lookup_country(ccodes)
 
         result = row_func(row, nres.ReverseResult)
         if result is not None: