git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/search/db_searches.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / api / search / db_searches.py
index 85dc30193f8991aaaafa02cc3031c819c609ec21..232f816ef89609f050ea15e79f3651410222ef86 100644 (file)
@@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry
 #pylint: disable=singleton-comparison,not-callable
 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
 
+def no_index(expr: SaColumn) -> SaColumn:
+    """ Wrap the given expression in COALESCE(NULL, expr), so that the
+        query planner will refrain from using it for index lookup.
+    """
+    return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
 def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
     """ Create a dictionary from search parameters that can be used
         as bind parameter for SQL execute.
@@ -59,8 +66,9 @@ def _select_placex(t: SaFromClause) -> SaSelect:
                      t.c.class_, t.c.type,
                      t.c.address, t.c.extratags,
                      t.c.housenumber, t.c.postcode, t.c.country_code,
-                     t.c.importance, t.c.wikipedia,
+                     t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
+                     t.c.linked_place_id, t.c.admin_level,
                      t.c.centroid,
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
@@ -72,13 +80,13 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet
         col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
 
     if details.geometry_output & GeometryFormat.GEOJSON:
-        out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+        out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
     if details.geometry_output & GeometryFormat.TEXT:
         out.append(sa.func.ST_AsText(col).label('geometry_text'))
     if details.geometry_output & GeometryFormat.KML:
-        out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+        out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
     if details.geometry_output & GeometryFormat.SVG:
-        out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+        out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
 
     return sql.add_columns(*out)
 
@@ -106,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
 def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
     orexpr: List[SaExpression] = []
     if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
-        orexpr.append(table.c.rank_address.between(1, 30))
+        orexpr.append(no_index(table.c.rank_address).between(1, 30))
     elif layers & DataLayer.ADDRESS:
-        orexpr.append(table.c.rank_address.between(1, 29))
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(no_index(table.c.rank_address).between(1, 29))
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                               sa.or_(table.c.housenumber != None,
-                                     table.c.address.has_key('housename'))))
+                                     table.c.address.has_key('addr:housename'))))
     elif layers & DataLayer.POI:
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                               table.c.class_.not_in(('place', 'building'))))
 
     if layers & DataLayer.MANMADE:
@@ -123,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
         if not layers & DataLayer.NATURAL:
             exclude.extend(('natural', 'water', 'waterway'))
         orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
     else:
         include = []
         if layers & DataLayer.RAILWAY:
@@ -131,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
         if layers & DataLayer.NATURAL:
             include.extend(('natural', 'water', 'waterway'))
         orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
 
     if len(orexpr) == 1:
         return orexpr[0]
@@ -150,7 +158,8 @@ async def _get_placex_housenumbers(conn: SearchConnection,
                                    place_ids: List[int],
                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
     t = conn.t.placex
-    sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
+    sql = _select_placex(t).add_columns(t.c.importance)\
+                           .where(t.c.place_id.in_(place_ids))
 
     if details.geometry_output:
         sql = _add_geometry_columns(sql, t.c.geometry, details)
@@ -247,9 +256,20 @@ class NearSearch(AbstractSearch):
 
         base.sort(key=lambda r: (r.accuracy, r.rank_search))
         max_accuracy = base[0].accuracy + 0.5
+        if base[0].rank_address == 0:
+            min_rank = 0
+            max_rank = 0
+        elif base[0].rank_address < 26:
+            min_rank = 1
+            max_rank = min(25, base[0].rank_address + 4)
+        else:
+            min_rank = 26
+            max_rank = 30
         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
                                                      and r.accuracy <= max_accuracy
-                                                     and r.bbox and r.bbox.area < 20)
+                                                     and r.bbox and r.bbox.area < 20
+                                                     and r.rank_address >= min_rank
+                                                     and r.rank_address <= max_rank)
 
         if base:
             baseids = [b.place_id for b in base[:5] if b.place_id]
@@ -271,30 +291,39 @@ class NearSearch(AbstractSearch):
         """
         table = await conn.get_class_table(*category)
 
-        t = conn.t.placex.alias('p')
         tgeom = conn.t.placex.alias('pgeom')
 
-        sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
-                               .where(t.c.class_ == category[0])\
-                               .where(t.c.type == category[1])
-
         if table is None:
             # No classtype table available, do a simplified lookup in placex.
-            sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
-                     .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
+            table = conn.t.placex.alias('inner')
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
+                    .where(table.c.class_ == category[0])\
+                    .where(table.c.type == category[1])
         else:
             # Use classtype table. We can afford to use a larger
             # radius for the lookup.
-            sql = sql.join(table, t.c.place_id == table.c.place_id)\
-                     .join(tgeom,
-                           table.c.centroid.ST_CoveredBy(
-                               sa.case((sa.and_(tgeom.c.rank_address < 9,
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom,
+                          table.c.centroid.ST_CoveredBy(
+                              sa.case((sa.and_(tgeom.c.rank_address > 9,
                                                 tgeom.c.geometry.is_area()),
-                                        tgeom.c.geometry),
-                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))\
-                     .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                                       tgeom.c.geometry),
+                                      else_ = tgeom.c.centroid.ST_Expand(0.05))))
 
-        sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
+        inner = sql.where(tgeom.c.place_id.in_(ids))\
+                   .group_by(table.c.place_id).subquery()
+
+        t = conn.t.placex
+        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
+                               .join(inner, inner.c.place_id == t.c.place_id)\
+                               .order_by(inner.c.dist)
+
+        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
         if details.countries:
             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
         if details.excluded:
@@ -317,7 +346,7 @@ class PoiSearch(AbstractSearch):
     """
     def __init__(self, sdata: SearchData) -> None:
         super().__init__(sdata.penalty)
-        self.categories = sdata.qualifiers
+        self.qualifiers = sdata.qualifiers
         self.countries = sdata.countries
 
 
@@ -334,12 +363,14 @@ class PoiSearch(AbstractSearch):
             # simply search in placex table
             def _base_query() -> SaSelect:
                 return _select_placex(t) \
+                           .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
+                                         .label('importance'))\
                            .where(t.c.linked_place_id == None) \
                            .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                            .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                            .limit(LIMIT_PARAM)
 
-            classtype = self.categories.values
+            classtype = self.qualifiers.values
             if len(classtype) == 1:
                 cclass, ctype = classtype[0]
                 sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
@@ -358,10 +389,11 @@ class PoiSearch(AbstractSearch):
             rows.extend(await conn.execute(sql, bind_params))
         else:
             # use the class type tables
-            for category in self.categories.values:
+            for category in self.qualifiers.values:
                 table = await conn.get_class_table(*category)
                 if table is not None:
                     sql = _select_placex(t)\
+                               .add_columns(t.c.importance)\
                                .join(table, t.c.place_id == table.c.place_id)\
                                .where(t.c.class_ == category[0])\
                                .where(t.c.type == category[1])
@@ -384,7 +416,7 @@ class PoiSearch(AbstractSearch):
         for row in rows:
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
-            result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
@@ -406,9 +438,10 @@ class CountrySearch(AbstractSearch):
         t = conn.t.placex
 
         ccodes = self.countries.values
-        sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_placex(t)\
+        sql = _select_placex(t)\
+                .add_columns(t.c.importance)\
                 .where(t.c.country_code.in_(ccodes))\
-                .where(t.c.rank_address == 4))
+                .where(t.c.rank_address == 4)
 
         if details.geometry_output:
             sql = _add_geometry_columns(sql, t.c.geometry, details)
@@ -427,6 +460,7 @@ class CountrySearch(AbstractSearch):
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
+            result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
         return results or await self.lookup_in_country_table(conn, details)
@@ -447,7 +481,8 @@ class CountrySearch(AbstractSearch):
 
         sql = sa.select(tgrid.c.country_code,
                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
-                              .label('centroid'))\
+                              .label('centroid'),
+                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
                 .where(tgrid.c.country_code.in_(self.countries.values))\
                 .group_by(tgrid.c.country_code)
 
@@ -463,13 +498,17 @@ class CountrySearch(AbstractSearch):
                          + sa.func.coalesce(t.c.derived_name,
                                             sa.cast('', type_=conn.t.types.Composite))
                         ).label('name'),
-                        sub.c.centroid)\
+                        sub.c.centroid, sub.c.bbox)\
                 .join(sub, t.c.country_code == sub.c.country_code)
 
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, sub.c.centroid, details)
+
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
             result = nres.create_from_country_row(row, nres.SearchResult)
             assert result
+            result.bbox = Bbox.from_wkb(row.bbox)
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
             results.append(result)
 
@@ -495,12 +534,11 @@ class PostcodeSearch(AbstractSearch):
         t = conn.t.postcode
         pcs = self.postcodes.values
 
-        sql: SaLambdaSelect = sa.lambda_stmt(lambda:
-                sa.select(t.c.place_id, t.c.parent_place_id,
+        sql = sa.select(t.c.place_id, t.c.parent_place_id,
                         t.c.rank_search, t.c.rank_address,
                         t.c.postcode, t.c.country_code,
-                        t.c.geometry.label('centroid'))
-                .where(t.c.postcode.in_(pcs)))
+                        t.c.geometry.label('centroid'))\
+                .where(t.c.postcode.in_(pcs))
 
         if details.geometry_output:
             sql = _add_geometry_columns(sql, t.c.geometry, details)
@@ -512,8 +550,8 @@ class PostcodeSearch(AbstractSearch):
                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
-                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
-                                   else_=2.0)
+                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+                                   else_=1.0)
 
         if details.near is not None:
             if details.near_radius is not None:
@@ -578,15 +616,7 @@ class PlaceSearch(AbstractSearch):
         tsearch = conn.t.search_name
 
         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
-                  sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
-                            t.c.class_, t.c.type,
-                            t.c.address, t.c.extratags,
-                            t.c.housenumber, t.c.postcode, t.c.country_code,
-                            t.c.wikipedia,
-                            t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
-                            t.c.centroid,
-                            t.c.geometry.ST_Expand(0).label('bbox'))
-                   .where(t.c.place_id == tsearch.c.place_id))
+                  _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
 
 
         if details.geometry_output:
@@ -609,7 +639,7 @@ class PlaceSearch(AbstractSearch):
             pcs = self.postcodes.values
             if self.expected_count > 1000:
                 # Many results expected. Restrict by postcode.
-                sql = sql.where(lambda: sa.select(tpc.c.postcode)
+                sql = sql.where(sa.select(tpc.c.postcode)
                                   .where(tpc.c.postcode.in_(pcs))
                                   .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
                                   .exists())
@@ -623,26 +653,43 @@ class PlaceSearch(AbstractSearch):
 
         if details.viewbox is not None:
             if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                if details.viewbox.area < 0.2:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
+            elif self.expected_count >= 10000:
+                if details.viewbox.area < 0.5:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
-                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
-                                   else_=2.0)
+                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+                                   else_=1.0)
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
-            sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
+                if details.near_radius < 0.1:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+                                                                           NEAR_RADIUS_PARAM))
+            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
         else:
-            sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
-                                  else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
+            if self.expected_count < 10000\
+               or (details.viewbox is not None and details.viewbox.area < 0.5):
+                sql = sql.order_by(
+                        penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
+                                    else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
             sql = sql.add_columns(t.c.importance)
 
 
-        sql = sql.add_columns(penalty.label('accuracy'))\
-                 .order_by(sa.text('accuracy'))
+        sql = sql.add_columns(penalty.label('accuracy'))
+
+        if self.expected_count < 10000:
+            sql = sql.order_by(sa.text('accuracy'))
 
         if self.housenumbers:
             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
@@ -664,11 +711,12 @@ class PlaceSearch(AbstractSearch):
                           .where(thnr.c.indexed_status == 0)
 
             if details.excluded:
-                place_sql = place_sql.where(_exclude_places(thnr))
+                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
             if self.qualifiers:
                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
 
-            numerals = [int(n) for n in self.housenumbers.values if n.isdigit()]
+            numerals = [int(n) for n in self.housenumbers.values
+                        if n.isdigit() and len(n) < 8]
             interpol_sql: SaColumn
             tiger_sql: SaColumn
             if numerals and \
@@ -718,9 +766,6 @@ class PlaceSearch(AbstractSearch):
             assert result
             result.bbox = Bbox.from_wkb(row.bbox)
             result.accuracy = row.accuracy
-            if not details.excluded or not result.place_id in details.excluded:
-                results.append(result)
-
             if self.housenumbers and row.rank_address < 30:
                 if row.placex_hnr:
                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
@@ -740,6 +785,14 @@ class PlaceSearch(AbstractSearch):
                             sub.accuracy += 0.6
                         results.append(sub)
 
-                result.accuracy += 1.0 # penalty for missing housenumber
+                # Only add the street as a result, if it meets all other
+                # filter conditions.
+                if (not details.excluded or result.place_id not in details.excluded)\
+                   and (not self.qualifiers or result.category in self.qualifiers.values)\
+                   and result.rank_address >= details.min_rank:
+                    result.accuracy += 1.0 # penalty for missing housenumber
+                    results.append(result)
+            else:
+                results.append(result)
 
         return results