]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/search/db_searches.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / api / search / db_searches.py
index 68447f6a7429e3c40eea224a433985d4c5a58631..d74812e682a3453a55ea209c83ebf22a61e75a31 100644 (file)
@@ -5,7 +5,7 @@
 # Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 # Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
-Implementation of the acutal database accesses for forward search.
+Implementation of the actual database accesses for forward search.
 """
 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
 import abc
 """
 from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
 import abc
@@ -484,7 +484,14 @@ class CountrySearch(AbstractSearch):
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
-        return results or await self.lookup_in_country_table(conn, details)
+        if not results:
+            results = await self.lookup_in_country_table(conn, details)
+
+        if results:
+            details.min_rank = min(5, details.max_rank)
+            details.max_rank = min(25, details.max_rank)
+
+        return results
 
 
     async def lookup_in_country_table(self, conn: SearchConnection,
 
 
     async def lookup_in_country_table(self, conn: SearchConnection,
@@ -595,10 +602,28 @@ class PostcodeSearch(AbstractSearch):
 
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
 
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
-            result = nres.create_from_postcode_row(row, nres.SearchResult)
+            p = conn.t.placex
+            placex_sql = _select_placex(p).add_columns(p.c.importance)\
+                             .where(sa.text("""class = 'boundary'
+                                               AND type = 'postal_code'
+                                               AND osm_type = 'R'"""))\
+                             .where(p.c.country_code == row.country_code)\
+                             .where(p.c.postcode == row.postcode)\
+                             .limit(1)
+
+            if details.geometry_output:
+                placex_sql = _add_geometry_columns(placex_sql, p.c.geometry, details)
+
+            for prow in await conn.execute(placex_sql, _details_to_bind_params(details)):
+                result = nres.create_from_placex_row(prow, nres.SearchResult)
+                break
+            else:
+                result = nres.create_from_postcode_row(row, nres.SearchResult)
+
             assert result
             assert result
-            result.accuracy = row.accuracy
-            results.append(result)
+            if result.place_id not in details.excluded:
+                result.accuracy = row.accuracy
+                results.append(result)
 
         return results
 
 
         return results
 
@@ -620,97 +645,145 @@ class PlaceSearch(AbstractSearch):
         self.expected_count = expected_count
 
 
         self.expected_count = expected_count
 
 
-    async def lookup(self, conn: SearchConnection,
-                     details: SearchDetails) -> nres.SearchResults:
-        """ Find results for the search in the database.
+    def _inner_search_name_cte(self, conn: SearchConnection,
+                               details: SearchDetails) -> 'sa.CTE':
+        """ Create a subquery that preselects the rows in the search_name
+            table.
         """
         """
-        t = conn.t.placex
-        tsearch = conn.t.search_name
-
-        sql: SaLambdaSelect = sa.lambda_stmt(lambda:
-                  _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
-
-
-        if details.geometry_output:
-            sql = _add_geometry_columns(sql, t.c.geometry, details)
+        t = conn.t.search_name
 
         penalty: SaExpression = sa.literal(self.penalty)
         for ranking in self.rankings:
 
         penalty: SaExpression = sa.literal(self.penalty)
         for ranking in self.rankings:
-            penalty += ranking.sql_penalty(tsearch)
+            penalty += ranking.sql_penalty(t)
+
+        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
+                        t.c.country_code, t.c.centroid,
+                        t.c.name_vector, t.c.nameaddress_vector,
+                        sa.case((t.c.importance > 0, t.c.importance),
+                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
+                          .label('importance'),
+                        penalty.label('penalty'))
 
         for lookup in self.lookups:
 
         for lookup in self.lookups:
-            sql = sql.where(lookup.sql_condition(tsearch))
+            sql = sql.where(lookup.sql_condition(t))
 
         if self.countries:
 
         if self.countries:
-            sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
+            sql = sql.where(t.c.country_code.in_(self.countries.values))
 
         if self.postcodes:
             # if a postcode is given, don't search for state or country level objects
 
         if self.postcodes:
             # if a postcode is given, don't search for state or country level objects
-            sql = sql.where(tsearch.c.address_rank > 9)
-            tpc = conn.t.postcode
-            pcs = self.postcodes.values
-            if self.expected_count > 5000:
+            sql = sql.where(t.c.address_rank > 9)
+            if self.expected_count > 10000:
                 # Many results expected. Restrict by postcode.
                 # Many results expected. Restrict by postcode.
+                tpc = conn.t.postcode
                 sql = sql.where(sa.select(tpc.c.postcode)
                 sql = sql.where(sa.select(tpc.c.postcode)
-                                  .where(tpc.c.postcode.in_(pcs))
-                                  .where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
+                                  .where(tpc.c.postcode.in_(self.postcodes.values))
+                                  .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4))
                                   .exists())
 
                                   .exists())
 
-            # Less results, only have a preference for close postcodes
-            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
+        if details.viewbox is not None:
+            if details.bounded_viewbox:
+                sql = sql.where(t.c.centroid
+                                   .intersects(VIEWBOX_PARAM,
+                                               use_index=details.viewbox.area < 0.2))
+            elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
+                sql = sql.where(t.c.centroid
+                                   .intersects(VIEWBOX2_PARAM,
+                                               use_index=details.viewbox.area < 0.5))
+
+        if details.near is not None and details.near_radius is not None:
+            if details.near_radius < 0.1:
+                sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM,
+                                                             NEAR_RADIUS_PARAM))
+            else:
+                sql = sql.where(t.c.centroid
+                                   .ST_Distance(NEAR_PARAM) <  NEAR_RADIUS_PARAM)
+
+        if self.housenumbers:
+            sql = sql.where(t.c.address_rank.between(16, 30))
+        else:
+            if details.excluded:
+                sql = sql.where(_exclude_places(t))
+            if details.min_rank > 0:
+                sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
+                                       t.c.search_rank >= MIN_RANK_PARAM))
+            if details.max_rank < 30:
+                sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
+                                       t.c.search_rank <= MAX_RANK_PARAM))
+
+        inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()
+
+        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
+                        inner.c.country_code, inner.c.centroid, inner.c.importance,
+                        inner.c.penalty)
+
+        # If the query is not an address search or has a geographic preference,
+        # preselect most important items to restrict the number of places
+        # that need to be looked up in placex.
+        if not self.housenumbers\
+           and (details.viewbox is None or details.bounded_viewbox)\
+           and (details.near is None or details.near_radius is not None)\
+           and not self.qualifiers:
+            sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
+                                       .over(order_by=inner.c.penalty - inner.c.importance)
+                                       .label('min_penalty'))
+
+            inner = sql.subquery()
+
+            sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
+                            inner.c.country_code, inner.c.centroid, inner.c.importance,
+                            inner.c.penalty)\
+                    .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)
+
+        return sql.cte('searches')
+
+
+    async def lookup(self, conn: SearchConnection,
+                     details: SearchDetails) -> nres.SearchResults:
+        """ Find results for the search in the database.
+        """
+        t = conn.t.placex
+        tsearch = self._inner_search_name_cte(conn, details)
+
+        sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id)
+
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
+
+        penalty: SaExpression = tsearch.c.penalty
+
+        if self.postcodes:
+            tpc = conn.t.postcode
+            pcs = self.postcodes.values
+
+            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\
                       .where(tpc.c.postcode.in_(pcs))\
                       .scalar_subquery()
             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                                else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
 
                       .where(tpc.c.postcode.in_(pcs))\
                       .scalar_subquery()
             penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                                else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
 
-        if details.viewbox is not None:
-            if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid
-                                         .intersects(VIEWBOX_PARAM,
-                                                     use_index=details.viewbox.area < 0.2))
-            elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
-                sql = sql.where(tsearch.c.centroid
-                                         .intersects(VIEWBOX2_PARAM,
-                                                     use_index=details.viewbox.area < 0.5))
-            else:
-                penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
-                                   (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
-                                   else_=1.0)
+        if details.viewbox is not None and not details.bounded_viewbox:
+            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
+                               (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
+                               else_=1.0)
 
         if details.near is not None:
 
         if details.near is not None:
-            if details.near_radius is not None:
-                if details.near_radius < 0.1:
-                    sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
-                                                                       NEAR_RADIUS_PARAM))
-                else:
-                    sql = sql.where(tsearch.c.centroid
-                                             .ST_Distance(NEAR_PARAM) <  NEAR_RADIUS_PARAM)
             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
         else:
             sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
         else:
-            if self.expected_count < 10000\
-               or (details.viewbox is not None and details.viewbox.area < 0.5):
-                sql = sql.order_by(
-                        penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
-                                    else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
-            sql = sql.add_columns(t.c.importance)
+            sql = sql.order_by(penalty - tsearch.c.importance)
+            sql = sql.add_columns(tsearch.c.importance)
 
 
 
 
-        sql = sql.add_columns(penalty.label('accuracy'))
-
-        if self.expected_count < 10000:
-            sql = sql.order_by(sa.text('accuracy'))
+        sql = sql.add_columns(penalty.label('accuracy'))\
+                 .order_by(sa.text('accuracy'))
 
         if self.housenumbers:
             hnr_list = '|'.join(self.housenumbers.values)
 
         if self.housenumbers:
             hnr_list = '|'.join(self.housenumbers.values)
-            sql = sql.where(tsearch.c.address_rank.between(16, 30))\
-                     .where(sa.or_(tsearch.c.address_rank < 30,
-                                   sa.func.RegexpWord(hnr_list, t.c.housenumber)))
-
-            # Cross check for housenumbers, need to do that on a rather large
-            # set. Worst case there are 40.000 main streets in OSM.
-            inner = sql.limit(10000).subquery()
+            inner = sql.where(sa.or_(tsearch.c.address_rank < 30,
+                                     sa.func.RegexpWord(hnr_list, t.c.housenumber)))\
+                       .subquery()
 
             # Housenumbers from placex
             thnr = conn.t.placex.alias('hnr')
 
             # Housenumbers from placex
             thnr = conn.t.placex.alias('hnr')
@@ -758,14 +831,6 @@ class PlaceSearch(AbstractSearch):
                      .where(t.c.indexed_status == 0)
             if self.qualifiers:
                 sql = sql.where(self.qualifiers.sql_restrict(t))
                      .where(t.c.indexed_status == 0)
             if self.qualifiers:
                 sql = sql.where(self.qualifiers.sql_restrict(t))
-            if details.excluded:
-                sql = sql.where(_exclude_places(tsearch))
-            if details.min_rank > 0:
-                sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
-                                       tsearch.c.search_rank >= MIN_RANK_PARAM))
-            if details.max_rank < 30:
-                sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
-                                       tsearch.c.search_rank <= MAX_RANK_PARAM))
             if details.layers is not None:
                 sql = sql.where(_filter_by_layer(t, details.layers))
 
             if details.layers is not None:
                 sql = sql.where(_filter_by_layer(t, details.layers))