]> git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/search/db_searches.py
more unit tests for search
[nominatim.git] / nominatim / api / search / db_searches.py
index cea19c852836ac7490b33e0511cc574e86ef317a..eb4da5490e5f8ad6377cba288908fccdf9d2e79f 100644 (file)
@@ -7,14 +7,14 @@
 """
 Implementation of the acutal database accesses for forward search.
 """
-from typing import List, Tuple, AsyncIterator, Dict, Any
+from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
 import abc
 
 import sqlalchemy as sa
 from sqlalchemy.dialects.postgresql import ARRAY, array_agg
 
 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
-                             SaExpression, SaSelect, SaRow, SaBind
+                             SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
 from nominatim.api.connection import SearchConnection
 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
 import nominatim.api.results as nres
@@ -46,9 +46,14 @@ VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
 VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
 NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
 NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
-EXCLUDED_PARAM: SaBind = sa.bindparam('excluded')
 COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
 
+def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
+    return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
+
+def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
+    return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
+
 def _select_placex(t: SaFromClause) -> SaSelect:
     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                      t.c.class_, t.c.type,
@@ -56,14 +61,12 @@ def _select_placex(t: SaFromClause) -> SaSelect:
                      t.c.housenumber, t.c.postcode, t.c.country_code,
                      t.c.importance, t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
+                     t.c.linked_place_id, t.c.admin_level,
                      t.c.centroid,
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
 
-def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
-    if not details.geometry_output:
-        return sql
-
+def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
     out = []
 
     if details.geometry_simplification > 0.0:
@@ -96,7 +99,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                   for n in numerals)))
 
     if details.excluded:
-        sql = sql.where(table.c.place_id.not_in(EXCLUDED_PARAM))
+        sql = sql.where(_exclude_places(table))
 
     return sql.scalar_subquery()
 
@@ -109,7 +112,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
         orexpr.append(table.c.rank_address.between(1, 29))
         orexpr.append(sa.and_(table.c.rank_address == 30,
                               sa.or_(table.c.housenumber != None,
-                                     table.c.address.has_key('housename'))))
+                                     table.c.address.has_key('addr:housename'))))
     elif layers & DataLayer.POI:
         orexpr.append(sa.and_(table.c.rank_address == 30,
                               table.c.class_.not_in(('place', 'building'))))
@@ -150,7 +153,8 @@ async def _get_placex_housenumbers(conn: SearchConnection,
     t = conn.t.placex
     sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
 
-    sql = _add_geometry_columns(sql, t.c.geometry, details)
+    if details.geometry_output:
+        sql = _add_geometry_columns(sql, t.c.geometry, details)
 
     for row in await conn.execute(sql):
         result = nres.create_from_placex_row(row, nres.SearchResult)
@@ -268,7 +272,7 @@ class NearSearch(AbstractSearch):
         """
         table = await conn.get_class_table(*category)
 
-        t = conn.t.placex.alias('p')
+        t = conn.t.placex
         tgeom = conn.t.placex.alias('pgeom')
 
         sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
@@ -284,17 +288,18 @@ class NearSearch(AbstractSearch):
             # radius for the lookup.
             sql = sql.join(table, t.c.place_id == table.c.place_id)\
                      .join(tgeom,
-                           sa.case((sa.and_(tgeom.c.rank_address < 9,
-                                            tgeom.c.geometry.is_area()),
-                                    tgeom.c.geometry.ST_Contains(table.c.centroid)),
-                                   else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\
+                           table.c.centroid.ST_CoveredBy(
+                               sa.case((sa.and_(tgeom.c.rank_address < 9,
+                                                tgeom.c.geometry.is_area()),
+                                        tgeom.c.geometry),
+                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))\
                      .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
 
         sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
         if details.countries:
             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
         if details.layers is not None:
             sql = sql.where(_filter_by_layer(t, details.layers))
 
@@ -313,7 +318,7 @@ class PoiSearch(AbstractSearch):
     """
     def __init__(self, sdata: SearchData) -> None:
         super().__init__(sdata.penalty)
-        self.categories = sdata.qualifiers
+        self.qualifiers = sdata.qualifiers
         self.countries = sdata.countries
 
 
@@ -328,10 +333,22 @@ class PoiSearch(AbstractSearch):
 
         if details.near and details.near_radius is not None and details.near_radius < 0.2:
             # simply search in placex table
-            sql = _select_placex(t) \
-                      .where(t.c.linked_place_id == None) \
-                      .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
-                      .order_by(t.c.centroid.ST_Distance(NEAR_PARAM))
+            def _base_query() -> SaSelect:
+                return _select_placex(t) \
+                           .where(t.c.linked_place_id == None) \
+                           .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
+                           .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
+                           .limit(LIMIT_PARAM)
+
+            classtype = self.qualifiers.values
+            if len(classtype) == 1:
+                cclass, ctype = classtype[0]
+                sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
+                                                 .where(t.c.class_ == cclass)
+                                                 .where(t.c.type == ctype))
+            else:
+                sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
+                                                   for cls, typ in classtype)))
 
             if self.countries:
                 sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -339,19 +356,10 @@ class PoiSearch(AbstractSearch):
             if details.viewbox is not None and details.bounded_viewbox:
                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
 
-            classtype = self.categories.values
-            if len(classtype) == 1:
-                sql = sql.where(t.c.class_ == classtype[0][0]) \
-                         .where(t.c.type == classtype[0][1])
-            else:
-                sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
-                                         for cls, typ in classtype)))
-
-            sql = sql.limit(LIMIT_PARAM)
             rows.extend(await conn.execute(sql, bind_params))
         else:
             # use the class type tables
-            for category in self.categories.values:
+            for category in self.qualifiers.values:
                 table = await conn.get_class_table(*category)
                 if table is not None:
                     sql = _select_placex(t)\
@@ -377,7 +385,7 @@ class PoiSearch(AbstractSearch):
         for row in rows:
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
-            result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
@@ -398,26 +406,29 @@ class CountrySearch(AbstractSearch):
         """
         t = conn.t.placex
 
+        ccodes = self.countries.values
         sql = _select_placex(t)\
-                .where(t.c.country_code.in_(self.countries.values))\
+                .where(t.c.country_code.in_(ccodes))\
                 .where(t.c.rank_address == 4)
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
 
         if details.viewbox is not None and details.bounded_viewbox:
-            sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
+            sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
 
         if details.near is not None and details.near_radius is not None:
-            sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+            sql = sql.where(_within_near(t))
 
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
+            result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
         return results or await self.lookup_in_country_table(conn, details)
@@ -438,14 +449,15 @@ class CountrySearch(AbstractSearch):
 
         sql = sa.select(tgrid.c.country_code,
                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
-                              .label('centroid'))\
+                              .label('centroid'),
+                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
                 .where(tgrid.c.country_code.in_(self.countries.values))\
                 .group_by(tgrid.c.country_code)
 
         if details.viewbox is not None and details.bounded_viewbox:
             sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
         if details.near is not None and details.near_radius is not None:
-            sql = sql.where(tgrid.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+            sql = sql.where(_within_near(tgrid))
 
         sub = sql.subquery('grid')
 
@@ -454,13 +466,17 @@ class CountrySearch(AbstractSearch):
                          + sa.func.coalesce(t.c.derived_name,
                                             sa.cast('', type_=conn.t.types.Composite))
                         ).label('name'),
-                        sub.c.centroid)\
+                        sub.c.centroid, sub.c.bbox)\
                 .join(sub, t.c.country_code == sub.c.country_code)
 
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, sub.c.centroid, details)
+
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
             result = nres.create_from_country_row(row, nres.SearchResult)
             assert result
+            result.bbox = Bbox.from_wkb(row.bbox)
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
             results.append(result)
 
@@ -484,14 +500,16 @@ class PostcodeSearch(AbstractSearch):
         """ Find results for the search in the database.
         """
         t = conn.t.postcode
+        pcs = self.postcodes.values
 
         sql = sa.select(t.c.place_id, t.c.parent_place_id,
                         t.c.rank_search, t.c.rank_address,
                         t.c.postcode, t.c.country_code,
                         t.c.geometry.label('centroid'))\
-                .where(t.c.postcode.in_(self.postcodes.values))
+                .where(t.c.postcode.in_(pcs))
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         penalty: SaExpression = sa.literal(self.penalty)
 
@@ -505,14 +523,14 @@ class PostcodeSearch(AbstractSearch):
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                sql = sql.where(_within_near(t))
             sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
 
         if self.countries:
             sql = sql.where(t.c.country_code.in_(self.countries.values))
 
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
 
         if self.lookups:
             assert len(self.lookups) == 1
@@ -562,21 +580,23 @@ class PlaceSearch(AbstractSearch):
                      details: SearchDetails) -> nres.SearchResults:
         """ Find results for the search in the database.
         """
-        t = conn.t.placex.alias('p')
-        tsearch = conn.t.search_name.alias('s')
+        t = conn.t.placex
+        tsearch = conn.t.search_name
 
-        sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
-                        t.c.class_, t.c.type,
-                        t.c.address, t.c.extratags,
-                        t.c.housenumber, t.c.postcode, t.c.country_code,
-                        t.c.wikipedia,
-                        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
-                        t.c.centroid,
-                        t.c.geometry.ST_Expand(0).label('bbox'))\
-                .where(t.c.place_id == tsearch.c.place_id)
+        sql: SaLambdaSelect = sa.lambda_stmt(lambda:
+                  sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+                            t.c.class_, t.c.type,
+                            t.c.address, t.c.extratags, t.c.admin_level,
+                            t.c.housenumber, t.c.postcode, t.c.country_code,
+                            t.c.wikipedia,
+                            t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
+                            t.c.centroid,
+                            t.c.geometry.ST_Expand(0).label('bbox'))
+                   .where(t.c.place_id == tsearch.c.place_id))
 
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         penalty: SaExpression = sa.literal(self.penalty)
         for ranking in self.rankings:
@@ -592,23 +612,32 @@ class PlaceSearch(AbstractSearch):
             # if a postcode is given, don't search for state or country level objects
             sql = sql.where(tsearch.c.address_rank > 9)
             tpc = conn.t.postcode
+            pcs = self.postcodes.values
             if self.expected_count > 1000:
                 # Many results expected. Restrict by postcode.
                 sql = sql.where(sa.select(tpc.c.postcode)
-                                  .where(tpc.c.postcode.in_(self.postcodes.values))
+                                  .where(tpc.c.postcode.in_(pcs))
                                   .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
                                   .exists())
 
             # Less results, only have a preference for close postcodes
             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
-                      .where(tpc.c.postcode.in_(self.postcodes.values))\
+                      .where(tpc.c.postcode.in_(pcs))\
                       .scalar_subquery()
-            penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0),
+            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                                else_=sa.func.coalesce(pc_near, 2.0))
 
         if details.viewbox is not None:
             if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                if details.viewbox.area < 0.2:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
+            elif self.expected_count >= 10000:
+                if details.viewbox.area < 0.5:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                                    (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
@@ -616,24 +645,33 @@ class PlaceSearch(AbstractSearch):
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
-            sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
+                if details.near_radius < 0.1:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+                                                                           NEAR_RADIUS_PARAM))
+            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
         else:
-            sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
-                                  else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
+            if self.expected_count < 10000\
+               or (details.viewbox is not None and details.viewbox.area < 0.5):
+                sql = sql.order_by(
+                        penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
+                                    else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
             sql = sql.add_columns(t.c.importance)
 
 
-        sql = sql.add_columns(penalty.label('accuracy'))\
-                 .order_by(sa.text('accuracy'))
+        sql = sql.add_columns(penalty.label('accuracy'))
+
+        if self.expected_count < 10000:
+            sql = sql.order_by(sa.text('accuracy'))
 
         if self.housenumbers:
             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
                      .where(sa.or_(tsearch.c.address_rank < 30,
-                                  t.c.housenumber.op('~*')(hnr_regexp)))
+                                   t.c.housenumber.op('~*')(hnr_regexp)))
 
             # Cross check for housenumbers, need to do that on a rather large
             # set. Worst case there are 40.000 main streets in OSM.
@@ -649,13 +687,14 @@ class PlaceSearch(AbstractSearch):
                           .where(thnr.c.indexed_status == 0)
 
             if details.excluded:
-                place_sql = place_sql.where(thnr.c.place_id.not_in(EXCLUDED_PARAM))
+                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
             if self.qualifiers:
                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
 
-            numerals = [int(n) for n in self.housenumbers.values if n.isdigit()]
-            interpol_sql: SaExpression
-            tiger_sql: SaExpression
+            numerals = [int(n) for n in self.housenumbers.values
+                        if n.isdigit() and len(n) < 8]
+            interpol_sql: SaColumn
+            tiger_sql: SaColumn
             if numerals and \
                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                 # Housenumbers from interpolations
@@ -667,8 +706,8 @@ class PlaceSearch(AbstractSearch):
                                                                   numerals, details)
                                     ), else_=None)
             else:
-                interpol_sql = sa.literal_column('NULL')
-                tiger_sql = sa.literal_column('NULL')
+                interpol_sql = sa.null()
+                tiger_sql = sa.null()
 
             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                                interpol_sql.label('interpol_hnr'),
@@ -685,7 +724,7 @@ class PlaceSearch(AbstractSearch):
             if self.qualifiers:
                 sql = sql.where(self.qualifiers.sql_restrict(t))
             if details.excluded:
-                sql = sql.where(tsearch.c.place_id.not_in(EXCLUDED_PARAM))
+                sql = sql.where(_exclude_places(tsearch))
             if details.min_rank > 0:
                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
                                        tsearch.c.search_rank >= MIN_RANK_PARAM))