X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/317cc5c5443ef0a46b70be589fba2737d0685948..c29ffc38e6cef4bb99fd40060be8243ea70e5939:/nominatim/api/search/db_searches.py?ds=sidebyside diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index db35f726..85dc3019 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -7,22 +7,53 @@ """ Implementation of the acutal database accesses for forward search. """ -from typing import List, Tuple, AsyncIterator +from typing import List, Tuple, AsyncIterator, Dict, Any, Callable import abc import sqlalchemy as sa from sqlalchemy.dialects.postgresql import ARRAY, array_agg from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \ - SaExpression, SaSelect, SaRow + SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind from nominatim.api.connection import SearchConnection from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox import nominatim.api.results as nres from nominatim.api.search.db_search_fields import SearchData, WeightedCategories +from nominatim.db.sqlalchemy_types import Geometry #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: + """ Create a dictionary from search parameters that can be used + as bind parameter for SQL execute. + """ + return {'limit': details.max_results, + 'min_rank': details.min_rank, + 'max_rank': details.max_rank, + 'viewbox': details.viewbox, + 'viewbox2': details.viewbox_x2, + 'near': details.near, + 'near_radius': details.near_radius, + 'excluded': details.excluded, + 'countries': details.countries} + + +LIMIT_PARAM: SaBind = sa.bindparam('limit') +MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank') +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') +VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry) +VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry) +NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry) +NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius') +COUNTRIES_PARAM: SaBind = sa.bindparam('countries') + +def _within_near(t: SaFromClause) -> Callable[[], SaExpression]: + return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM) + +def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]: + return lambda: t.c.place_id.not_in(sa.bindparam('excluded')) + def _select_placex(t: SaFromClause) -> SaSelect: return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, @@ -34,23 +65,20 @@ def _select_placex(t: SaFromClause) -> SaSelect: t.c.geometry.ST_Expand(0).label('bbox')) -def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect: - if not details.geometry_output: - return sql - +def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect: out = [] if details.geometry_simplification > 0.0: - col = col.ST_SimplifyPreserveTopology(details.geometry_simplification) + col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification) if details.geometry_output & GeometryFormat.GEOJSON: - out.append(col.ST_AsGeoJSON().label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) if details.geometry_output & GeometryFormat.TEXT: - out.append(col.ST_AsText().label('geometry_text')) + out.append(sa.func.ST_AsText(col).label('geometry_text')) if details.geometry_output & GeometryFormat.KML: - out.append(col.ST_AsKML().label('geometry_kml')) + out.append(sa.func.ST_AsKML(col).label('geometry_kml')) if details.geometry_output & GeometryFormat.SVG: - out.append(col.ST_AsSVG().label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) return sql.add_columns(*out) @@ -70,7 +98,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, for n in numerals))) if details.excluded: - sql = sql.where(table.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(table)) return sql.scalar_subquery() @@ -124,12 +152,13 @@ async def _get_placex_housenumbers(conn: SearchConnection, t = conn.t.placex sql = _select_placex(t).where(t.c.place_id.in_(place_ids)) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) for row in await conn.execute(sql): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) yield result @@ -258,29 +287,27 @@ class NearSearch(AbstractSearch): # radius for the lookup. sql = sql.join(table, t.c.place_id == table.c.place_id)\ .join(tgeom, - sa.case((sa.and_(tgeom.c.rank_address < 9, - tgeom.c.geometry.ST_GeometryType().in_( - ('ST_Polygon', 'ST_MultiPolygon'))), - tgeom.c.geometry.ST_Contains(table.c.centroid)), - else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\ + table.c.centroid.ST_CoveredBy( + sa.case((sa.and_(tgeom.c.rank_address < 9, + tgeom.c.geometry.is_area()), + tgeom.c.geometry), + else_ = tgeom.c.centroid.ST_Expand(0.05))))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) + sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: - sql = sql.where(t.c.country_code.in_(details.countries)) - if details.min_rank > 0: - sql = sql.where(t.c.rank_address >= details.min_rank) - if details.max_rank < 30: - sql = sql.where(t.c.rank_address <= details.max_rank) + sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) if details.layers is not None: sql = sql.where(_filter_by_layer(t, details.layers)) - for row in await conn.execute(sql.limit(details.max_results)): + sql = sql.limit(LIMIT_PARAM) + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + penalty - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) @@ -298,33 +325,37 @@ class PoiSearch(AbstractSearch): details: SearchDetails) -> nres.SearchResults: """ Find results for the search in the database. """ + bind_params = _details_to_bind_params(details) t = conn.t.placex rows: List[SaRow] = [] if details.near and details.near_radius is not None and details.near_radius < 0.2: # simply search in placex table - sql = _select_placex(t) \ - .where(t.c.linked_place_id == None) \ - .where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) \ - .order_by(t.c.centroid.ST_Distance(details.near.sql_value())) + def _base_query() -> SaSelect: + return _select_placex(t) \ + .where(t.c.linked_place_id == None) \ + .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \ + .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \ + .limit(LIMIT_PARAM) + + classtype = self.categories.values + if len(classtype) == 1: + cclass, ctype = classtype[0] + sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query() + .where(t.c.class_ == cclass) + .where(t.c.type == ctype)) + else: + sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ) + for cls, typ in classtype))) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) - - classtype = self.categories.values - if len(classtype) == 1: - sql = sql.where(t.c.class_ == classtype[0][0]) \ - .where(t.c.type == classtype[0][1]) - else: - sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ) - for cls, typ in classtype))) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) - rows.extend(await conn.execute(sql.limit(details.max_results))) + rows.extend(await conn.execute(sql, bind_params)) else: # use the class type tables for category in self.categories.values: @@ -336,24 +367,25 @@ class PoiSearch(AbstractSearch): .where(t.c.type == category[1]) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(table.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM)) - if details.near: - sql = sql.order_by(table.c.centroid.ST_Distance(details.near.sql_value()))\ - .where(table.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius or 0.5)) + if details.near and details.near_radius is not None: + sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\ + .where(table.c.centroid.ST_DWithin(NEAR_PARAM, + NEAR_RADIUS_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) - rows.extend(await conn.execute(sql.limit(details.max_results))) + sql = sql.limit(LIMIT_PARAM) + rows.extend(await conn.execute(sql, bind_params)) results = nres.SearchResults() for row in rows: result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type)) - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) return results @@ -373,24 +405,25 @@ class CountrySearch(AbstractSearch): """ t = conn.t.placex - sql = _select_placex(t)\ - .where(t.c.country_code.in_(self.countries.values))\ - .where(t.c.rank_address == 4) + ccodes = self.countries.values + sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_placex(t)\ + .where(t.c.country_code.in_(ccodes))\ + .where(t.c.rank_address == 4)) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM)) if details.near is not None and details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = sql.where(_within_near(t)) results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) @@ -403,6 +436,12 @@ class CountrySearch(AbstractSearch): details: SearchDetails) -> nres.SearchResults: """ Look up the country in the fallback country tables. """ + # Avoid the fallback search when this is a more search. Country results + # usually are in the first batch of results and it is not possible + # to exclude these fallbacks. + if details.excluded: + return nres.SearchResults() + t = conn.t.country_name tgrid = conn.t.country_grid @@ -413,10 +452,9 @@ class CountrySearch(AbstractSearch): .group_by(tgrid.c.country_code) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(tgrid.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM)) if details.near is not None and details.near_radius is not None: - sql = sql.where(tgrid.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = sql.where(_within_near(tgrid)) sub = sql.subquery('grid') @@ -429,7 +467,7 @@ class CountrySearch(AbstractSearch): .join(sub, t.c.country_code == sub.c.country_code) results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_country_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) @@ -455,36 +493,38 @@ class PostcodeSearch(AbstractSearch): """ Find results for the search in the database. """ t = conn.t.postcode + pcs = self.postcodes.values - sql = sa.select(t.c.place_id, t.c.parent_place_id, + sql: SaLambdaSelect = sa.lambda_stmt(lambda: + sa.select(t.c.place_id, t.c.parent_place_id, t.c.rank_search, t.c.rank_address, t.c.postcode, t.c.country_code, - t.c.geometry.label('centroid'))\ - .where(t.c.postcode.in_(self.postcodes.values)) + t.c.geometry.label('centroid')) + .where(t.c.postcode.in_(pcs))) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) penalty: SaExpression = sa.literal(self.penalty) if details.viewbox is not None: if details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), else_=2.0) if details.near is not None: if details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.order_by(t.c.geometry.ST_Distance(details.near.sql_value())) + sql = sql.where(_within_near(t)) + sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) if self.lookups: assert len(self.lookups) == 1 @@ -503,10 +543,10 @@ class PostcodeSearch(AbstractSearch): sql = sql.add_columns(penalty.label('accuracy')) - sql = sql.order_by('accuracy') + sql = sql.order_by('accuracy').limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(details.max_results)): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_postcode_row(row, nres.SearchResult) assert result result.accuracy = row.accuracy @@ -534,22 +574,23 @@ class PlaceSearch(AbstractSearch): details: SearchDetails) -> nres.SearchResults: """ Find results for the search in the database. """ - t = conn.t.placex.alias('p') - tsearch = conn.t.search_name.alias('s') - limit = details.max_results + t = conn.t.placex + tsearch = conn.t.search_name - sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, - t.c.class_, t.c.type, - t.c.address, t.c.extratags, - t.c.housenumber, t.c.postcode, t.c.country_code, - t.c.wikipedia, - t.c.parent_place_id, t.c.rank_address, t.c.rank_search, - t.c.centroid, - t.c.geometry.ST_Expand(0).label('bbox'))\ - .where(t.c.place_id == tsearch.c.place_id) + sql: SaLambdaSelect = sa.lambda_stmt(lambda: + sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, + t.c.class_, t.c.type, + t.c.address, t.c.extratags, + t.c.housenumber, t.c.postcode, t.c.country_code, + t.c.wikipedia, + t.c.parent_place_id, t.c.rank_address, t.c.rank_search, + t.c.centroid, + t.c.geometry.ST_Expand(0).label('bbox')) + .where(t.c.place_id == tsearch.c.place_id)) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) penalty: SaExpression = sa.literal(self.penalty) for ranking in self.rankings: @@ -562,34 +603,36 @@ class PlaceSearch(AbstractSearch): sql = sql.where(tsearch.c.country_code.in_(self.countries.values)) if self.postcodes: + # if a postcode is given, don't search for state or country level objects + sql = sql.where(tsearch.c.address_rank > 9) tpc = conn.t.postcode + pcs = self.postcodes.values if self.expected_count > 1000: # Many results expected. Restrict by postcode. - sql = sql.where(sa.select(tpc.c.postcode) - .where(tpc.c.postcode.in_(self.postcodes.values)) + sql = sql.where(lambda: sa.select(tpc.c.postcode) + .where(tpc.c.postcode.in_(pcs)) .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12)) .exists()) # Less results, only have a preference for close postcodes pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\ - .where(tpc.c.postcode.in_(self.postcodes.values))\ + .where(tpc.c.postcode.in_(pcs))\ .scalar_subquery() - penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), + penalty += sa.case((t.c.postcode.in_(pcs), 0.0), else_=sa.func.coalesce(pc_near, 2.0)) if details.viewbox is not None: if details.bounded_viewbox: - sql = sql.where(tsearch.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM)) else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), else_=2.0) if details.near is not None: if details.near_radius is not None: - sql = sql.where(tsearch.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(details.near.sql_value()) + sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) + sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM) .label('importance')) sql = sql.order_by(sa.desc(sa.text('importance'))) else: @@ -605,7 +648,7 @@ class PlaceSearch(AbstractSearch): hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" sql = sql.where(tsearch.c.address_rank.between(16, 30))\ .where(sa.or_(tsearch.c.address_rank < 30, - t.c.housenumber.regexp_match(hnr_regexp, flags='i'))) + t.c.housenumber.op('~*')(hnr_regexp))) # Cross check for housenumbers, need to do that on a rather large # set. Worst case there are 40.000 main streets in OSM. @@ -616,18 +659,18 @@ class PlaceSearch(AbstractSearch): pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call] place_sql = sa.select(pid_list)\ .where(thnr.c.parent_place_id == inner.c.place_id)\ - .where(thnr.c.housenumber.regexp_match(hnr_regexp, flags='i'))\ + .where(thnr.c.housenumber.op('~*')(hnr_regexp))\ .where(thnr.c.linked_place_id == None)\ .where(thnr.c.indexed_status == 0) if details.excluded: - place_sql = place_sql.where(thnr.c.place_id.not_in(details.excluded)) + place_sql = place_sql.where(_exclude_places(thnr)) if self.qualifiers: place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr)) numerals = [int(n) for n in self.housenumbers.values if n.isdigit()] - interpol_sql: SaExpression - tiger_sql: SaExpression + interpol_sql: SaColumn + tiger_sql: SaColumn if numerals and \ (not self.qualifiers or ('place', 'house') in self.qualifiers.values): # Housenumbers from interpolations @@ -639,8 +682,8 @@ class PlaceSearch(AbstractSearch): numerals, details) ), else_=None) else: - interpol_sql = sa.literal_column('NULL') - tiger_sql = sa.literal_column('NULL') + interpol_sql = sa.null() + tiger_sql = sa.null() unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'), interpol_sql.label('interpol_hnr'), @@ -657,22 +700,23 @@ class PlaceSearch(AbstractSearch): if self.qualifiers: sql = sql.where(self.qualifiers.sql_restrict(t)) if details.excluded: - sql = sql.where(tsearch.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(tsearch)) if details.min_rank > 0: - sql = sql.where(sa.or_(tsearch.c.address_rank >= details.min_rank, - tsearch.c.search_rank >= details.min_rank)) + sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM, + tsearch.c.search_rank >= MIN_RANK_PARAM)) if details.max_rank < 30: - sql = sql.where(sa.or_(tsearch.c.address_rank <= details.max_rank, - tsearch.c.search_rank <= details.max_rank)) + sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM, + tsearch.c.search_rank <= MAX_RANK_PARAM)) if details.layers is not None: sql = sql.where(_filter_by_layer(t, details.layers)) + sql = sql.limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(limit)): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) result.accuracy = row.accuracy if not details.excluded or not result.place_id in details.excluded: results.append(result)