X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d0a1e8e3110bfe3dea221933ab4e8964e8d74f66..8c54f9d7044c7f354fcad43de033b4a1cfc1ac56:/nominatim/api/search/db_searches.py diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index ba4a3995..d74812e6 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -5,59 +5,111 @@ # Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ -Implementation of the acutal database accesses for forward search. +Implementation of the actual database accesses for forward search. """ -from typing import List, Tuple, AsyncIterator +from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast import abc import sqlalchemy as sa -from sqlalchemy.dialects.postgresql import ARRAY, array_agg from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \ - SaExpression, SaSelect, SaRow + SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind from nominatim.api.connection import SearchConnection from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox import nominatim.api.results as nres from nominatim.api.search.db_search_fields import SearchData, WeightedCategories +from nominatim.db.sqlalchemy_types import Geometry, IntArray #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + + +def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: + """ Create a dictionary from search parameters that can be used + as bind parameter for SQL execute. + """ + return {'limit': details.max_results, + 'min_rank': details.min_rank, + 'max_rank': details.max_rank, + 'viewbox': details.viewbox, + 'viewbox2': details.viewbox_x2, + 'near': details.near, + 'near_radius': details.near_radius, + 'excluded': details.excluded, + 'countries': details.countries} + + +LIMIT_PARAM: SaBind = sa.bindparam('limit') +MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank') +MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') +VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry) +VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry) +NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry) +NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius') +COUNTRIES_PARAM: SaBind = sa.bindparam('countries') + + +def filter_by_area(sql: SaSelect, t: SaFromClause, + details: SearchDetails, avoid_index: bool = False) -> SaSelect: + """ Apply SQL statements for filtering by viewbox and near point, + if applicable. + """ + if details.near is not None and details.near_radius is not None: + if details.near_radius < 0.1 and not avoid_index: + sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) + else: + sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM) + if details.viewbox is not None and details.bounded_viewbox: + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM, + use_index=not avoid_index and + details.viewbox.area < 0.2)) + + return sql + + +def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]: + return lambda: t.c.place_id.not_in(sa.bindparam('excluded')) + + def _select_placex(t: SaFromClause) -> SaSelect: return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, t.c.address, t.c.extratags, t.c.housenumber, t.c.postcode, t.c.country_code, - t.c.importance, t.c.wikipedia, + t.c.wikipedia, t.c.parent_place_id, t.c.rank_address, t.c.rank_search, + t.c.linked_place_id, t.c.admin_level, t.c.centroid, t.c.geometry.ST_Expand(0).label('bbox')) -def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect: - if not details.geometry_output: - return sql - +def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect: out = [] if details.geometry_simplification > 0.0: - col = col.ST_SimplifyPreserveTopology(details.geometry_simplification) + col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification) if details.geometry_output & GeometryFormat.GEOJSON: - out.append(col.ST_AsGeoJSON().label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson')) if details.geometry_output & GeometryFormat.TEXT: - out.append(col.ST_AsText().label('geometry_text')) + out.append(sa.func.ST_AsText(col).label('geometry_text')) if details.geometry_output & GeometryFormat.KML: - out.append(col.ST_AsKML().label('geometry_kml')) + out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml')) if details.geometry_output & GeometryFormat.SVG: - out.append(col.ST_AsSVG().label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg')) return sql.add_columns(*out) def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, numerals: List[int], details: SearchDetails) -> SaScalarSelect: - all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call] + all_ids = sa.func.ArrayAgg(table.c.place_id) sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id) if len(numerals) == 1: @@ -70,7 +122,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, for n in numerals))) if details.excluded: - sql = sql.where(table.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(table)) return sql.scalar_subquery() @@ -78,14 +130,12 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: orexpr: List[SaExpression] = [] if layers & DataLayer.ADDRESS and layers & DataLayer.POI: - orexpr.append(table.c.rank_address.between(1, 30)) + orexpr.append(no_index(table.c.rank_address).between(1, 30)) elif layers & DataLayer.ADDRESS: - orexpr.append(table.c.rank_address.between(1, 29)) - orexpr.append(sa.and_(table.c.rank_address == 30, - sa.or_(table.c.housenumber != None, - table.c.address.has_key('housename')))) + orexpr.append(no_index(table.c.rank_address).between(1, 29)) + orexpr.append(sa.func.IsAddressPoint(table)) elif layers & DataLayer.POI: - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, table.c.class_.not_in(('place', 'building')))) if layers & DataLayer.MANMADE: @@ -95,7 +145,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if not layers & DataLayer.NATURAL: exclude.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) else: include = [] if layers & DataLayer.RAILWAY: @@ -103,7 +153,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if layers & DataLayer.NATURAL: include.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.in_(tuple(include)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) if len(orexpr) == 1: return orexpr[0] @@ -122,23 +172,34 @@ async def _get_placex_housenumbers(conn: SearchConnection, place_ids: List[int], details: SearchDetails) -> AsyncIterator[nres.SearchResult]: t = conn.t.placex - sql = _select_placex(t).where(t.c.place_id.in_(place_ids)) + sql = _select_placex(t).add_columns(t.c.importance)\ + .where(t.c.place_id.in_(place_ids)) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) for row in await conn.execute(sql): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) yield result +def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery': + """ Create a subselect that returns the given list of integers + as rows in the column 'nr'. + """ + vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\ + .table_valued(sa.column('value', type_=sa.JSON)) + return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery() + + async def _get_osmline(conn: SearchConnection, place_ids: List[int], numerals: List[int], details: SearchDetails) -> AsyncIterator[nres.SearchResult]: t = conn.t.osmline - values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\ - .data([(n,) for n in numerals]) + + values = _int_list_to_subquery(numerals) sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id, t.c.address, values.c.nr.label('housenumber'), @@ -161,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int], numerals: List[int], osm_id: int, details: SearchDetails) -> AsyncIterator[nres.SearchResult]: t = conn.t.tiger - values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\ - .data([(n,) for n in numerals]) + values = _int_list_to_subquery(numerals) sql = sa.select(t.c.place_id, t.c.parent_place_id, sa.literal('W').label('osm_type'), sa.literal(osm_id).label('osm_id'), @@ -185,6 +245,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int], class AbstractSearch(abc.ABC): """ Encapuslation of a single lookup in the database. """ + SEARCH_PRIO: int = 2 def __init__(self, penalty: float) -> None: self.penalty = penalty @@ -218,9 +279,20 @@ class NearSearch(AbstractSearch): base.sort(key=lambda r: (r.accuracy, r.rank_search)) max_accuracy = base[0].accuracy + 0.5 + if base[0].rank_address == 0: + min_rank = 0 + max_rank = 0 + elif base[0].rank_address < 26: + min_rank = 1 + max_rank = min(25, base[0].rank_address + 4) + else: + min_rank = 26 + max_rank = 30 base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX and r.accuracy <= max_accuracy - and r.bbox and r.bbox.area < 20) + and r.bbox and r.bbox.area < 20 + and r.rank_address >= min_rank + and r.rank_address <= max_rank) if base: baseids = [b.place_id for b in base[:5] if b.place_id] @@ -242,45 +314,52 @@ class NearSearch(AbstractSearch): """ table = await conn.get_class_table(*category) - t = conn.t.placex.alias('p') tgeom = conn.t.placex.alias('pgeom') - sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\ - .where(t.c.class_ == category[0])\ - .where(t.c.type == category[1]) - if table is None: # No classtype table available, do a simplified lookup in placex. - sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\ - .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid)) + table = conn.t.placex + sql = sa.select(table.c.place_id, + sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid)) + .label('dist'))\ + .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\ + .where(table.c.class_ == category[0])\ + .where(table.c.type == category[1]) else: # Use classtype table. We can afford to use a larger # radius for the lookup. - sql = sql.join(table, t.c.place_id == table.c.place_id)\ - .join(tgeom, - sa.case((sa.and_(tgeom.c.rank_address < 9, - tgeom.c.geometry.ST_GeometryType().in_( - ('ST_Polygon', 'ST_MultiPolygon'))), - tgeom.c.geometry.ST_Contains(table.c.centroid)), - else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\ - .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) + sql = sa.select(table.c.place_id, + sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid)) + .label('dist'))\ + .join(tgeom, + table.c.centroid.ST_CoveredBy( + sa.case((sa.and_(tgeom.c.rank_address > 9, + tgeom.c.geometry.is_area()), + tgeom.c.geometry), + else_ = tgeom.c.centroid.ST_Expand(0.05)))) + + inner = sql.where(tgeom.c.place_id.in_(ids))\ + .group_by(table.c.place_id).subquery() + + t = conn.t.placex + sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\ + .join(inner, inner.c.place_id == t.c.place_id)\ + .order_by(inner.c.dist) + sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: - sql = sql.where(t.c.country_code.in_(details.countries)) - if details.min_rank > 0: - sql = sql.where(t.c.rank_address >= details.min_rank) - if details.max_rank < 30: - sql = sql.where(t.c.rank_address <= details.max_rank) + sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) if details.layers is not None: sql = sql.where(_filter_by_layer(t, details.layers)) - for row in await conn.execute(sql.limit(details.max_results)): + sql = sql.limit(LIMIT_PARAM) + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + penalty - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) @@ -290,7 +369,7 @@ class PoiSearch(AbstractSearch): """ def __init__(self, sdata: SearchData) -> None: super().__init__(sdata.penalty) - self.categories = sdata.qualifiers + self.qualifiers = sdata.qualifiers self.countries = sdata.countries @@ -298,62 +377,70 @@ class PoiSearch(AbstractSearch): details: SearchDetails) -> nres.SearchResults: """ Find results for the search in the database. """ + bind_params = _details_to_bind_params(details) t = conn.t.placex rows: List[SaRow] = [] if details.near and details.near_radius is not None and details.near_radius < 0.2: # simply search in placex table - sql = _select_placex(t) \ - .where(t.c.linked_place_id == None) \ - .where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) \ - .order_by(t.c.centroid.ST_Distance(details.near.sql_value())) + def _base_query() -> SaSelect: + return _select_placex(t) \ + .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM)) + .label('importance'))\ + .where(t.c.linked_place_id == None) \ + .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \ + .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \ + .limit(LIMIT_PARAM) + + classtype = self.qualifiers.values + if len(classtype) == 1: + cclass, ctype = classtype[0] + sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query() + .where(t.c.class_ == cclass) + .where(t.c.type == ctype)) + else: + sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ) + for cls, typ in classtype))) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) - - classtype = self.categories.values - if len(classtype) == 1: - sql = sql.where(t.c.class_ == classtype[0][0]) \ - .where(t.c.type == classtype[0][1]) - else: - sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ) - for cls, typ in classtype))) + sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) - rows.extend(await conn.execute(sql.limit(details.max_results))) + rows.extend(await conn.execute(sql, bind_params)) else: # use the class type tables - for category in self.categories.values: + for category in self.qualifiers.values: table = await conn.get_class_table(*category) if table is not None: sql = _select_placex(t)\ + .add_columns(t.c.importance)\ .join(table, t.c.place_id == table.c.place_id)\ .where(t.c.class_ == category[0])\ .where(t.c.type == category[1]) if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(table.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM)) - if details.near: - sql = sql.order_by(table.c.centroid.ST_Distance(details.near.sql_value()))\ - .where(table.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius or 0.5)) + if details.near and details.near_radius is not None: + sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\ + .where(table.c.centroid.within_distance(NEAR_PARAM, + NEAR_RADIUS_PARAM)) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) - rows.extend(await conn.execute(sql.limit(details.max_results))) + sql = sql.limit(LIMIT_PARAM) + rows.extend(await conn.execute(sql, bind_params)) results = nres.SearchResults() for row in rows: result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type)) - result.bbox = Bbox.from_wkb(row.bbox.data) + result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type)) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) return results @@ -362,6 +449,8 @@ class PoiSearch(AbstractSearch): class CountrySearch(AbstractSearch): """ Search for a country name or country code. """ + SEARCH_PRIO = 0 + def __init__(self, sdata: SearchData) -> None: super().__init__(sdata.penalty) self.countries = sdata.countries @@ -373,65 +462,75 @@ class CountrySearch(AbstractSearch): """ t = conn.t.placex + ccodes = self.countries.values sql = _select_placex(t)\ - .where(t.c.country_code.in_(self.countries.values))\ + .add_columns(t.c.importance)\ + .where(t.c.country_code.in_(ccodes))\ .where(t.c.rank_address == 4) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) - if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) - - if details.near is not None and details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = filter_by_area(sql, t, details) results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) - return results or await self.lookup_in_country_table(conn, details) + if not results: + results = await self.lookup_in_country_table(conn, details) + + if results: + details.min_rank = min(5, details.max_rank) + details.max_rank = min(25, details.max_rank) + + return results async def lookup_in_country_table(self, conn: SearchConnection, details: SearchDetails) -> nres.SearchResults: """ Look up the country in the fallback country tables. """ + # Avoid the fallback search when this is a more search. Country results + # usually are in the first batch of results and it is not possible + # to exclude these fallbacks. + if details.excluded: + return nres.SearchResults() + t = conn.t.country_name tgrid = conn.t.country_grid sql = sa.select(tgrid.c.country_code, tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid() - .label('centroid'))\ + .label('centroid'), + tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\ .where(tgrid.c.country_code.in_(self.countries.values))\ .group_by(tgrid.c.country_code) - if details.viewbox is not None and details.bounded_viewbox: - sql = sql.where(tgrid.c.geometry.intersects(details.viewbox.sql_value())) - if details.near is not None and details.near_radius is not None: - sql = sql.where(tgrid.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) + sql = filter_by_area(sql, tgrid, details, avoid_index=True) sub = sql.subquery('grid') sql = sa.select(t.c.country_code, - (t.c.name - + sa.func.coalesce(t.c.derived_name, - sa.cast('', type_=conn.t.types.Composite)) - ).label('name'), - sub.c.centroid)\ + t.c.name.merge(t.c.derived_name).label('name'), + sub.c.centroid, sub.c.bbox)\ .join(sub, t.c.country_code == sub.c.country_code) + if details.geometry_output: + sql = _add_geometry_columns(sql, sub.c.centroid, details) + results = nres.SearchResults() - for row in await conn.execute(sql): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_country_row(row, nres.SearchResult) assert result + result.bbox = Bbox.from_wkb(row.bbox) result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) results.append(result) @@ -455,46 +554,42 @@ class PostcodeSearch(AbstractSearch): """ Find results for the search in the database. """ t = conn.t.postcode + pcs = self.postcodes.values sql = sa.select(t.c.place_id, t.c.parent_place_id, t.c.rank_search, t.c.rank_address, t.c.postcode, t.c.country_code, t.c.geometry.label('centroid'))\ - .where(t.c.postcode.in_(self.postcodes.values)) + .where(t.c.postcode.in_(pcs)) - sql = _add_geometry_columns(sql, t.c.geometry, details) + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) penalty: SaExpression = sa.literal(self.penalty) - if details.viewbox is not None: - if details.bounded_viewbox: - sql = sql.where(t.c.geometry.intersects(details.viewbox.sql_value())) - else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), - else_=2.0) + if details.viewbox is not None and not details.bounded_viewbox: + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5), + else_=1.0) if details.near is not None: - if details.near_radius is not None: - sql = sql.where(t.c.geometry.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.order_by(t.c.geometry.ST_Distance(details.near.sql_value())) + sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM)) + + sql = filter_by_area(sql, t, details) if self.countries: sql = sql.where(t.c.country_code.in_(self.countries.values)) if details.excluded: - sql = sql.where(t.c.place_id.not_in(details.excluded)) + sql = sql.where(_exclude_places(t)) if self.lookups: assert len(self.lookups) == 1 - assert self.lookups[0].lookup_type == 'restrict' tsearch = conn.t.search_name sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\ - .where(sa.func.array_cat(tsearch.c.name_vector, - tsearch.c.nameaddress_vector, - type_=ARRAY(sa.Integer)) - .contains(self.lookups[0].tokens)) + .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector) + .contains(sa.type_coerce(self.lookups[0].tokens, + IntArray))) for ranking in self.rankings: penalty += ranking.sql_penalty(conn.t.search_name) @@ -503,14 +598,32 @@ class PostcodeSearch(AbstractSearch): sql = sql.add_columns(penalty.label('accuracy')) - sql = sql.order_by('accuracy') + sql = sql.order_by('accuracy').limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(details.max_results)): - result = nres.create_from_postcode_row(row, nres.SearchResult) + for row in await conn.execute(sql, _details_to_bind_params(details)): + p = conn.t.placex + placex_sql = _select_placex(p).add_columns(p.c.importance)\ + .where(sa.text("""class = 'boundary' + AND type = 'postal_code' + AND osm_type = 'R'"""))\ + .where(p.c.country_code == row.country_code)\ + .where(p.c.postcode == row.postcode)\ + .limit(1) + + if details.geometry_output: + placex_sql = _add_geometry_columns(placex_sql, p.c.geometry, details) + + for prow in await conn.execute(placex_sql, _details_to_bind_params(details)): + result = nres.create_from_placex_row(prow, nres.SearchResult) + break + else: + result = nres.create_from_postcode_row(row, nres.SearchResult) + assert result - result.accuracy = row.accuracy - results.append(result) + if result.place_id not in details.excluded: + result.accuracy = row.accuracy + results.append(result) return results @@ -519,6 +632,8 @@ class PostcodeSearch(AbstractSearch): class PlaceSearch(AbstractSearch): """ Generic search for an address or named place. """ + SEARCH_PRIO = 1 + def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None: super().__init__(sdata.penalty + extra_penalty) self.countries = sdata.countries @@ -530,106 +645,164 @@ class PlaceSearch(AbstractSearch): self.expected_count = expected_count - async def lookup(self, conn: SearchConnection, - details: SearchDetails) -> nres.SearchResults: - """ Find results for the search in the database. + def _inner_search_name_cte(self, conn: SearchConnection, + details: SearchDetails) -> 'sa.CTE': + """ Create a subquery that preselects the rows in the search_name + table. """ - t = conn.t.placex.alias('p') - tsearch = conn.t.search_name.alias('s') - limit = details.max_results - - sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, - t.c.class_, t.c.type, - t.c.address, t.c.extratags, - t.c.housenumber, t.c.postcode, t.c.country_code, - t.c.wikipedia, - t.c.parent_place_id, t.c.rank_address, t.c.rank_search, - t.c.centroid, - t.c.geometry.ST_Expand(0).label('bbox'))\ - .where(t.c.place_id == tsearch.c.place_id) - - - sql = _add_geometry_columns(sql, t.c.geometry, details) + t = conn.t.search_name penalty: SaExpression = sa.literal(self.penalty) for ranking in self.rankings: - penalty += ranking.sql_penalty(tsearch) + penalty += ranking.sql_penalty(t) + + sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank, + t.c.country_code, t.c.centroid, + t.c.name_vector, t.c.nameaddress_vector, + sa.case((t.c.importance > 0, t.c.importance), + else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75)) + .label('importance'), + penalty.label('penalty')) for lookup in self.lookups: - sql = sql.where(lookup.sql_condition(tsearch)) + sql = sql.where(lookup.sql_condition(t)) if self.countries: - sql = sql.where(tsearch.c.country_code.in_(self.countries.values)) + sql = sql.where(t.c.country_code.in_(self.countries.values)) if self.postcodes: # if a postcode is given, don't search for state or country level objects - sql = sql.where(tsearch.c.address_rank > 9) - tpc = conn.t.postcode - if self.expected_count > 1000: + sql = sql.where(t.c.address_rank > 9) + if self.expected_count > 10000: # Many results expected. Restrict by postcode. + tpc = conn.t.postcode sql = sql.where(sa.select(tpc.c.postcode) .where(tpc.c.postcode.in_(self.postcodes.values)) - .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12)) + .where(t.c.centroid.within_distance(tpc.c.geometry, 0.4)) .exists()) - # Less results, only have a preference for close postcodes - pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\ - .where(tpc.c.postcode.in_(self.postcodes.values))\ - .scalar_subquery() - penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), - else_=sa.func.coalesce(pc_near, 2.0)) - if details.viewbox is not None: if details.bounded_viewbox: - sql = sql.where(tsearch.c.centroid.intersects(details.viewbox.sql_value())) + sql = sql.where(t.c.centroid + .intersects(VIEWBOX_PARAM, + use_index=details.viewbox.area < 0.2)) + elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000: + sql = sql.where(t.c.centroid + .intersects(VIEWBOX2_PARAM, + use_index=details.viewbox.area < 0.5)) + + if details.near is not None and details.near_radius is not None: + if details.near_radius < 0.1: + sql = sql.where(t.c.centroid.within_distance(NEAR_PARAM, + NEAR_RADIUS_PARAM)) else: - penalty += sa.case((t.c.geometry.intersects(details.viewbox.sql_value()), 0.0), - (t.c.geometry.intersects(details.viewbox_x2.sql_value()), 1.0), - else_=2.0) + sql = sql.where(t.c.centroid + .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM) + + if self.housenumbers: + sql = sql.where(t.c.address_rank.between(16, 30)) + else: + if details.excluded: + sql = sql.where(_exclude_places(t)) + if details.min_rank > 0: + sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM, + t.c.search_rank >= MIN_RANK_PARAM)) + if details.max_rank < 30: + sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM, + t.c.search_rank <= MAX_RANK_PARAM)) + + inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery() + + sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank, + inner.c.country_code, inner.c.centroid, inner.c.importance, + inner.c.penalty) + + # If the query is not an address search or has a geographic preference, + # preselect most important items to restrict the number of places + # that need to be looked up in placex. + if not self.housenumbers\ + and (details.viewbox is None or details.bounded_viewbox)\ + and (details.near is None or details.near_radius is not None)\ + and not self.qualifiers: + sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance) + .over(order_by=inner.c.penalty - inner.c.importance) + .label('min_penalty')) + + inner = sql.subquery() + + sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank, + inner.c.country_code, inner.c.centroid, inner.c.importance, + inner.c.penalty)\ + .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5) + + return sql.cte('searches') + + + async def lookup(self, conn: SearchConnection, + details: SearchDetails) -> nres.SearchResults: + """ Find results for the search in the database. + """ + t = conn.t.placex + tsearch = self._inner_search_name_cte(conn, details) + + sql = _select_placex(t).join(tsearch, t.c.place_id == tsearch.c.place_id) + + if details.geometry_output: + sql = _add_geometry_columns(sql, t.c.geometry, details) + + penalty: SaExpression = tsearch.c.penalty + + if self.postcodes: + tpc = conn.t.postcode + pcs = self.postcodes.values + + pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(t.c.centroid)))\ + .where(tpc.c.postcode.in_(pcs))\ + .scalar_subquery() + penalty += sa.case((t.c.postcode.in_(pcs), 0.0), + else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0))) + + if details.viewbox is not None and not details.bounded_viewbox: + penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0), + (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5), + else_=1.0) if details.near is not None: - if details.near_radius is not None: - sql = sql.where(tsearch.c.centroid.ST_DWithin(details.near.sql_value(), - details.near_radius)) - sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(details.near.sql_value()) + sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM)) .label('importance')) sql = sql.order_by(sa.desc(sa.text('importance'))) else: - sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), - else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) - sql = sql.add_columns(t.c.importance) + sql = sql.order_by(penalty - tsearch.c.importance) + sql = sql.add_columns(tsearch.c.importance) sql = sql.add_columns(penalty.label('accuracy'))\ .order_by(sa.text('accuracy')) if self.housenumbers: - hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" - sql = sql.where(tsearch.c.address_rank.between(16, 30))\ - .where(sa.or_(tsearch.c.address_rank < 30, - t.c.housenumber.regexp_match(hnr_regexp, flags='i'))) - - # Cross check for housenumbers, need to do that on a rather large - # set. Worst case there are 40.000 main streets in OSM. - inner = sql.limit(10000).subquery() + hnr_list = '|'.join(self.housenumbers.values) + inner = sql.where(sa.or_(tsearch.c.address_rank < 30, + sa.func.RegexpWord(hnr_list, t.c.housenumber)))\ + .subquery() # Housenumbers from placex thnr = conn.t.placex.alias('hnr') - pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call] + pid_list = sa.func.ArrayAgg(thnr.c.place_id) place_sql = sa.select(pid_list)\ .where(thnr.c.parent_place_id == inner.c.place_id)\ - .where(thnr.c.housenumber.regexp_match(hnr_regexp, flags='i'))\ + .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\ .where(thnr.c.linked_place_id == None)\ .where(thnr.c.indexed_status == 0) if details.excluded: - place_sql = place_sql.where(thnr.c.place_id.not_in(details.excluded)) + place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded'))) if self.qualifiers: place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr)) - numerals = [int(n) for n in self.housenumbers.values if n.isdigit()] - interpol_sql: SaExpression - tiger_sql: SaExpression + numerals = [int(n) for n in self.housenumbers.values + if n.isdigit() and len(n) < 8] + interpol_sql: SaColumn + tiger_sql: SaColumn if numerals and \ (not self.qualifiers or ('place', 'house') in self.qualifiers.values): # Housenumbers from interpolations @@ -641,8 +814,8 @@ class PlaceSearch(AbstractSearch): numerals, details) ), else_=None) else: - interpol_sql = sa.literal_column('NULL') - tiger_sql = sa.literal_column('NULL') + interpol_sql = sa.null() + tiger_sql = sa.null() unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'), interpol_sql.label('interpol_hnr'), @@ -658,27 +831,17 @@ class PlaceSearch(AbstractSearch): .where(t.c.indexed_status == 0) if self.qualifiers: sql = sql.where(self.qualifiers.sql_restrict(t)) - if details.excluded: - sql = sql.where(tsearch.c.place_id.not_in(details.excluded)) - if details.min_rank > 0: - sql = sql.where(sa.or_(tsearch.c.address_rank >= details.min_rank, - tsearch.c.search_rank >= details.min_rank)) - if details.max_rank < 30: - sql = sql.where(sa.or_(tsearch.c.address_rank <= details.max_rank, - tsearch.c.search_rank <= details.max_rank)) if details.layers is not None: sql = sql.where(_filter_by_layer(t, details.layers)) + sql = sql.limit(LIMIT_PARAM) results = nres.SearchResults() - for row in await conn.execute(sql.limit(limit)): + for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.bbox = Bbox.from_wkb(row.bbox.data) + result.bbox = Bbox.from_wkb(row.bbox) result.accuracy = row.accuracy - if not details.excluded or not result.place_id in details.excluded: - results.append(result) - if self.housenumbers and row.rank_address < 30: if row.placex_hnr: subs = _get_placex_housenumbers(conn, row.placex_hnr, details) @@ -698,6 +861,14 @@ class PlaceSearch(AbstractSearch): sub.accuracy += 0.6 results.append(sub) - result.accuracy += 1.0 # penalty for missing housenumber + # Only add the street as a result, if it meets all other + # filter conditions. + if (not details.excluded or result.place_id not in details.excluded)\ + and (not self.qualifiers or result.category in self.qualifiers.values)\ + and result.rank_address >= details.min_rank: + result.accuracy += 1.0 # penalty for missing housenumber + results.append(result) + else: + results.append(result) return results