X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/2e56182a7f2837ee09ca08a3c0ac67557b62114f..ac5ef6470161994850f0088b37f1c441b76bd99e:/nominatim/api/search/db_searches.py diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 5c1d98c9..41434f06 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + + def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: """ Create a dictionary from search parameters that can be used as bind parameter for SQL execute. @@ -61,6 +68,7 @@ def _select_placex(t: SaFromClause) -> SaSelect: t.c.housenumber, t.c.postcode, t.c.country_code, t.c.importance, t.c.wikipedia, t.c.parent_place_id, t.c.rank_address, t.c.rank_search, + t.c.linked_place_id, t.c.admin_level, t.c.centroid, t.c.geometry.ST_Expand(0).label('bbox')) @@ -72,13 +80,13 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification) if details.geometry_output & GeometryFormat.GEOJSON: - out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson')) + out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson')) if details.geometry_output & GeometryFormat.TEXT: out.append(sa.func.ST_AsText(col).label('geometry_text')) if details.geometry_output & GeometryFormat.KML: - out.append(sa.func.ST_AsKML(col).label('geometry_kml')) + out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml')) if details.geometry_output & GeometryFormat.SVG: - out.append(sa.func.ST_AsSVG(col).label('geometry_svg')) + out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg')) return sql.add_columns(*out) @@ -106,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: orexpr: List[SaExpression] = [] if layers & DataLayer.ADDRESS and layers & DataLayer.POI: - orexpr.append(table.c.rank_address.between(1, 30)) + orexpr.append(no_index(table.c.rank_address).between(1, 30)) elif layers & DataLayer.ADDRESS: - orexpr.append(table.c.rank_address.between(1, 29)) - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(no_index(table.c.rank_address).between(1, 29)) + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, sa.or_(table.c.housenumber != None, - table.c.address.has_key('housename')))) + table.c.address.has_key('addr:housename')))) elif layers & DataLayer.POI: - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, table.c.class_.not_in(('place', 'building')))) if layers & DataLayer.MANMADE: @@ -123,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if not layers & DataLayer.NATURAL: exclude.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) else: include = [] if layers & DataLayer.RAILWAY: @@ -131,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if layers & DataLayer.NATURAL: include.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.in_(tuple(include)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) if len(orexpr) == 1: return orexpr[0] @@ -271,7 +279,7 @@ class NearSearch(AbstractSearch): """ table = await conn.get_class_table(*category) - t = conn.t.placex.alias('p') + t = conn.t.placex tgeom = conn.t.placex.alias('pgeom') sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\ @@ -287,13 +295,14 @@ class NearSearch(AbstractSearch): # radius for the lookup. sql = sql.join(table, t.c.place_id == table.c.place_id)\ .join(tgeom, - sa.case((sa.and_(tgeom.c.rank_address < 9, - tgeom.c.geometry.is_area()), - tgeom.c.geometry.ST_Contains(table.c.centroid)), - else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\ + table.c.centroid.ST_CoveredBy( + sa.case((sa.and_(tgeom.c.rank_address < 9, + tgeom.c.geometry.is_area()), + tgeom.c.geometry), + else_ = tgeom.c.centroid.ST_Expand(0.05))))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) - sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) + sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: @@ -316,7 +325,7 @@ class PoiSearch(AbstractSearch): """ def __init__(self, sdata: SearchData) -> None: super().__init__(sdata.penalty) - self.categories = sdata.qualifiers + self.qualifiers = sdata.qualifiers self.countries = sdata.countries @@ -338,7 +347,7 @@ class PoiSearch(AbstractSearch): .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \ .limit(LIMIT_PARAM) - classtype = self.categories.values + classtype = self.qualifiers.values if len(classtype) == 1: cclass, ctype = classtype[0] sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query() @@ -357,7 +366,7 @@ class PoiSearch(AbstractSearch): rows.extend(await conn.execute(sql, bind_params)) else: # use the class type tables - for category in self.categories.values: + for category in self.qualifiers.values: table = await conn.get_class_table(*category) if table is not None: sql = _select_placex(t)\ @@ -383,7 +392,7 @@ class PoiSearch(AbstractSearch): for row in rows: result = nres.create_from_placex_row(row, nres.SearchResult) assert result - result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type)) + result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type)) result.bbox = Bbox.from_wkb(row.bbox) results.append(result) @@ -405,9 +414,9 @@ class CountrySearch(AbstractSearch): t = conn.t.placex ccodes = self.countries.values - sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_placex(t)\ + sql = _select_placex(t)\ .where(t.c.country_code.in_(ccodes))\ - .where(t.c.rank_address == 4)) + .where(t.c.rank_address == 4) if details.geometry_output: sql = _add_geometry_columns(sql, t.c.geometry, details) @@ -426,6 +435,7 @@ class CountrySearch(AbstractSearch): result = nres.create_from_placex_row(row, nres.SearchResult) assert result result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) + result.bbox = Bbox.from_wkb(row.bbox) results.append(result) return results or await self.lookup_in_country_table(conn, details) @@ -446,7 +456,8 @@ class CountrySearch(AbstractSearch): sql = sa.select(tgrid.c.country_code, tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid() - .label('centroid'))\ + .label('centroid'), + tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\ .where(tgrid.c.country_code.in_(self.countries.values))\ .group_by(tgrid.c.country_code) @@ -462,13 +473,17 @@ class CountrySearch(AbstractSearch): + sa.func.coalesce(t.c.derived_name, sa.cast('', type_=conn.t.types.Composite)) ).label('name'), - sub.c.centroid)\ + sub.c.centroid, sub.c.bbox)\ .join(sub, t.c.country_code == sub.c.country_code) + if details.geometry_output: + sql = _add_geometry_columns(sql, sub.c.centroid, details) + results = nres.SearchResults() for row in await conn.execute(sql, _details_to_bind_params(details)): result = nres.create_from_country_row(row, nres.SearchResult) assert result + result.bbox = Bbox.from_wkb(row.bbox) result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0) results.append(result) @@ -494,12 +509,11 @@ class PostcodeSearch(AbstractSearch): t = conn.t.postcode pcs = self.postcodes.values - sql: SaLambdaSelect = sa.lambda_stmt(lambda: - sa.select(t.c.place_id, t.c.parent_place_id, + sql = sa.select(t.c.place_id, t.c.parent_place_id, t.c.rank_search, t.c.rank_address, t.c.postcode, t.c.country_code, - t.c.geometry.label('centroid')) - .where(t.c.postcode.in_(pcs))) + t.c.geometry.label('centroid'))\ + .where(t.c.postcode.in_(pcs)) if details.geometry_output: sql = _add_geometry_columns(sql, t.c.geometry, details) @@ -511,8 +525,8 @@ class PostcodeSearch(AbstractSearch): sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM)) else: penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), - (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), - else_=2.0) + (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5), + else_=1.0) if details.near is not None: if details.near_radius is not None: @@ -579,7 +593,7 @@ class PlaceSearch(AbstractSearch): sql: SaLambdaSelect = sa.lambda_stmt(lambda: sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name, t.c.class_, t.c.type, - t.c.address, t.c.extratags, + t.c.address, t.c.extratags, t.c.admin_level, t.c.housenumber, t.c.postcode, t.c.country_code, t.c.wikipedia, t.c.parent_place_id, t.c.rank_address, t.c.rank_search, @@ -608,7 +622,7 @@ class PlaceSearch(AbstractSearch): pcs = self.postcodes.values if self.expected_count > 1000: # Many results expected. Restrict by postcode. - sql = sql.where(lambda: sa.select(tpc.c.postcode) + sql = sql.where(sa.select(tpc.c.postcode) .where(tpc.c.postcode.in_(pcs)) .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12)) .exists()) @@ -622,26 +636,43 @@ class PlaceSearch(AbstractSearch): if details.viewbox is not None: if details.bounded_viewbox: - sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM)) + if details.viewbox.area < 0.2: + sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM)) + else: + sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM)) + elif self.expected_count >= 10000: + if details.viewbox.area < 0.5: + sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM)) + else: + sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM)) else: penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0), - (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0), - else_=2.0) + (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5), + else_=1.0) if details.near is not None: if details.near_radius is not None: - sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) - sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM) + if details.near_radius < 0.1: + sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) + else: + sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM, + NEAR_RADIUS_PARAM)) + sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM)) .label('importance')) sql = sql.order_by(sa.desc(sa.text('importance'))) else: - sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), - else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) + if self.expected_count < 10000\ + or (details.viewbox is not None and details.viewbox.area < 0.5): + sql = sql.order_by( + penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance), + else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40))) sql = sql.add_columns(t.c.importance) - sql = sql.add_columns(penalty.label('accuracy'))\ - .order_by(sa.text('accuracy')) + sql = sql.add_columns(penalty.label('accuracy')) + + if self.expected_count < 10000: + sql = sql.order_by(sa.text('accuracy')) if self.housenumbers: hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M" @@ -663,11 +694,12 @@ class PlaceSearch(AbstractSearch): .where(thnr.c.indexed_status == 0) if details.excluded: - place_sql = place_sql.where(_exclude_places(thnr)) + place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded'))) if self.qualifiers: place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr)) - numerals = [int(n) for n in self.housenumbers.values if n.isdigit()] + numerals = [int(n) for n in self.housenumbers.values + if n.isdigit() and len(n) < 8] interpol_sql: SaColumn tiger_sql: SaColumn if numerals and \