From: Sarah Hoffmann Date: Mon, 10 Jul 2023 09:45:29 +0000 (+0200) Subject: Merge pull request #3109 from lonvia/prepared-statements X-Git-Tag: v4.3.0~55 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/157f0b8a837a01c0759b17abcf7dec72c8ac8c30?hp=c01386b5b4960df099815f357434f0b944b97b55 Merge pull request #3109 from lonvia/prepared-statements Make prepared statements work with Python API --- diff --git a/nominatim/api/core.py b/nominatim/api/core.py index 8d503fa5..32d420db 100644 --- a/nominatim/api/core.py +++ b/nominatim/api/core.py @@ -54,11 +54,10 @@ class NominatimAPIAsync: return dsn = self.config.get_database_params() + pool_size = self.config.get_int('API_POOL_SIZE') query = {k: v for k, v in dsn.items() if k not in ('user', 'password', 'dbname', 'host', 'port')} - if PGCORE_LIB == 'asyncpg': - query['prepared_statement_cache_size'] = '0' dburl = sa.engine.URL.create( f'postgresql+{PGCORE_LIB}', @@ -67,6 +66,7 @@ class NominatimAPIAsync: host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None, query=query) engine = sa_asyncio.create_async_engine(dburl, future=True, + max_overflow=0, pool_size=pool_size, echo=self.config.get_bool('DEBUG_SQL')) try: diff --git a/nominatim/api/reverse.py b/nominatim/api/reverse.py index 00605d45..3a8be0fd 100644 --- a/nominatim/api/reverse.py +++ b/nominatim/api/reverse.py @@ -17,6 +17,7 @@ import nominatim.api.results as nres from nominatim.api.logging import log from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox from nominatim.db.sqlalchemy_types import Geometry +import nominatim.db.sqlalchemy_functions as snfn # In SQLAlchemy expression which compare with NULL need to be expressed with # the equal sign. @@ -27,6 +28,13 @@ RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.Re WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry) MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank') +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + + def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect: """ Create a select statement with the columns relevant for reverse results. @@ -171,12 +179,17 @@ class ReverseGeocoder: """ t = self.conn.t.placex + # PostgreSQL must not get the distance as a parameter because + # there is a danger it won't be able to proberly estimate index use + # when used with prepared statements + dist_param = sa.text(f"{distance}") + sql = _select_from_placex(t)\ - .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\ + .where(t.c.geometry.ST_DWithin(WKT_PARAM, dist_param))\ .where(t.c.indexed_status == 0)\ .where(t.c.linked_place_id == None)\ .where(sa.or_(sa.not_(t.c.geometry.is_area()), - t.c.centroid.ST_Distance(WKT_PARAM) < distance))\ + t.c.centroid.ST_Distance(WKT_PARAM) < dist_param))\ .order_by('distance')\ .limit(1) @@ -185,17 +198,16 @@ class ReverseGeocoder: restrict: List[SaColumn] = [] if self.layer_enabled(DataLayer.ADDRESS): - restrict.append(sa.and_(t.c.rank_address >= 26, - t.c.rank_address <= min(29, self.max_rank))) + restrict.append(no_index(t.c.rank_address).between(26, min(29, self.max_rank))) if self.max_rank == 30: restrict.append(_is_address_point(t)) if self.layer_enabled(DataLayer.POI) and self.max_rank == 30: - restrict.append(sa.and_(t.c.rank_search == 30, + restrict.append(sa.and_(no_index(t.c.rank_search) == 30, t.c.class_.not_in(('place', 'building')), sa.not_(t.c.geometry.is_line_like()))) if self.has_feature_layers(): - restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM), - t.c.rank_address == 0, + restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM), + no_index(t.c.rank_address) == 0, self._filter_by_layer(t))) if not restrict: @@ -348,13 +360,8 @@ class ReverseGeocoder: # later only a minimum of results needs to be checked with ST_Contains. inner = sa.select(t, sa.literal(0.0).label('distance'))\ .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.geometry.is_area())\ .where(t.c.geometry.intersects(WKT_PARAM))\ - .where(t.c.name != None)\ - .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ + .where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\ .order_by(sa.desc(t.c.rank_search))\ .limit(50)\ .subquery('area') @@ -373,14 +380,10 @@ class ReverseGeocoder: log().comment('Search for better matching place nodes inside the area') inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ - .where(t.c.osm_type == 'N')\ .where(t.c.rank_search > address_row.rank_search)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ + .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\ .where(t.c.geometry .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) .intersects(WKT_PARAM))\ @@ -476,15 +479,11 @@ class ReverseGeocoder: inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\ - .where(t.c.osm_type == 'N')\ .where(t.c.rank_search > 4)\ .where(t.c.rank_search <= MAX_RANK_PARAM)\ - .where(t.c.rank_address.between(5, 25))\ - .where(t.c.name != None)\ .where(t.c.indexed_status == 0)\ - .where(t.c.linked_place_id == None)\ - .where(t.c.type != 'postcode')\ .where(t.c.country_code.in_(ccodes))\ + .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\ .where(t.c.geometry .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search)) .intersects(WKT_PARAM))\ diff --git a/nominatim/db/sqlalchemy_functions.py b/nominatim/db/sqlalchemy_functions.py new file mode 100644 index 00000000..27eec794 --- /dev/null +++ b/nominatim/db/sqlalchemy_functions.py @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2023 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Custom functions and expressions for SQLAlchemy. +""" + +import sqlalchemy as sa + +def select_index_placex_geometry_reverse_lookuppolygon(table: str) -> 'sa.TextClause': + """ Create an expression with the necessary conditions over a placex + table that the index 'idx_placex_geometry_reverse_lookupPolygon' + can be used. + """ + return sa.text(f"ST_GeometryType({table}.geometry) in ('ST_Polygon', 'ST_MultiPolygon')" + f" AND {table}.rank_address between 4 and 25" + f" AND {table}.type != 'postcode'" + f" AND {table}.name is not null" + f" AND {table}.indexed_status = 0" + f" AND {table}.linked_place_id is null") + +def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause': + """ Create an expression with the necessary conditions over a placex + table that the index 'idx_placex_geometry_reverse_lookupPlaceNode' + can be used. + """ + return sa.text(f"{table}.rank_address between 4 and 25" + f" AND {table}.type != 'postcode'" + f" AND {table}.name is not null" + f" AND {table}.linked_place_id is null" + f" AND {table}.osm_type = 'N'") diff --git a/nominatim/db/sqlalchemy_types.py b/nominatim/db/sqlalchemy_types.py index 1718b873..f31966cd 100644 --- a/nominatim/db/sqlalchemy_types.py +++ b/nominatim/db/sqlalchemy_types.py @@ -48,9 +48,7 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg] def bind_expression(self, bindvalue: SaBind) -> SaColumn: - return sa.func.ST_GeomFromText(bindvalue, - sa.bindparam('geometry_srid', value=4326, literal_execute=True), - type_=self) + return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self) class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg] diff --git a/settings/env.defaults b/settings/env.defaults index 9c2f7cac..f9f590da 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -209,6 +209,11 @@ NOMINATIM_POLYGON_OUTPUT_MAX_TYPES=1 # under .php NOMINATIM_SERVE_LEGACY_URLS=yes +# Maximum number of connection a single API object can use. (Python API only) +# When running Nominatim as a server, then this is the maximum number +# of connections _per worker_. +NOMINATIM_API_POOL_SIZE=10 + ### Log settings # # The following options allow to enable logging of API requests.