]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3109 from lonvia/prepared-statements
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 10 Jul 2023 09:45:29 +0000 (11:45 +0200)
committerGitHub <noreply@github.com>
Mon, 10 Jul 2023 09:45:29 +0000 (11:45 +0200)
Make prepared statements work with Python API

nominatim/api/core.py
nominatim/api/reverse.py
nominatim/db/sqlalchemy_functions.py [new file with mode: 0644]
nominatim/db/sqlalchemy_types.py
settings/env.defaults

index 8d503fa5e836cbb9b82ab992530de5cead00bf4b..32d420dbb84f4325cfb31c5a8761fb70c4e28144 100644 (file)
@@ -54,11 +54,10 @@ class NominatimAPIAsync:
                 return
 
             dsn = self.config.get_database_params()
+            pool_size = self.config.get_int('API_POOL_SIZE')
 
             query = {k: v for k, v in dsn.items()
                       if k not in ('user', 'password', 'dbname', 'host', 'port')}
-            if PGCORE_LIB == 'asyncpg':
-                query['prepared_statement_cache_size'] = '0'
 
             dburl = sa.engine.URL.create(
                        f'postgresql+{PGCORE_LIB}',
@@ -67,6 +66,7 @@ class NominatimAPIAsync:
                        host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
                        query=query)
             engine = sa_asyncio.create_async_engine(dburl, future=True,
+                                                    max_overflow=0, pool_size=pool_size,
                                                     echo=self.config.get_bool('DEBUG_SQL'))
 
             try:
index 00605d45b3841ad5742e25287bb2d956fd1f10ad..3a8be0fddd92c4f62fee3f79e0eaecec2883bef8 100644 (file)
@@ -17,6 +17,7 @@ import nominatim.api.results as nres
 from nominatim.api.logging import log
 from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
 from nominatim.db.sqlalchemy_types import Geometry
+import nominatim.db.sqlalchemy_functions as snfn
 
 # In SQLAlchemy expression which compare with NULL need to be expressed with
 # the equal sign.
@@ -27,6 +28,13 @@ RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.Re
 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
 
+def no_index(expr: SaColumn) -> SaColumn:
+    """ Wrap the given expression, so that the query planner will
+        refrain from using the expression for index lookup.
+    """
+    return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
     """ Create a select statement with the columns relevant for reverse
         results.
@@ -171,12 +179,17 @@ class ReverseGeocoder:
         """
         t = self.conn.t.placex
 
+        # PostgreSQL must not get the distance as a parameter because
+        # there is a danger it won't be able to proberly estimate index use
+        # when used with prepared statements
+        dist_param = sa.text(f"{distance}")
+
         sql = _select_from_placex(t)\
-                .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\
+                .where(t.c.geometry.ST_DWithin(WKT_PARAM, dist_param))\
                 .where(t.c.indexed_status == 0)\
                 .where(t.c.linked_place_id == None)\
                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
-                              t.c.centroid.ST_Distance(WKT_PARAM) < distance))\
+                              t.c.centroid.ST_Distance(WKT_PARAM) < dist_param))\
                 .order_by('distance')\
                 .limit(1)
 
@@ -185,17 +198,16 @@ class ReverseGeocoder:
         restrict: List[SaColumn] = []
 
         if self.layer_enabled(DataLayer.ADDRESS):
-            restrict.append(sa.and_(t.c.rank_address >= 26,
-                                    t.c.rank_address <= min(29, self.max_rank)))
+            restrict.append(no_index(t.c.rank_address).between(26, min(29, self.max_rank)))
             if self.max_rank == 30:
                 restrict.append(_is_address_point(t))
         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
-            restrict.append(sa.and_(t.c.rank_search == 30,
+            restrict.append(sa.and_(no_index(t.c.rank_search) == 30,
                                     t.c.class_.not_in(('place', 'building')),
                                     sa.not_(t.c.geometry.is_line_like())))
         if self.has_feature_layers():
-            restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM),
-                                    t.c.rank_address == 0,
+            restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
+                                    no_index(t.c.rank_address) == 0,
                                     self._filter_by_layer(t)))
 
         if not restrict:
@@ -348,13 +360,8 @@ class ReverseGeocoder:
         # later only a minimum of results needs to be checked with ST_Contains.
         inner = sa.select(t, sa.literal(0.0).label('distance'))\
                   .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
-                  .where(t.c.rank_address.between(5, 25))\
-                  .where(t.c.geometry.is_area())\
                   .where(t.c.geometry.intersects(WKT_PARAM))\
-                  .where(t.c.name != None)\
-                  .where(t.c.indexed_status == 0)\
-                  .where(t.c.linked_place_id == None)\
-                  .where(t.c.type != 'postcode')\
+                  .where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
                   .order_by(sa.desc(t.c.rank_search))\
                   .limit(50)\
                   .subquery('area')
@@ -373,14 +380,10 @@ class ReverseGeocoder:
             log().comment('Search for better matching place nodes inside the area')
             inner = sa.select(t,
                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
-                      .where(t.c.osm_type == 'N')\
                       .where(t.c.rank_search > address_row.rank_search)\
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
-                      .where(t.c.rank_address.between(5, 25))\
-                      .where(t.c.name != None)\
                       .where(t.c.indexed_status == 0)\
-                      .where(t.c.linked_place_id == None)\
-                      .where(t.c.type != 'postcode')\
+                      .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
                       .where(t.c.geometry
                                 .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
                                 .intersects(WKT_PARAM))\
@@ -476,15 +479,11 @@ class ReverseGeocoder:
 
             inner = sa.select(t,
                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
-                      .where(t.c.osm_type == 'N')\
                       .where(t.c.rank_search > 4)\
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
-                      .where(t.c.rank_address.between(5, 25))\
-                      .where(t.c.name != None)\
                       .where(t.c.indexed_status == 0)\
-                      .where(t.c.linked_place_id == None)\
-                      .where(t.c.type != 'postcode')\
                       .where(t.c.country_code.in_(ccodes))\
+                      .where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
                       .where(t.c.geometry
                                 .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
                                 .intersects(WKT_PARAM))\
diff --git a/nominatim/db/sqlalchemy_functions.py b/nominatim/db/sqlalchemy_functions.py
new file mode 100644 (file)
index 0000000..27eec79
--- /dev/null
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Custom functions and expressions for SQLAlchemy.
+"""
+
+import sqlalchemy as sa
+
+def select_index_placex_geometry_reverse_lookuppolygon(table: str) -> 'sa.TextClause':
+    """ Create an expression with the necessary conditions over a placex
+        table that the index 'idx_placex_geometry_reverse_lookupPolygon'
+        can be used.
+    """
+    return sa.text(f"ST_GeometryType({table}.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
+                   f" AND {table}.rank_address between 4 and 25"
+                   f" AND {table}.type != 'postcode'"
+                   f" AND {table}.name is not null"
+                   f" AND {table}.indexed_status = 0"
+                   f" AND {table}.linked_place_id is null")
+
+def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
+    """ Create an expression with the necessary conditions over a placex
+        table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
+        can be used.
+    """
+    return sa.text(f"{table}.rank_address between 4 and 25"
+                   f" AND {table}.type != 'postcode'"
+                   f" AND {table}.name is not null"
+                   f" AND {table}.linked_place_id is null"
+                   f" AND {table}.osm_type = 'N'")
index 1718b87307984662b299709867650aa1377f613a..f31966cd60093a4bc09095a8fff83cbf33743219 100644 (file)
@@ -48,9 +48,7 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
 
     def bind_expression(self, bindvalue: SaBind) -> SaColumn:
-        return sa.func.ST_GeomFromText(bindvalue,
-                                       sa.bindparam('geometry_srid', value=4326, literal_execute=True),
-                                       type_=self)
+        return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
 
 
     class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
index 9c2f7cac18da3636b445663627dd400a7683848c..f9f590da65fb50156d1f3139403d7a9cba4a59c6 100644 (file)
@@ -209,6 +209,11 @@ NOMINATIM_POLYGON_OUTPUT_MAX_TYPES=1
 # under <endpoint>.php
 NOMINATIM_SERVE_LEGACY_URLS=yes
 
+# Maximum number of connection a single API object can use. (Python API only)
+# When running Nominatim as a server, then this is the maximum number
+# of connections _per worker_.
+NOMINATIM_API_POOL_SIZE=10
+
 ### Log settings
 #
 # The following options allow to enable logging of API requests.