git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3591 from lonvia/increase-required-postgresql
author    Sarah Hoffmann <lonvia@denofr.de>
          Mon, 18 Nov 2024 12:37:03 +0000 (13:37 +0100)
committer GitHub <noreply@github.com>
          Mon, 18 Nov 2024 12:37:03 +0000 (13:37 +0100)
Increase version requirements for PostgreSQL and PostGIS

15 files changed:
.github/actions/build-nominatim/action.yml
.github/workflows/ci-tests.yml
docs/admin/Installation.md
lib-sql/indices.sql
lib-sql/tables.sql
lib-sql/tiger_import_finish.sql
src/nominatim_api/core.py
src/nominatim_db/db/sql_preprocessor.py
src/nominatim_db/tokenizer/icu_tokenizer.py
src/nominatim_db/tools/check_database.py
src/nominatim_db/tools/database_import.py
src/nominatim_db/tools/refresh.py
src/nominatim_db/tools/replication.py
src/nominatim_db/version.py
test/bdd/api/reverse/geometry.feature

diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml
index d601fc7b7880534eec5934e4bb0e7ab57a2c03c1..d05fd319b6d6a1c5c43e10d4d6094ef3f043d032 100644
@@ -25,7 +25,7 @@ runs:
           shell: bash
         - name: Install${{ matrix.flavour }} prerequisites
           run: |
-            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite-dev libsqlite3-mod-spatialite
             if [ "$FLAVOUR" == "oldstuff" ]; then
                 pip3 install MarkupSafe==2.0.1 python-dotenv jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 psycopg==3.1.7 datrie asyncpg aiosqlite
             else
diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml
index fb664d9917411eab4839e74d23676b008c929e3c..293743df03f5e3546cb60a28f439f77b6a896c43 100644
@@ -37,21 +37,16 @@ jobs:
         needs: create-archive
         strategy:
             matrix:
-                flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
+                flavour: ["ubuntu-20", "ubuntu-24"]
                 include:
-                    - flavour: oldstuff
-                      ubuntu: 20
-                      postgresql: '9.6'
-                      postgis: '2.5'
-                      lua: '5.1'
                     - flavour: ubuntu-20
                       ubuntu: 20
-                      postgresql: 13
+                      postgresql: 12
                       postgis: 3
-                      lua: '5.3'
-                    - flavour: ubuntu-22
-                      ubuntu: 22
-                      postgresql: 15
+                      lua: '5.1'
+                    - flavour: ubuntu-24
+                      ubuntu: 24
+                      postgresql: 17
                       postgis: 3
                       lua: '5.3'
 
@@ -80,37 +75,25 @@ jobs:
                   flavour: ${{ matrix.flavour }}
                   lua: ${{ matrix.lua }}
 
-            - name: Install test prerequisites (behave from apt)
-              run: sudo apt-get install -y -qq python3-behave
-              if: matrix.flavour == 'ubuntu-20'
-
-            - name: Install test prerequisites (behave from pip)
+            - name: Install test prerequisites
               run: pip3 install behave==1.2.6
-              if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
 
-            - name: Install test prerequisites (from apt for Ununtu 2x)
+            - name: Install test prerequisites
               run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
-              if: matrix.flavour != 'oldstuff'
 
             - name: Install newer pytest-asyncio
               run: pip3 install -U pytest-asyncio
               if: matrix.flavour == 'ubuntu-20'
 
-            - name: Install test prerequisites (from pip for Ubuntu 18)
-              run: pip3 install pytest pytest-asyncio uvicorn
-              if: matrix.flavour == 'oldstuff'
-
             - name: Install Python webservers
               run: pip3 install falcon starlette asgi_lifespan
 
             - name: Install latest flake8
               run: pip3 install -U flake8
-              if: matrix.flavour == 'ubuntu-22'
 
             - name: Python linting
               run: python3 -m flake8 src
               working-directory: Nominatim
-              if: matrix.flavour == 'ubuntu-22'
 
             - name: Python unit tests
               run: python3 -m pytest test/python
@@ -124,12 +107,10 @@ jobs:
 
             - name: Install mypy and typechecking info
               run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
-              if: matrix.flavour != 'oldstuff'
 
             - name: Python static typechecking
-              run: python3 -m mypy --strict src
+              run: python3 -m mypy --strict --python-version 3.8 src
               working-directory: Nominatim
-              if: matrix.flavour != 'oldstuff'
 
     install:
         runs-on: ubuntu-latest
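
The mypy step above is now unconditional but pinned with `--python-version 3.8`, so the CI's newer interpreter still typechecks against older-Python semantics. A hedged illustration with a hypothetical file: mypy parses source for the target version, so later-release syntax gets flagged even though the host interpreter could execute it.

```python
# demo.py -- hypothetical example.  Running
# 'python3 -m mypy --strict --python-version 3.8 demo.py' rejects the
# match statement (3.10+ syntax) that the CI interpreter itself could run.
def describe(value: int) -> str:
    match value:
        case 0:
            return 'zero'
        case _:
            return 'non-zero'
```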
diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md
index 78062908c9ca230179023a960008a6f04fd9deb1..3db16c3bf463a34c8bf65e519efe5501167d83f8 100644
@@ -22,15 +22,10 @@ and can't offer support.
 
 ### Software
 
-!!! Warning
-    For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
-    otherwise import and queries will be slow to the point of being unusable.
-    Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.
-
 For running Nominatim:
 
-  * [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
-  * [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
+  * [PostgreSQL](https://www.postgresql.org) (12+ will work, 13+ strongly recommended)
+  * [PostGIS](https://postgis.net) (3.0+ will work, 3.2+ strongly recommended)
   * [osm2pgsql](https://osm2pgsql.org) (1.8+, optional when building with CMake)
   * [Python 3](https://www.python.org/) (3.7+)
 
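The documented floor now matches the code: PostgreSQL 12+ and PostGIS 3.0+. For reference, a minimal sketch of checking those minimums against a live server — assuming psycopg 3 and a placeholder DSN; the project's real check lives in `src/nominatim_db/tools/check_database.py`:

```python
# Hypothetical standalone check, not Nominatim's actual code.
import psycopg

POSTGRESQL_REQUIRED_VERSION = (12, 0)   # mirrors src/nominatim_db/version.py
POSTGIS_REQUIRED_VERSION = (3, 0)

def check_server_versions(dsn: str) -> None:
    with psycopg.connect(dsn) as conn, conn.cursor() as cur:
        # server_version_num encodes 12.0 as 120000 and 17.2 as 170002.
        cur.execute('SHOW server_version_num')
        num = int(cur.fetchone()[0])
        if (num // 10000, num % 10000) < POSTGRESQL_REQUIRED_VERSION:
            raise RuntimeError('PostgreSQL 12+ is required')
        # postgis_lib_version() returns a version string such as '3.4.2'.
        cur.execute('SELECT postgis_lib_version()')
        major, minor = (int(p) for p in cur.fetchone()[0].split('.')[:2])
        if (major, minor) < POSTGIS_REQUIRED_VERSION:
            raise RuntimeError('PostGIS 3.0+ is required')
```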
diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql
index 4d92452d1f588bdc8a0be63cd6d522e96bdab25d..8a4e91cd0a1c0cd463975206cd63ed485aa9c01a 100644
@@ -97,18 +97,14 @@ CREATE INDEX IF NOT EXISTS idx_postcode_postcode
 ---
   CREATE INDEX IF NOT EXISTS idx_search_name_centroid
     ON search_name USING GIST (centroid) {{db.tablespace.search_index}};
-
-  {% if postgres.has_index_non_key_column %}
----
-    CREATE INDEX IF NOT EXISTS idx_placex_housenumber
-      ON placex USING btree (parent_place_id)
-      INCLUDE (housenumber) {{db.tablespace.search_index}}
-      WHERE housenumber is not null;
----
-    CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
-      ON location_property_osmline USING btree(parent_place_id)
-      INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
-      WHERE startnumber is not null;
-  {% endif %}
-
+---
+  CREATE INDEX IF NOT EXISTS idx_placex_housenumber
+    ON placex USING btree (parent_place_id)
+    INCLUDE (housenumber) {{db.tablespace.search_index}}
+    WHERE housenumber is not null;
+---
+  CREATE INDEX IF NOT EXISTS idx_osmline_parent_osm_id_with_hnr
+    ON location_property_osmline USING btree(parent_place_id)
+    INCLUDE (startnumber, endnumber) {{db.tablespace.search_index}}
+    WHERE startnumber is not null;
 {% endif %}
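
With PostgreSQL 12 guaranteed, the `INCLUDE` clause (a PostgreSQL 11 feature) no longer needs the `postgres.has_index_non_key_column` guard. The payload columns travel in the index leaf pages, so a housenumber lookup by `parent_place_id` can be satisfied by an index-only scan. A hedged way to verify this against an imported database — plan shape still depends on statistics and the visibility map; DSN and id are placeholders:

```python
import psycopg

with psycopg.connect('dbname=nominatim') as conn, conn.cursor() as cur:
    cur.execute("""EXPLAIN SELECT housenumber FROM placex
                    WHERE parent_place_id = 12345
                      AND housenumber is not null""")
    for (line,) in cur.fetchall():
        # Ideally starts with: Index Only Scan using idx_placex_housenumber
        print(line)
```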
diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql
index d3bc972a5e94e053c17b06b010627836935385f7..cde33952fe2fc3020d5c7c31848beb1d9032fa56 100644
@@ -184,21 +184,21 @@ CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
 
 -- Usage: - POI is within building with housenumber
 CREATE INDEX idx_placex_geometry_buildings ON placex
-  USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
+  USING SPGIST (geometry) {{db.tablespace.address_index}}
   WHERE address is not null and rank_search = 30
         and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
 
 -- Usage: - linking of similar named places to boundaries
 --        - linking of place nodes with same type to boundaries
 CREATE INDEX idx_placex_geometry_placenode ON placex
-  USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
+  USING SPGIST (geometry) {{db.tablespace.address_index}}
   WHERE osm_type = 'N' and rank_search < 26
         and class = 'place' and type != 'postcode';
 
 -- Usage: - is node part of a way?
 --        - find parent of interpolation spatially
 CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
-  USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
+  USING SPGIST (geometry) {{db.tablespace.address_index}}
   WHERE osm_type = 'W' and rank_search >= 26;
 
 -- Usage: - linking place nodes by wikidata tag to boundaries
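
`{{postgres.spgist_geom}}` becomes a literal `SPGIST` because SP-GiST operator classes for geometry exist in every supported PostGIS 3 release. These remain partial indexes, so the planner only considers them when a query restates the `WHERE` predicate — a sketch of such a query, with placeholder DSN and bounding box:

```python
import psycopg

with psycopg.connect('dbname=nominatim') as conn, conn.cursor() as cur:
    # The predicate mirrors idx_placex_geometry_lower_rank_ways above.
    cur.execute("""EXPLAIN SELECT place_id FROM placex
                    WHERE osm_type = 'W' AND rank_search >= 26
                      AND geometry && ST_MakeEnvelope(9.52, 47.13,
                                                      9.53, 47.14, 4326)""")
    for (line,) in cur.fetchall():
        print(line)
```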
diff --git a/lib-sql/tiger_import_finish.sql b/lib-sql/tiger_import_finish.sql
index c02ce2a3d79cc396c5bb9c33de4cdf38295847b6..b7c32d72884df98a08a0105feb553b2222471bf7 100644
@@ -8,9 +8,7 @@
 --index only on parent_place_id
 CREATE INDEX IF NOT EXISTS idx_location_property_tiger_parent_place_id_imp
   ON location_property_tiger_import (parent_place_id)
-{% if postgres.has_index_non_key_column %}
   INCLUDE (startnumber, endnumber, step)
-{% endif %}
   {{db.tablespace.aux_index}};
 CREATE UNIQUE INDEX IF NOT EXISTS idx_location_property_tiger_place_id_imp
   ON location_property_tiger_import (place_id) {{db.tablespace.aux_index}};
diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py
index c45b24de5d8dc8751e770dfe96cc39b8b8a21039..3cf9e989141b5f118fe8d287805ad754124d42df 100644
@@ -138,19 +138,17 @@ class NominatimAPIAsync:
                     async with engine.begin() as conn:
                         result = await conn.scalar(sa.text('SHOW server_version_num'))
                         server_version = int(result)
-                        if server_version >= 110000:
-                            await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
-                            await conn.execute(sa.text(
-                                    "SET max_parallel_workers_per_gather TO '0'"))
+                        await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
+                        await conn.execute(sa.text(
+                                "SET max_parallel_workers_per_gather TO '0'"))
                 except (PGCORE_ERROR, sa.exc.OperationalError):
                     server_version = 0
 
-                if server_version >= 110000:
-                    @sa.event.listens_for(engine.sync_engine, "connect")
-                    def _on_connect(dbapi_con: Any, _: Any) -> None:
-                        cursor = dbapi_con.cursor()
-                        cursor.execute("SET jit_above_cost TO '-1'")
-                        cursor.execute("SET max_parallel_workers_per_gather TO '0'")
+                @sa.event.listens_for(engine.sync_engine, "connect")
+                def _on_connect(dbapi_con: Any, _: Any) -> None:
+                    cursor = dbapi_con.cursor()
+                    cursor.execute("SET jit_above_cost TO '-1'")
+                    cursor.execute("SET max_parallel_workers_per_gather TO '0'")
 
             self._property_cache['DB:server_version'] = server_version
 
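Both session settings are now applied unconditionally: `max_parallel_workers_per_gather` exists in every supported release and `jit_above_cost` since PostgreSQL 11. The surviving pattern — running `SET` statements on every new DBAPI connection via SQLAlchemy's `connect` event — in a minimal synchronous sketch, with a placeholder engine URL:

```python
from typing import Any

import sqlalchemy as sa

engine = sa.create_engine('postgresql+psycopg://localhost/nominatim')

@sa.event.listens_for(engine, 'connect')
def _on_connect(dbapi_con: Any, _: Any) -> None:
    # Executed once per new DBAPI connection, before it is handed out.
    cursor = dbapi_con.cursor()
    cursor.execute("SET jit_above_cost TO '-1'")
    cursor.execute("SET max_parallel_workers_per_gather TO '0'")
    cursor.close()
```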
diff --git a/src/nominatim_db/db/sql_preprocessor.py b/src/nominatim_db/db/sql_preprocessor.py
index 85dbaabc97bc582fa92a1126d812d7dc5ebe01fb..4424b3d81f9e5088ce2bb014c46d6c74bcc38b93 100644
@@ -11,7 +11,7 @@ from typing import Set, Dict, Any, cast
 
 import jinja2
 
-from .connection import Connection, server_version_tuple, postgis_version_tuple
+from .connection import Connection
 from ..config import Configuration
 from ..db.query_pool import QueryPool
 
@@ -69,14 +69,7 @@ def _setup_postgresql_features(conn: Connection) -> Dict[str, Any]:
     """ Set up a dictionary with various optional Postgresql/Postgis features that
         depend on the database version.
     """
-    pg_version = server_version_tuple(conn)
-    postgis_version = postgis_version_tuple(conn)
-    pg11plus = pg_version >= (11, 0, 0)
-    ps3 = postgis_version >= (3, 0)
-    return {
-        'has_index_non_key_column': pg11plus,
-        'spgist_geom': 'SPGIST' if pg11plus and ps3 else 'GIST'
-    }
+    return {}
 
 
 class SQLPreprocessor:
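
`_setup_postgresql_features` returning an empty dict is all that is left once both feature flags are constant. For context, a sketch of how the Jinja2 placeholders in the SQL templates were resolved before this change — the values mirror the deleted branch:

```python
import jinja2

template = jinja2.Template(
    'CREATE INDEX idx_demo ON placex'
    ' USING {{ postgres.spgist_geom }} (geometry);')

# PostgreSQL 11+ together with PostGIS 3 used to select SP-GiST ...
print(template.render(postgres={'spgist_geom': 'SPGIST'}))
# ... while anything older fell back to GiST.
print(template.render(postgres={'spgist_geom': 'GIST'}))
```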
diff --git a/src/nominatim_db/tokenizer/icu_tokenizer.py b/src/nominatim_db/tokenizer/icu_tokenizer.py
index 83928644a9c3a9964e26af05c81ef061b8cfeb05..5595fcb2c604ae309bdb08dccf82e150764308dc 100644
@@ -17,7 +17,7 @@ from pathlib import Path
 from psycopg.types.json import Jsonb
 from psycopg import sql as pysql
 
-from ..db.connection import connect, Connection, Cursor, server_version_tuple, \
+from ..db.connection import connect, Connection, Cursor, \
                             drop_tables, table_exists, execute_scalar
 from ..config import Configuration
 from ..db.sql_preprocessor import SQLPreprocessor
@@ -110,80 +110,37 @@ class ICUTokenizer(AbstractTokenizer):
                     cur.execute(pysql.SQL('SET max_parallel_workers_per_gather TO {}')
                                      .format(pysql.Literal(min(threads, 6),)))
 
-                if server_version_tuple(conn) < (12, 0):
-                    LOG.info('Computing word frequencies')
-                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
-                    cur.execute("""CREATE TEMP TABLE word_frequencies AS
-                                     SELECT unnest(name_vector) as id, count(*)
-                                     FROM search_name GROUP BY id""")
-                    cur.execute('CREATE INDEX ON word_frequencies(id)')
-                    cur.execute("""CREATE TEMP TABLE addressword_frequencies AS
-                                     SELECT unnest(nameaddress_vector) as id, count(*)
-                                     FROM search_name GROUP BY id""")
-                    cur.execute('CREATE INDEX ON addressword_frequencies(id)')
-                    cur.execute("""
-                        CREATE OR REPLACE FUNCTION word_freq_update(wid INTEGER,
-                                                                    INOUT info JSONB)
-                        AS $$
-                        DECLARE rec RECORD;
-                        BEGIN
-                        IF info is null THEN
-                          info = '{}'::jsonb;
-                        END IF;
-                        FOR rec IN SELECT count FROM word_frequencies WHERE id = wid
-                        LOOP
-                          info = info || jsonb_build_object('count', rec.count);
-                        END LOOP;
-                        FOR rec IN SELECT count FROM addressword_frequencies WHERE id = wid
-                        LOOP
-                          info = info || jsonb_build_object('addr_count', rec.count);
-                        END LOOP;
-                        IF info = '{}'::jsonb THEN
-                          info = null;
-                        END IF;
-                        END;
-                        $$ LANGUAGE plpgsql IMMUTABLE;
-                        """)
-                    LOG.info('Update word table with recomputed frequencies')
-                    drop_tables(conn, 'tmp_word')
-                    cur.execute("""CREATE TABLE tmp_word AS
-                                    SELECT word_id, word_token, type, word,
-                                           word_freq_update(word_id, info) as info
-                                    FROM word
-                                """)
-                    drop_tables(conn, 'word_frequencies', 'addressword_frequencies')
-                else:
-                    LOG.info('Computing word frequencies')
-                    drop_tables(conn, 'word_frequencies')
-                    cur.execute("""
-                      CREATE TEMP TABLE word_frequencies AS
-                      WITH word_freq AS MATERIALIZED (
-                               SELECT unnest(name_vector) as id, count(*)
-                                     FROM search_name GROUP BY id),
-                           addr_freq AS MATERIALIZED (
-                               SELECT unnest(nameaddress_vector) as id, count(*)
-                                     FROM search_name GROUP BY id)
-                      SELECT coalesce(a.id, w.id) as id,
-                             (CASE WHEN w.count is null THEN '{}'::JSONB
-                                  ELSE jsonb_build_object('count', w.count) END
-                              ||
-                              CASE WHEN a.count is null THEN '{}'::JSONB
-                                  ELSE jsonb_build_object('addr_count', a.count) END) as info
-                      FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
-                      """)
-                    cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
-                    cur.execute('ANALYSE word_frequencies')
-                    LOG.info('Update word table with recomputed frequencies')
-                    drop_tables(conn, 'tmp_word')
-                    cur.execute("""CREATE TABLE tmp_word AS
-                                    SELECT word_id, word_token, type, word,
-                                           (CASE WHEN wf.info is null THEN word.info
-                                            ELSE coalesce(word.info, '{}'::jsonb) || wf.info
-                                            END) as info
-                                    FROM word LEFT JOIN word_frequencies wf
-                                         ON word.word_id = wf.id
-                                """)
-                    drop_tables(conn, 'word_frequencies')
+                LOG.info('Computing word frequencies')
+                drop_tables(conn, 'word_frequencies')
+                cur.execute("""
+                  CREATE TEMP TABLE word_frequencies AS
+                  WITH word_freq AS MATERIALIZED (
+                           SELECT unnest(name_vector) as id, count(*)
+                                 FROM search_name GROUP BY id),
+                       addr_freq AS MATERIALIZED (
+                           SELECT unnest(nameaddress_vector) as id, count(*)
+                                 FROM search_name GROUP BY id)
+                  SELECT coalesce(a.id, w.id) as id,
+                         (CASE WHEN w.count is null THEN '{}'::JSONB
+                              ELSE jsonb_build_object('count', w.count) END
+                          ||
+                          CASE WHEN a.count is null THEN '{}'::JSONB
+                              ELSE jsonb_build_object('addr_count', a.count) END) as info
+                  FROM word_freq w FULL JOIN addr_freq a ON a.id = w.id;
+                  """)
+                cur.execute('CREATE UNIQUE INDEX ON word_frequencies(id) INCLUDE(info)')
+                cur.execute('ANALYSE word_frequencies')
+                LOG.info('Update word table with recomputed frequencies')
+                drop_tables(conn, 'tmp_word')
+                cur.execute("""CREATE TABLE tmp_word AS
+                                SELECT word_id, word_token, type, word,
+                                       (CASE WHEN wf.info is null THEN word.info
+                                        ELSE coalesce(word.info, '{}'::jsonb) || wf.info
+                                        END) as info
+                                FROM word LEFT JOIN word_frequencies wf
+                                     ON word.word_id = wf.id
+                            """)
+                drop_tables(conn, 'word_frequencies')
 
             with conn.cursor() as cur:
                 cur.execute('SET max_parallel_workers_per_gather TO 0')
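
Only the CTE-based variant survives: `WITH ... AS MATERIALIZED` needs PostgreSQL 12, which is precisely the new floor. The `||` operator it relies on merges the name and address counts into one JSONB object per word; a quick standalone check, with a placeholder DSN (psycopg 3 decodes jsonb to a dict):

```python
import psycopg

with psycopg.connect('dbname=nominatim') as conn, conn.cursor() as cur:
    cur.execute("""SELECT jsonb_build_object('count', 42)
                       || jsonb_build_object('addr_count', 7)""")
    print(cur.fetchone()[0])  # {'count': 42, 'addr_count': 7}
```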
diff --git a/src/nominatim_db/tools/check_database.py b/src/nominatim_db/tools/check_database.py
index 79770142494821fc23bee2c63b72940911e43300..4c6f0331241c7b7db2e3471de0a660dc3cc8103a 100644
@@ -12,7 +12,7 @@ from enum import Enum
 from textwrap import dedent
 
 from ..config import Configuration
-from ..db.connection import connect, Connection, server_version_tuple, \
+from ..db.connection import connect, Connection, \
                             index_exists, table_exists, execute_scalar
 from ..db import properties
 from ..errors import UsageError
@@ -121,10 +121,9 @@ def _get_indexes(conn: Connection) -> List[str]:
     if table_exists(conn, 'search_name'):
         indexes.extend(('idx_search_name_nameaddress_vector',
                         'idx_search_name_name_vector',
-                        'idx_search_name_centroid'))
-        if server_version_tuple(conn) >= (11, 0, 0):
-            indexes.extend(('idx_placex_housenumber',
-                            'idx_osmline_parent_osm_id_with_hnr'))
+                        'idx_search_name_centroid',
+                        'idx_placex_housenumber',
+                        'idx_osmline_parent_osm_id_with_hnr'))
 
     # These won't exist if --no-updates import was used
     if table_exists(conn, 'place'):
diff --git a/src/nominatim_db/tools/database_import.py b/src/nominatim_db/tools/database_import.py
index 415e9d249f3277ce95bc048c9c8858b684cfd1c3..a7ee807ebcd302855b2f23e2a4dc2935de4ed6ef 100644
@@ -98,10 +98,7 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
         with conn.cursor() as cur:
             cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
             cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
-
-            postgis_version = postgis_version_tuple(conn)
-            if postgis_version[0] >= 3:
-                cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
+            cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
 
         conn.commit()
 
diff --git a/src/nominatim_db/tools/refresh.py b/src/nominatim_db/tools/refresh.py
index dc98fe4140b1b552abce67609dda467690a9772d..80d0a74dac833fe1912664e2bd1aa1baf63c041e 100644
@@ -16,8 +16,7 @@ from pathlib import Path
 from psycopg import sql as pysql
 
 from ..config import Configuration
-from ..db.connection import Connection, connect, postgis_version_tuple, \
-                            drop_tables
+from ..db.connection import Connection, connect, drop_tables
 from ..db.utils import execute_file
 from ..db.sql_preprocessor import SQLPreprocessor
 
@@ -190,12 +189,6 @@ def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool =
     if not datafile.exists():
         return 1
 
-    with connect(dsn) as conn:
-        postgis_version = postgis_version_tuple(conn)
-        if postgis_version[0] < 3:
-            LOG.error('PostGIS version is too old for using OSM raster data.')
-            return 2
-
     execute_file(dsn, datafile, ignore_errors=ignore_errors)
 
     return 0
diff --git a/src/nominatim_db/tools/replication.py b/src/nominatim_db/tools/replication.py
index 082523cd7483d0bcd7a27dc7d99f1f6dccbc7a86..e50eedee26a2fa7bc628c0e67c2b36173d9fa7f6 100644
@@ -18,7 +18,7 @@ import urllib.request as urlrequest
 
 from ..errors import UsageError
 from ..db import status
-from ..db.connection import Connection, connect, server_version_tuple
+from ..db.connection import Connection, connect
 from .exec_utils import run_osm2pgsql
 
 try:
@@ -156,7 +156,7 @@ def run_osm2pgsql_updates(conn: Connection, options: MutableMapping[str, Any]) -
 
     # Consume updates with osm2pgsql.
     options['append'] = True
-    options['disable_jit'] = server_version_tuple(conn) >= (11, 0)
+    options['disable_jit'] = True
     run_osm2pgsql(options)
 
     # Handle deletions
diff --git a/src/nominatim_db/version.py b/src/nominatim_db/version.py
index 76de017dca23ae6248d91fb07fac7b77a083ffa9..75f2b7d6f53eaeacb521e415b350b63ac1a5356a 100644
@@ -57,8 +57,8 @@ def parse_version(version: str) -> NominatimVersion:
 
 NOMINATIM_VERSION = parse_version('4.5.0-0')
 
-POSTGRESQL_REQUIRED_VERSION = (9, 6)
-POSTGIS_REQUIRED_VERSION = (2, 2)
+POSTGRESQL_REQUIRED_VERSION = (12, 0)
+POSTGIS_REQUIRED_VERSION = (3, 0)
 OSM2PGSQL_REQUIRED_VERSION = (1, 8)
 
 # Cmake sets a variable @GIT_HASH@ by executing 'git --log'. It is not run
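
The requirements stay plain tuples because Python compares tuples element-wise, which orders `(major, minor)` pairs correctly:

```python
POSTGRESQL_REQUIRED_VERSION = (12, 0)

assert (17, 2) >= POSTGRESQL_REQUIRED_VERSION     # the new Ubuntu 24 CI job
assert (12, 0) >= POSTGRESQL_REQUIRED_VERSION     # exactly the minimum
assert not (9, 6) >= POSTGRESQL_REQUIRED_VERSION  # formerly supported, now rejected
```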
diff --git a/test/bdd/api/reverse/geometry.feature b/test/bdd/api/reverse/geometry.feature
index 33fadbbdff5a106d26c8679f8c0fbf31d270b6be..aac82807031fa91ce5543de7bbf3300d7d983aff 100644
@@ -28,7 +28,7 @@ Feature: Geometries for reverse geocoding
           | 1            | 0.9               |
         Then results contain
           | geotext |
-          | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5227608 47.1379757, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
+          | ^POLYGON\(\([0-9. ]+, ?[0-9. ]+, ?[0-9. ]+, ?[0-9. ]+(, ?[0-9. ]+)?\)\) |
 
 
     Scenario: For polygons return the centroid as center point
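
The hard-coded ring is replaced by a shape-only pattern accepting any closed ring of four or five points. A standalone check — outside the behave suite — that the relaxed pattern still matches the concrete WKT it replaced:

```python
import re

PATTERN = r'^POLYGON\(\([0-9. ]+, ?[0-9. ]+, ?[0-9. ]+, ?[0-9. ]+(, ?[0-9. ]+)?\)\)'
WKT = ('POLYGON((9.5225302 47.138066, 9.5225348 47.1379282, '
       '9.5227608 47.1379757, 9.5227337 47.1380692, 9.5225302 47.138066))')

assert re.match(PATTERN, WKT) is not None
```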