]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 12 Mar 2024 09:05:43 +0000 (10:05 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Tue, 12 Mar 2024 09:05:43 +0000 (10:05 +0100)
19 files changed:
.codespellrc [new file with mode: 0644]
.github/workflows/ci-tests.yml
CMakeLists.txt
ChangeLog
SECURITY.md
lib-sql/indices.sql
module/CMakeLists.txt
module/nominatim.c
nominatim/api/search/db_search_builder.py
nominatim/api/search/db_searches.py
nominatim/clicmd/setup.py
nominatim/db/connection.py
nominatim/db/status.py
nominatim/tools/migration.py
nominatim/version.py
settings/env.defaults
test/bdd/api/details/simple.feature
test/bdd/db/query/postcodes.feature
test/python/db/test_status.py

diff --git a/.codespellrc b/.codespellrc
new file mode 100644 (file)
index 0000000..332bce5
--- /dev/null
@@ -0,0 +1,7 @@
+# https://github.com/codespell-project/codespell
+
+[codespell]
+skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
+# Need to be lowercase in the list
+# Unter = Unter den Linden (an example address)
+ignore-words-list = inout,unter
index 910114d7e5cf5e9e0a16d46949b14bbc2f8b5bd9..7ce6320d5f8a07d74d0e53c507ed175d5c47f17b 100644 (file)
@@ -134,6 +134,10 @@ jobs:
         needs: create-archive
         runs-on: ubuntu-20.04
 
+        strategy:
+            matrix:
+                postgresql: ["13", "16"]
+
         steps:
             - uses: actions/download-artifact@v4
               with:
@@ -149,11 +153,13 @@ jobs:
 
             - uses: ./Nominatim/.github/actions/setup-postgresql
               with:
-                  postgresql-version: 13
+                  postgresql-version: ${{ matrix.postgresql }}
                   postgis-version: 3
 
             - name: Install Postgresql server dev
-              run: sudo apt-get install postgresql-server-dev-13
+              run: sudo apt-get install postgresql-server-dev-$PGVER
+              env:
+                PGVER: ${{ matrix.postgresql }}
 
             - uses: ./Nominatim/.github/actions/build-nominatim
               with:
@@ -386,3 +392,10 @@ jobs:
 
           - name: Check full import
             run: nominatim admin --check-database
+
+    codespell:
+      runs-on: ubuntu-latest
+      steps:
+          - uses: codespell-project/actions-codespell@v2
+            with:
+                only_warn: 1
index 4e29a75e50d2498eebeeb40ec50a68085bad1c76..7011e463779a986f50967b17808e80d37e91ec6d 100644 (file)
@@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
 project(nominatim)
 
 set(NOMINATIM_VERSION_MAJOR 4)
-set(NOMINATIM_VERSION_MINOR 3)
+set(NOMINATIM_VERSION_MINOR 4)
 set(NOMINATIM_VERSION_PATCH 0)
 
 set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
index 49fed459050e7a9330cbf1cdbf4408b3ab7ec823..2f5d51d5f24c4442f3ba32ed61a433df31241e7c 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,47 @@
+4.4.0
+ * add export to SQLite database and SQLite support for the frontend
+ * switch to Python frontend as the default frontend
+ * update to osm2pgsql 1.11.0
+ * add support for new osm2pgsql middle table format
+ * simplify geometry for large polygon objects not used in addresses
+ * various performance tweaks for search in Python frontend
+ * fix regression in search with categories where it was confused with near
+   search
+ * partially roll back use of SQLAlchemy lambda statements due to bugs
+   in SQLAchemy
+ * fix handling of timezones for timestamps from the database
+ * fix handling of full address searches in connection with a viewbox
+ * fix postcode computation of highway areas
+ * fix handling of timeout errors for Python <= 3.10
+ * fix address computation for postcode areas
+ * fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
+ * make sure extratags are always null when empty
+ * reduce importance of places without wikipedia reference
+ * improve performance of word count computations
+ * drop support for wikipedia tags with full URLs
+ * replace get_addressdata() SQL implementation with a Python function
+ * improve display name for non-address features
+ * fix postcode validation for postcodes with country code
+   (thanks @pawel-wroniszewski)
+ * add possibility to run imports without superuser database rights
+   (thanks @robbe-haesendonck)
+ * new CLI command for cleaning deleted relations (thanks @lujoh)
+ * add check for database version in the CLI check command
+ * updates to import styles ignoring more unused objects
+ * various typo fixes (thanks @kumarUjjawal)
+
+4.3.2
+ * fix potential SQL injection issue for 'nominatim admin --collect-os-info'
+ * PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
+ * Python frontend: improve handling of viewbox
+ * Python frontend: correct deployment instructions
+
+4.3.1
+ * reintroduce result rematching
+ * improve search of multi-part names
+ * fix accidentally switched meaning of --reverse-only and --search-only in
+   warm command
+
 4.3.0
  * fix failing importance recalculation command
  * fix merging of linked names into unnamed boundaries
index 2cb351ce6011c5102e943059ad595b921e452d07..a14eba13169129ed498c3fcf8e2487ce5753e326 100644 (file)
@@ -9,6 +9,7 @@ versions.
 
 | Version | End of support for security updates |
 | ------- | ----------------------------------- |
+| 4.4.x   | 2026-03-07                          |
 | 4.3.x   | 2025-09-07                          |
 | 4.2.x   | 2024-11-24                          |
 | 4.1.x   | 2024-08-05                          |
index b802a660e7c31446c5a62483707de1e4f771228f..9c31f55699c06e58c9242c8c722e7d42ab430dd1 100644 (file)
@@ -21,6 +21,11 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
   ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}}
   WHERE parent_place_id IS NOT NULL;
 ---
+-- Used to find postcode areas after a search in location_postcode.
+CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas
+  ON placex USING BTREE (country_code, postcode) {{db.tablespace.search_index}}
+  WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code';
+---
 CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
   USING GIST (geometry) {{db.tablespace.search_index}};
 -- Index is needed during import but can be dropped as soon as a full
index c85942982fd955b26962f423000accffe3d33958..01831f90739751cc15ed0331173a8db7338da63c 100644 (file)
@@ -1,6 +1,6 @@
 # just use the pgxs makefile
 
-foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
+foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
     list(APPEND PG_CONFIG_HINTS
          "/usr/pgsql-${suffix}/bin")
 endforeach()
index 9d43c22f28e89da594ad20e39d9947dff2da7e23..54632f76d03e4bffe9a7ddebc2cb8ced6f3413e0 100644 (file)
 #include "mb/pg_wchar.h"
 #include <utfasciitable.h>
 
-#ifdef PG_MODULE_MAGIC
-PG_MODULE_MAGIC;
+#if PG_MAJORVERSION_NUM > 15
+#include "varatt.h"
 #endif
 
+PG_MODULE_MAGIC;
+
 Datum transliteration( PG_FUNCTION_ARGS );
 Datum gettokenstring( PG_FUNCTION_ARGS );
 void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int);
index f2b653f2c2def3c3657abdc9159c3be66d215d5f..ef7a66b8507387630c6d0aacc5bfb2b67a08b566 100644 (file)
@@ -5,7 +5,7 @@
 # Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
-Convertion from token assignment to an abstract DB search.
+Conversion from token assignment to an abstract DB search.
 """
 from typing import Optional, List, Tuple, Iterator, Dict
 import heapq
index be883953276ccf74eed7497d0ece6caf9a275498..5a13061e091074c87b2fc9aa51c78ab4abeb9354 100644 (file)
@@ -602,10 +602,24 @@ class PostcodeSearch(AbstractSearch):
 
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
-            result = nres.create_from_postcode_row(row, nres.SearchResult)
+            p = conn.t.placex
+            placex_sql = _select_placex(p).add_columns(p.c.importance)\
+                             .where(sa.text("""class = 'boundary'
+                                               AND type = 'postal_code'
+                                               AND osm_type = 'R'"""))\
+                             .where(p.c.country_code == row.country_code)\
+                             .where(p.c.postcode == row.postcode)\
+                             .limit(1)
+            for prow in await conn.execute(placex_sql, _details_to_bind_params(details)):
+                result = nres.create_from_placex_row(prow, nres.SearchResult)
+                break
+            else:
+                result = nres.create_from_postcode_row(row, nres.SearchResult)
+
             assert result
-            result.accuracy = row.accuracy
-            results.append(result)
+            if result.place_id not in details.excluded:
+                result.accuracy = row.accuracy
+                results.append(result)
 
         return results
 
index 38a5a5b520470b0deff8a69365073f1830f93eb0..2fd8b141a86c85be4eb22e78a649145a07ca9c06 100644 (file)
@@ -219,12 +219,11 @@ class SetupAll:
         """ Determine the database date and set the status accordingly.
         """
         with connect(dsn) as conn:
-            if not offline:
-                try:
-                    dbdate = status.compute_database_date(conn)
-                    status.set_status(conn, dbdate)
-                    LOG.info('Database is at %s.', dbdate)
-                except Exception as exc: # pylint: disable=broad-except
-                    LOG.error('Cannot determine date of database: %s', exc)
-
             properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
+
+            try:
+                dbdate = status.compute_database_date(conn, offline)
+                status.set_status(conn, dbdate)
+                LOG.info('Database is at %s.', dbdate)
+            except Exception as exc: # pylint: disable=broad-except
+                LOG.error('Cannot determine date of database: %s', exc)
index 82801ae7995c9d1e5527baec0d9dd89c85e70e4d..d686083697f03d03bec86ed061e509884a9905b2 100644 (file)
@@ -239,7 +239,7 @@ _PG_CONNECTION_STRINGS = {
 def get_pg_env(dsn: str,
                base_env: Optional[SysEnv] = None) -> Dict[str, str]:
     """ Return a copy of `base_env` with the environment variables for
-        PostgresSQL set up from the given database connection string.
+        PostgreSQL set up from the given database connection string.
         If `base_env` is None, then the OS environment is used as a base
         environment.
     """
index 2c01de71466dca8fb237461509062c5c8c3e248a..5f92d9599ce2577ffcc5373256d5b85c17b06529 100644 (file)
@@ -29,11 +29,24 @@ class StatusRow(TypedDict):
     indexed: Optional[bool]
 
 
-def compute_database_date(conn: Connection) -> dt.datetime:
+def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
     """ Determine the date of the database from the newest object in the
         data base.
     """
-    # First, find the node with the highest ID in the database
+    # If there is a date from osm2pgsql available, use that.
+    if conn.table_exists('osm2pgsql_properties'):
+        with conn.cursor() as cur:
+            cur.execute(""" SELECT value FROM osm2pgsql_properties
+                            WHERE property = 'current_timestamp' """)
+            row = cur.fetchone()
+            if row is not None:
+                return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
+                                  .replace(tzinfo=dt.timezone.utc)
+
+    if offline:
+        raise UsageError("Cannot determine database date from data in offline mode.")
+
+    # Else, find the node with the highest ID in the database
     with conn.cursor() as cur:
         if conn.table_exists('place'):
             osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
index ffeb4958f409ce285e7fa8f91ba2d342ffe2cfe9..e864ce5254e7840bee0059c98d8288beacff7af8 100644 (file)
@@ -382,3 +382,13 @@ def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) ->
                        WHERE rank_address between 4 and 25 AND type != 'postcode'
                          AND name is not null AND linked_place_id is null AND osm_type = 'N'
                     """)
+
+@_migration(4, 4, 99, 0)
+def create_postcode_ara_lookup_index(conn: Connection, **_: Any) -> None:
+    """ Create index needed for looking up postcode areas from postocde points.
+    """
+    with conn.cursor() as cur:
+        cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas
+                       ON placex USING BTREE (country_code, postcode)
+                       WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code'
+                    """)
index 95420b341f33f6c8dd3a2a95efd20b06ec785b29..76da1dbd97694b09ebd374849f0cd25ac5756a33 100644 (file)
@@ -34,7 +34,7 @@ class NominatimVersion(NamedTuple):
         return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
 
 
-NOMINATIM_VERSION = NominatimVersion(4, 3, 0, 0)
+NOMINATIM_VERSION = NominatimVersion(4, 4, 99, 0)
 
 POSTGRESQL_REQUIRED_VERSION = (9, 6)
 POSTGIS_REQUIRED_VERSION = (2, 2)
index 64a160c7197b9c7593a10670c439d66de3c7fab1..f4c33e7720abda310c95fd5012a4aaf5000a5baa 100644 (file)
@@ -123,9 +123,9 @@ NOMINATIM_TABLESPACE_ADDRESS_DATA=
 # Tablespace for indexes used during address computation. Used for import and update only.
 NOMINATIM_TABLESPACE_ADDRESS_INDEX=
 
-# Tablespace for tables for auxilary data, e.g. TIGER data, postcodes.
+# Tablespace for tables for auxiliary data, e.g. TIGER data, postcodes.
 NOMINATIM_TABLESPACE_AUX_DATA=
-# Tablespace for indexes for auxilary data, e.g. TIGER data, postcodes.
+# Tablespace for indexes for auxiliary data, e.g. TIGER data, postcodes.
 NOMINATIM_TABLESPACE_AUX_INDEX=
 
 
@@ -183,7 +183,7 @@ NOMINATIM_SEARCH_BATCH_MODE=no
 
 # Threshold for searches by name only.
 # Threshold where the lookup strategy in the database is switched. If there
-# are less occurences of a tem than given, the search does the lookup only
+# are less occurrences of a tem than given, the search does the lookup only
 # against the name, otherwise it uses indexes for name and address.
 NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD=500
 
@@ -210,8 +210,8 @@ NOMINATIM_API_POOL_SIZE=10
 NOMINATIM_QUERY_TIMEOUT=10
 
 # Maximum time a single request is allowed to take. When the timeout is
-# exceeeded, the available results are returned.
-# When empty, then timouts are disabled.
+# exceeded, the available results are returned.
+# When empty, then timeouts are disabled.
 NOMINATIM_REQUEST_TIMEOUT=60
 
 # Search elements just within countries
index 99d3422334b3baf36b18040dc883727b72c76bd7..0e456aa5bbf962af30693096626e102ae326ca8a 100644 (file)
@@ -3,14 +3,6 @@
 Feature: Object details
     Check details page for correctness
 
-    Scenario: Details by place ID
-        When sending details query for 107077
-        Then the result is valid json
-        And results contain
-            | place_id |
-            | 107077   |
-
-
     Scenario Outline: Details via OSM id
         When sending details query for <type><id>
         Then the result is valid json
index 78a26a90f5dfd723a2d3511e23a03a0293023536..9f0249594a03ce50c0ac7d266479155ef307b9f0 100644 (file)
@@ -95,3 +95,21 @@ Feature: Querying fo postcode variants
            | type     | display_name |
            | postcode | E4 7EA, United Kingdom |
 
+
+    @fail-legacy
+    @v1-api-python-only
+    Scenario: Postcode areas are preferred over postcode points
+        Given the grid with origin DE
+            | 1 | 2 |
+            | 4 | 3 |
+        Given the places
+            | osm | class    | type        | postcode | geometry    |
+            | R23 | boundary | postal_code | 12345    | (1,2,3,4,1) |
+        When importing
+        Then location_postcode contains exactly
+          | country | postcode |
+          | de      | 12345    |
+        When sending search query "12345, de"
+        Then results contain
+          | osm |
+          | R23 |
index 0cb12e0297889e4714aa302657e8863443b62bfe..05fb2c7f1c8bd47826559ab07dc708d148abae2c 100644 (file)
@@ -31,6 +31,22 @@ def setup_status_table(status_table):
     pass
 
 
+@pytest.mark.parametrize('offline', [True, False])
+def test_compute_database_date_from_osm2pgsql(table_factory, temp_db_conn, offline):
+    table_factory('osm2pgsql_properties', 'property TEXT, value TEXT',
+                  content=(('current_timestamp', '2024-01-03T23:45:54Z'), ))
+
+    date = nominatim.db.status.compute_database_date(temp_db_conn, offline=offline)
+    assert date == iso_date('2024-01-03T23:45:54')
+
+
+def test_compute_database_date_from_osm2pgsql_nodata(table_factory, temp_db_conn):
+    table_factory('osm2pgsql_properties', 'property TEXT, value TEXT')
+
+    with pytest.raises(UsageError, match='Cannot determine database date from data in offline mode'):
+        nominatim.db.status.compute_database_date(temp_db_conn, offline=True)
+
+
 def test_compute_database_date_place_empty(place_table, temp_db_conn):
     with pytest.raises(UsageError):
         nominatim.db.status.compute_database_date(temp_db_conn)