From: Sarah Hoffmann Date: Tue, 12 Mar 2024 09:05:43 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~17 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/a8aec65fb4278666fafb8b3b655093a3d1f0ae2a?hp=a234d606c9dcd76db1d69909bb78940d3dbcd862 Merge remote-tracking branch 'upstream/master' --- diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 00000000..332bce52 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,7 @@ +# https://github.com/codespell-project/codespell + +[codespell] +skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py +# Need to be lowercase in the list +# Unter = Unter den Linden (an example address) +ignore-words-list = inout,unter diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 910114d7..7ce6320d 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -134,6 +134,10 @@ jobs: needs: create-archive runs-on: ubuntu-20.04 + strategy: + matrix: + postgresql: ["13", "16"] + steps: - uses: actions/download-artifact@v4 with: @@ -149,11 +153,13 @@ jobs: - uses: ./Nominatim/.github/actions/setup-postgresql with: - postgresql-version: 13 + postgresql-version: ${{ matrix.postgresql }} postgis-version: 3 - name: Install Postgresql server dev - run: sudo apt-get install postgresql-server-dev-13 + run: sudo apt-get install postgresql-server-dev-$PGVER + env: + PGVER: ${{ matrix.postgresql }} - uses: ./Nominatim/.github/actions/build-nominatim with: @@ -386,3 +392,10 @@ jobs: - name: Check full import run: nominatim admin --check-database + + codespell: + runs-on: ubuntu-latest + steps: + - uses: codespell-project/actions-codespell@v2 + with: + only_warn: 1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 4e29a75e..7011e463 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") project(nominatim) set(NOMINATIM_VERSION_MAJOR 4) -set(NOMINATIM_VERSION_MINOR 3) +set(NOMINATIM_VERSION_MINOR 4) set(NOMINATIM_VERSION_PATCH 0) set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}") diff --git a/ChangeLog b/ChangeLog index 49fed459..2f5d51d5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,47 @@ +4.4.0 + * add export to SQLite database and SQLite support for the frontend + * switch to Python frontend as the default frontend + * update to osm2pgsql 1.11.0 + * add support for new osm2pgsql middle table format + * simplify geometry for large polygon objects not used in addresses + * various performance tweaks for search in Python frontend + * fix regression in search with categories where it was confused with near + search + * partially roll back use of SQLAlchemy lambda statements due to bugs + in SQLAchemy + * fix handling of timezones for timestamps from the database + * fix handling of full address searches in connection with a viewbox + * fix postcode computation of highway areas + * fix handling of timeout errors for Python <= 3.10 + * fix address computation for postcode areas + * fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT + * make sure extratags are always null when empty + * reduce importance of places without wikipedia reference + * improve performance of word count computations + * drop support for wikipedia tags with full URLs + * replace get_addressdata() SQL implementation with a Python function + * improve display name for non-address features + * fix postcode validation for postcodes with country code + (thanks @pawel-wroniszewski) + * add possibility to run imports without superuser database rights + (thanks @robbe-haesendonck) + * new CLI command for cleaning deleted relations (thanks @lujoh) + * add check for database version in the CLI check command + * updates to import styles ignoring more unused objects + * various typo fixes (thanks @kumarUjjawal) + +4.3.2 + * fix potential SQL injection issue for 'nominatim admin --collect-os-info' + * PHP frontend: fix on-the-fly lookup of postcode areas near boundaries + * Python frontend: improve handling of viewbox + * Python frontend: correct deployment instructions + +4.3.1 + * reintroduce result rematching + * improve search of multi-part names + * fix accidentally switched meaning of --reverse-only and --search-only in + warm command + 4.3.0 * fix failing importance recalculation command * fix merging of linked names into unnamed boundaries diff --git a/SECURITY.md b/SECURITY.md index 2cb351ce..a14eba13 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -9,6 +9,7 @@ versions. | Version | End of support for security updates | | ------- | ----------------------------------- | +| 4.4.x | 2026-03-07 | | 4.3.x | 2025-09-07 | | 4.2.x | 2024-11-24 | | 4.1.x | 2024-08-05 | diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql index b802a660..9c31f556 100644 --- a/lib-sql/indices.sql +++ b/lib-sql/indices.sql @@ -21,6 +21,11 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}} WHERE parent_place_id IS NOT NULL; --- +-- Used to find postcode areas after a search in location_postcode. +CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas + ON placex USING BTREE (country_code, postcode) {{db.tablespace.search_index}} + WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code'; +--- CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; -- Index is needed during import but can be dropped as soon as a full diff --git a/module/CMakeLists.txt b/module/CMakeLists.txt index c8594298..01831f90 100644 --- a/module/CMakeLists.txt +++ b/module/CMakeLists.txt @@ -1,6 +1,6 @@ # just use the pgxs makefile -foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6") +foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6") list(APPEND PG_CONFIG_HINTS "/usr/pgsql-${suffix}/bin") endforeach() diff --git a/module/nominatim.c b/module/nominatim.c index 9d43c22f..54632f76 100644 --- a/module/nominatim.c +++ b/module/nominatim.c @@ -11,10 +11,12 @@ #include "mb/pg_wchar.h" #include -#ifdef PG_MODULE_MAGIC -PG_MODULE_MAGIC; +#if PG_MAJORVERSION_NUM > 15 +#include "varatt.h" #endif +PG_MODULE_MAGIC; + Datum transliteration( PG_FUNCTION_ARGS ); Datum gettokenstring( PG_FUNCTION_ARGS ); void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int); diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py index f2b653f2..ef7a66b8 100644 --- a/nominatim/api/search/db_search_builder.py +++ b/nominatim/api/search/db_search_builder.py @@ -5,7 +5,7 @@ # Copyright (C) 2023 by the Nominatim developer community. # For a full list of authors see the git log. """ -Convertion from token assignment to an abstract DB search. +Conversion from token assignment to an abstract DB search. """ from typing import Optional, List, Tuple, Iterator, Dict import heapq diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index be883953..5a13061e 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -602,10 +602,24 @@ class PostcodeSearch(AbstractSearch): results = nres.SearchResults() for row in await conn.execute(sql, _details_to_bind_params(details)): - result = nres.create_from_postcode_row(row, nres.SearchResult) + p = conn.t.placex + placex_sql = _select_placex(p).add_columns(p.c.importance)\ + .where(sa.text("""class = 'boundary' + AND type = 'postal_code' + AND osm_type = 'R'"""))\ + .where(p.c.country_code == row.country_code)\ + .where(p.c.postcode == row.postcode)\ + .limit(1) + for prow in await conn.execute(placex_sql, _details_to_bind_params(details)): + result = nres.create_from_placex_row(prow, nres.SearchResult) + break + else: + result = nres.create_from_postcode_row(row, nres.SearchResult) + assert result - result.accuracy = row.accuracy - results.append(result) + if result.place_id not in details.excluded: + result.accuracy = row.accuracy + results.append(result) return results diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 38a5a5b5..2fd8b141 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -219,12 +219,11 @@ class SetupAll: """ Determine the database date and set the status accordingly. """ with connect(dsn) as conn: - if not offline: - try: - dbdate = status.compute_database_date(conn) - status.set_status(conn, dbdate) - LOG.info('Database is at %s.', dbdate) - except Exception as exc: # pylint: disable=broad-except - LOG.error('Cannot determine date of database: %s', exc) - properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION)) + + try: + dbdate = status.compute_database_date(conn, offline) + status.set_status(conn, dbdate) + LOG.info('Database is at %s.', dbdate) + except Exception as exc: # pylint: disable=broad-except + LOG.error('Cannot determine date of database: %s', exc) diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index 82801ae7..d6860836 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -239,7 +239,7 @@ _PG_CONNECTION_STRINGS = { def get_pg_env(dsn: str, base_env: Optional[SysEnv] = None) -> Dict[str, str]: """ Return a copy of `base_env` with the environment variables for - PostgresSQL set up from the given database connection string. + PostgreSQL set up from the given database connection string. If `base_env` is None, then the OS environment is used as a base environment. """ diff --git a/nominatim/db/status.py b/nominatim/db/status.py index 2c01de71..5f92d959 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -29,11 +29,24 @@ class StatusRow(TypedDict): indexed: Optional[bool] -def compute_database_date(conn: Connection) -> dt.datetime: +def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime: """ Determine the date of the database from the newest object in the data base. """ - # First, find the node with the highest ID in the database + # If there is a date from osm2pgsql available, use that. + if conn.table_exists('osm2pgsql_properties'): + with conn.cursor() as cur: + cur.execute(""" SELECT value FROM osm2pgsql_properties + WHERE property = 'current_timestamp' """) + row = cur.fetchone() + if row is not None: + return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\ + .replace(tzinfo=dt.timezone.utc) + + if offline: + raise UsageError("Cannot determine database date from data in offline mode.") + + # Else, find the node with the highest ID in the database with conn.cursor() as cur: if conn.table_exists('place'): osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'") diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index ffeb4958..e864ce52 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -382,3 +382,13 @@ def add_improved_geometry_reverse_placenode_index(conn: Connection, **_: Any) -> WHERE rank_address between 4 and 25 AND type != 'postcode' AND name is not null AND linked_place_id is null AND osm_type = 'N' """) + +@_migration(4, 4, 99, 0) +def create_postcode_ara_lookup_index(conn: Connection, **_: Any) -> None: + """ Create index needed for looking up postcode areas from postocde points. + """ + with conn.cursor() as cur: + cur.execute("""CREATE INDEX IF NOT EXISTS idx_placex_postcode_areas + ON placex USING BTREE (country_code, postcode) + WHERE osm_type = 'R' AND class = 'boundary' AND type = 'postal_code' + """) diff --git a/nominatim/version.py b/nominatim/version.py index 95420b34..76da1dbd 100644 --- a/nominatim/version.py +++ b/nominatim/version.py @@ -34,7 +34,7 @@ class NominatimVersion(NamedTuple): return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}" -NOMINATIM_VERSION = NominatimVersion(4, 3, 0, 0) +NOMINATIM_VERSION = NominatimVersion(4, 4, 99, 0) POSTGRESQL_REQUIRED_VERSION = (9, 6) POSTGIS_REQUIRED_VERSION = (2, 2) diff --git a/settings/env.defaults b/settings/env.defaults index 64a160c7..f4c33e77 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -123,9 +123,9 @@ NOMINATIM_TABLESPACE_ADDRESS_DATA= # Tablespace for indexes used during address computation. Used for import and update only. NOMINATIM_TABLESPACE_ADDRESS_INDEX= -# Tablespace for tables for auxilary data, e.g. TIGER data, postcodes. +# Tablespace for tables for auxiliary data, e.g. TIGER data, postcodes. NOMINATIM_TABLESPACE_AUX_DATA= -# Tablespace for indexes for auxilary data, e.g. TIGER data, postcodes. +# Tablespace for indexes for auxiliary data, e.g. TIGER data, postcodes. NOMINATIM_TABLESPACE_AUX_INDEX= @@ -183,7 +183,7 @@ NOMINATIM_SEARCH_BATCH_MODE=no # Threshold for searches by name only. # Threshold where the lookup strategy in the database is switched. If there -# are less occurences of a tem than given, the search does the lookup only +# are less occurrences of a tem than given, the search does the lookup only # against the name, otherwise it uses indexes for name and address. NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD=500 @@ -210,8 +210,8 @@ NOMINATIM_API_POOL_SIZE=10 NOMINATIM_QUERY_TIMEOUT=10 # Maximum time a single request is allowed to take. When the timeout is -# exceeeded, the available results are returned. -# When empty, then timouts are disabled. +# exceeded, the available results are returned. +# When empty, then timeouts are disabled. NOMINATIM_REQUEST_TIMEOUT=60 # Search elements just within countries diff --git a/test/bdd/api/details/simple.feature b/test/bdd/api/details/simple.feature index 99d34223..0e456aa5 100644 --- a/test/bdd/api/details/simple.feature +++ b/test/bdd/api/details/simple.feature @@ -3,14 +3,6 @@ Feature: Object details Check details page for correctness - Scenario: Details by place ID - When sending details query for 107077 - Then the result is valid json - And results contain - | place_id | - | 107077 | - - Scenario Outline: Details via OSM id When sending details query for Then the result is valid json diff --git a/test/bdd/db/query/postcodes.feature b/test/bdd/db/query/postcodes.feature index 78a26a90..9f024959 100644 --- a/test/bdd/db/query/postcodes.feature +++ b/test/bdd/db/query/postcodes.feature @@ -95,3 +95,21 @@ Feature: Querying fo postcode variants | type | display_name | | postcode | E4 7EA, United Kingdom | + + @fail-legacy + @v1-api-python-only + Scenario: Postcode areas are preferred over postcode points + Given the grid with origin DE + | 1 | 2 | + | 4 | 3 | + Given the places + | osm | class | type | postcode | geometry | + | R23 | boundary | postal_code | 12345 | (1,2,3,4,1) | + When importing + Then location_postcode contains exactly + | country | postcode | + | de | 12345 | + When sending search query "12345, de" + Then results contain + | osm | + | R23 | diff --git a/test/python/db/test_status.py b/test/python/db/test_status.py index 0cb12e02..05fb2c7f 100644 --- a/test/python/db/test_status.py +++ b/test/python/db/test_status.py @@ -31,6 +31,22 @@ def setup_status_table(status_table): pass +@pytest.mark.parametrize('offline', [True, False]) +def test_compute_database_date_from_osm2pgsql(table_factory, temp_db_conn, offline): + table_factory('osm2pgsql_properties', 'property TEXT, value TEXT', + content=(('current_timestamp', '2024-01-03T23:45:54Z'), )) + + date = nominatim.db.status.compute_database_date(temp_db_conn, offline=offline) + assert date == iso_date('2024-01-03T23:45:54') + + +def test_compute_database_date_from_osm2pgsql_nodata(table_factory, temp_db_conn): + table_factory('osm2pgsql_properties', 'property TEXT, value TEXT') + + with pytest.raises(UsageError, match='Cannot determine database date from data in offline mode'): + nominatim.db.status.compute_database_date(temp_db_conn, offline=True) + + def test_compute_database_date_place_empty(place_table, temp_db_conn): with pytest.raises(UsageError): nominatim.db.status.compute_database_date(temp_db_conn)