git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3356 from lonvia/use-date-from-osm2pgsql-prop
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 5 Mar 2024 14:32:16 +0000 (15:32 +0100)
committer GitHub <noreply@github.com>
Tue, 5 Mar 2024 14:32:16 +0000 (15:32 +0100)
Use import date from osm2pgsql property table if available

.codespellrc [new file with mode: 0644]
.github/workflows/ci-tests.yml
lib-sql/functions/placex_triggers.sql
lib-sql/indices.sql
nominatim/api/results.py
nominatim/api/search/db_search_builder.py
nominatim/db/connection.py
settings/env.defaults

diff --git a/.codespellrc b/.codespellrc
new file mode 100644 (file)
index 0000000..332bce5
--- /dev/null
@@ -0,0 +1,7 @@
+# https://github.com/codespell-project/codespell
+
+[codespell]
+skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
+# Need to be lowercase in the list
+# Unter = Unter den Linden (an example address)
+ignore-words-list = inout,unter
index 910114d7e5cf5e9e0a16d46949b14bbc2f8b5bd9..d1079375df0821ce3fc8b16e6bf0e5f0bf1547d9 100644 (file)
@@ -386,3 +386,10 @@ jobs:
 
           - name: Check full import
             run: nominatim admin --check-database
+
+    codespell:
+      runs-on: ubuntu-latest
+      steps:
+          - uses: codespell-project/actions-codespell@v2
+            with:
+                only_warn: 1
index 386140f45c00d0f6fae651ccd970c19496ae78e7..0f74336fbc7d2312140219fb30ad547b7c9267cf 100644 (file)
@@ -1265,6 +1265,8 @@ BEGIN
     END IF;
   ELSEIF NEW.rank_address > 25 THEN
     max_rank := 25;
+  ELSEIF NEW.class in ('place','boundary') and NEW.type in ('postcode','postal_code') THEN
+    max_rank := NEW.rank_search;
   ELSE
     max_rank := NEW.rank_address;
   END IF;
index ed078895ee8901473ac0f613b97e6d9cabe8c88e..b802a660e7c31446c5a62483707de1e4f771228f 100644 (file)
@@ -23,6 +23,10 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
 ---
 CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
   USING GIST (geometry) {{db.tablespace.search_index}};
+-- Index is needed during import but can be dropped as soon as a full
+-- geometry index is in place. The partial index is almost as big as the full
+-- index.
+DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
 ---
 CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
   ON placex USING gist (geometry) {{db.tablespace.search_index}}
index d66da6c6f5c1df705075989136da50a23c214972..47fb85114634de804f36fa569dc79986894dccc4 100644 (file)
@@ -11,7 +11,7 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
 import enum
 import dataclasses
 import datetime as dt
@@ -501,15 +501,17 @@ def _get_address_lookup_id(result: BaseResultT) -> int:
 
 async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
     assert result.address_rows is not None
-    postcode = result.postcode
-    if not postcode and result.address:
-        postcode = result.address.get('postcode')
-    if postcode and ',' not in postcode and ';' not in postcode:
-        result.address_rows.append(AddressLine(
-            category=('place', 'postcode'),
-            names={'ref': postcode},
-            fromarea=False, isaddress=True, rank_address=5,
-            distance=0.0))
+    if result.category[0] not in ('boundary', 'place')\
+       or result.category[1] not in ('postal_code', 'postcode'):
+        postcode = result.postcode
+        if not postcode and result.address:
+            postcode = result.address.get('postcode')
+        if postcode and ',' not in postcode and ';' not in postcode:
+            result.address_rows.append(AddressLine(
+                category=('place', 'postcode'),
+                names={'ref': postcode},
+                fromarea=False, isaddress=True, rank_address=5,
+                distance=0.0))
     if result.country_code:
         async def _get_country_names() -> Optional[Dict[str, str]]:
             t = conn.t.country_name
@@ -551,7 +553,7 @@ def _setup_address_details(result: BaseResultT) -> None:
             extratags=result.extratags or {},
             admin_level=result.admin_level,
             fromarea=True, isaddress=True,
-            rank_address=result.rank_address or 100, distance=0.0))
+            rank_address=result.rank_address, distance=0.0))
     if result.source_table == SourceTable.PLACEX and result.address:
         housenumber = result.address.get('housenumber')\
                       or result.address.get('streetnumber')\
@@ -676,9 +678,12 @@ async def complete_address_details(conn: SearchConnection, results: List[BaseRes
                     rank_address=row.rank_address, distance=0.0))
 
     ### Now sort everything
+    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
+        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
+
     for result in results:
         assert result.address_rows is not None
-        result.address_rows.sort(key=lambda a: (-a.rank_address, a.isaddress))
+        result.address_rows.sort(key=mk_sort_key(result.place_id))
 
 
 def _placex_select_address_row(conn: SearchConnection,
index f2b653f2c2def3c3657abdc9159c3be66d215d5f..ef7a66b8507387630c6d0aacc5bfb2b67a08b566 100644 (file)
@@ -5,7 +5,7 @@
 # Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
-Convertion from token assignment to an abstract DB search.
+Conversion from token assignment to an abstract DB search.
 """
 from typing import Optional, List, Tuple, Iterator, Dict
 import heapq
index 82801ae7995c9d1e5527baec0d9dd89c85e70e4d..d686083697f03d03bec86ed061e509884a9905b2 100644 (file)
@@ -239,7 +239,7 @@ _PG_CONNECTION_STRINGS = {
 def get_pg_env(dsn: str,
                base_env: Optional[SysEnv] = None) -> Dict[str, str]:
     """ Return a copy of `base_env` with the environment variables for
-        PostgresSQL set up from the given database connection string.
+        PostgreSQL set up from the given database connection string.
         If `base_env` is None, then the OS environment is used as a base
         environment.
     """
index 64a160c7197b9c7593a10670c439d66de3c7fab1..f4c33e7720abda310c95fd5012a4aaf5000a5baa 100644 (file)
@@ -123,9 +123,9 @@ NOMINATIM_TABLESPACE_ADDRESS_DATA=
 # Tablespace for indexes used during address computation. Used for import and update only.
 NOMINATIM_TABLESPACE_ADDRESS_INDEX=
 
-# Tablespace for tables for auxilary data, e.g. TIGER data, postcodes.
+# Tablespace for tables for auxiliary data, e.g. TIGER data, postcodes.
 NOMINATIM_TABLESPACE_AUX_DATA=
-# Tablespace for indexes for auxilary data, e.g. TIGER data, postcodes.
+# Tablespace for indexes for auxiliary data, e.g. TIGER data, postcodes.
 NOMINATIM_TABLESPACE_AUX_INDEX=
 
 
@@ -183,7 +183,7 @@ NOMINATIM_SEARCH_BATCH_MODE=no
 
 # Threshold for searches by name only.
 # Threshold where the lookup strategy in the database is switched. If there
-# are less occurences of a tem than given, the search does the lookup only
+# are less occurrences of a tem than given, the search does the lookup only
 # against the name, otherwise it uses indexes for name and address.
 NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD=500
 
@@ -210,8 +210,8 @@ NOMINATIM_API_POOL_SIZE=10
 NOMINATIM_QUERY_TIMEOUT=10
 
 # Maximum time a single request is allowed to take. When the timeout is
-# exceeeded, the available results are returned.
-# When empty, then timouts are disabled.
+# exceeded, the available results are returned.
+# When empty, then timeouts are disabled.
 NOMINATIM_REQUEST_TIMEOUT=60
 
 # Search elements just within countries