Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)

committer Sarah Hoffmann <lonvia@denofr.de>

Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
diff --git a/SECURITY.md b/SECURITY.md

index f6215f64ec479545270eca1c48d8f5fa90fe88aa..2cb351ce6011c5102e943059ad595b921e452d07 100644 (file)
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -12,7 +12,6 @@ versions.
  | 4.3.x   | 2025-09-07                          |
  | 4.2.x   | 2024-11-24                          |
  | 4.1.x   | 2024-08-05                          |
-| 4.0.x   | 2023-11-02                          |
  
  ## Reporting a Vulnerability
  
@@ -36,5 +35,6 @@ incident. Announcements will also be published at the
  
  ## List of Previous Incidents
  
-* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
+* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
  * 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
diff --git a/docs/library/Getting-Started.md b/docs/library/Getting-Started.md

index 6b0dad75c1563e2083849db51dc8b51d5b81c3b6..dd16b11dee09bf21aff8bb7f8c2db8ab853670ac 100644 (file)
--- a/docs/library/Getting-Started.md
+++ b/docs/library/Getting-Started.md
@@ -14,7 +14,7 @@ in the database.
  
      The library also misses a proper installation routine, so some manipulation
      of the PYTHONPATH is required. At the moment, use is only recommended for
-    developers wit some experience in Python.
+    developers with some experience in Python.
  
  ## Installation
  
diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql

index a32bfe710419e97bb65ba951b17b29502450fd87..cba11dbf3400d7fcc45a74f9c7fb85ed1a2f4c69 100644 (file)
--- a/lib-sql/functions/address_lookup.sql
+++ b/lib-sql/functions/address_lookup.sql
@@ -262,7 +262,7 @@ BEGIN
        -- If the place had a postcode assigned, take this one only
        -- into consideration when it is an area and the place does not have
        -- a postcode itself.
-      IF location.fromarea AND location.isaddress
+      IF location.fromarea AND location_isaddress
           AND (place.address is null or not place.address ? 'postcode')
        THEN
          place.postcode := null; -- remove the less exact postcode
diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py

index 5b6d0e4dbbc03573b643e88cba43a871d7bd038a..37ae7f5f04464241ad0e81062b56d125555cadff 100644 (file)
--- a/nominatim/api/logging.py
+++ b/nominatim/api/logging.py
@@ -235,6 +235,10 @@ class TextLogger(BaseLogger):
          self.buffer = io.StringIO()
  
  
+    def _timestamp(self) -> None:
+        self._write(f'[{dt.datetime.now()}]\n')
+
+
      def get_buffer(self) -> str:
          return self.buffer.getvalue()
  
@@ -247,6 +251,7 @@ class TextLogger(BaseLogger):
  
  
      def section(self, heading: str) -> None:
+        self._timestamp()
          self._write(f"\n# {heading}\n\n")
  
  
@@ -283,6 +288,7 @@ class TextLogger(BaseLogger):
  
  
      def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+        self._timestamp()
          self._write(f'{heading}:\n')
          total = 0
          for rank, res in results:
@@ -298,6 +304,7 @@ class TextLogger(BaseLogger):
  
      def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
              params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
+        self._timestamp()
          sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
          self._write(f"| {sqlstr}\n\n")
  
diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py

index 97c4292e56710ea22fe3d10fd48e9dc6bb8d04c5..41434f062e1bcf26eb4fb6a7cf0f58020d887045 100644 (file)
--- a/nominatim/api/search/db_searches.py
+++ b/nominatim/api/search/db_searches.py
@@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry
  #pylint: disable=singleton-comparison,not-callable
  #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
  
+def no_index(expr: SaColumn) -> SaColumn:
+    """ Wrap the given expression, so that the query planner will
+        refrain from using the expression for index lookup.
+    """
+    return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
  def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
      """ Create a dictionary from search parameters that can be used
          as bind parameter for SQL execute.
@@ -107,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
  def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
      orexpr: List[SaExpression] = []
      if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
-        orexpr.append(table.c.rank_address.between(1, 30))
+        orexpr.append(no_index(table.c.rank_address).between(1, 30))
      elif layers & DataLayer.ADDRESS:
-        orexpr.append(table.c.rank_address.between(1, 29))
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(no_index(table.c.rank_address).between(1, 29))
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                                sa.or_(table.c.housenumber != None,
                                       table.c.address.has_key('addr:housename'))))
      elif layers & DataLayer.POI:
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                                table.c.class_.not_in(('place', 'building'))))
  
      if layers & DataLayer.MANMADE:
@@ -124,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
          if not layers & DataLayer.NATURAL:
              exclude.extend(('natural', 'water', 'waterway'))
          orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
      else:
          include = []
          if layers & DataLayer.RAILWAY:
@@ -132,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
          if layers & DataLayer.NATURAL:
              include.extend(('natural', 'water', 'waterway'))
          orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
  
      if len(orexpr) == 1:
          return orexpr[0]
@@ -295,7 +302,7 @@ class NearSearch(AbstractSearch):
                                         else_ = tgeom.c.centroid.ST_Expand(0.05))))\
                       .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
  
-        sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
+        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
          if details.countries:
              sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
          if details.excluded:
diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py

index d2cdd96e16432d62f2b02d0fcbe390e060d905e1..14203e0081eb1df470025f6285b7b46896223123 100644 (file)
--- a/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
          penalty = 0.0
          if row.type == 'w':
              penalty = 0.3
+        elif row.type == 'W':
+            if len(row.word_token) == 1 and row.word_token == row.word:
+                penalty = 0.2 if row.word.isdigit() else 0.3
          elif row.type == 'H':
              penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
              if all(not c.isdigit() for c in row.word_token):
                  penalty += 0.2 * (len(row.word_token) - 1)
+        elif row.type == 'C':
+            if len(row.word_token) == 1:
+                penalty = 0.3
  
          if row.info is None:
              lookup_word = row.word
diff --git a/nominatim/api/types.py b/nominatim/api/types.py

index 3ca023e7bbe1c7dce2a82a8b510c248a029d8721..5767fe1604a7d31e5b1adffdf3699adf8420a439 100644 (file)
--- a/nominatim/api/types.py
+++ b/nominatim/api/types.py
@@ -538,7 +538,9 @@ class SearchDetails(LookupDetails):
                  or (self.bounded_viewbox
                      and self.viewbox is not None and self.near is not None
                      and self.viewbox.contains(self.near))
-                or self.layers is not None and not self.layers)
+                or (self.layers is not None and not self.layers)
+                or (self.max_rank <= 4 and
+                    self.layers is not None and not self.layers & DataLayer.ADDRESS))
  
  
      def layer_enabled(self, layer: DataLayer) -> bool:
diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py

index a64cbfaf99102ac0b24b56e609ad6462999b229e..fce897bc7250c814d6679188a5c6281a8ac3790c 100644 (file)
--- a/nominatim/db/connection.py
+++ b/nominatim/db/connection.py
@@ -31,7 +31,7 @@ class Cursor(psycopg2.extras.DictCursor):
          """ Query execution that logs the SQL query when debugging is enabled.
          """
          if LOG.isEnabledFor(logging.DEBUG):
-            LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[arg-type]
+            LOG.debug(self.mogrify(query, args).decode('utf-8'))
  
          super().execute(query, args)
  
diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py

index d9154ed90160b5b0a4c01e72fc6df5de4a9002cf..e3f0712a11b6f53551bc90e4734798e441f33142 100644 (file)
--- a/nominatim/db/utils.py
+++ b/nominatim/db/utils.py
@@ -118,4 +118,4 @@ class CopyBuffer:
          """
          if self.buffer.tell() > 0:
              self.buffer.seek(0)
-            cur.copy_from(self.buffer, table, columns=columns) # type: ignore[arg-type]
+            cur.copy_from(self.buffer, table, columns=columns)
diff --git a/nominatim/tools/collect_os_info.py b/nominatim/tools/collect_os_info.py

index 29e1cd535672c768daaecc6151a343166a29a522..c8fda908c731324e28eb074e9fc0a31bd99f574e 100644 (file)
--- a/nominatim/tools/collect_os_info.py
+++ b/nominatim/tools/collect_os_info.py
@@ -12,14 +12,13 @@ import os
  import subprocess
  import sys
  from pathlib import Path
-from typing import List, Optional, Tuple, Union, cast
+from typing import List, Optional, Tuple, Union
  
  import psutil
  from psycopg2.extensions import make_dsn, parse_dsn
  
  from nominatim.config import Configuration
  from nominatim.db.connection import connect
-from nominatim.typing import DictCursorResults
  from nominatim.version import NOMINATIM_VERSION
  
  
@@ -107,15 +106,15 @@ def report_system_information(config: Configuration) -> None:
          postgresql_ver: str = convert_version(conn.server_version_tuple())
  
          with conn.cursor() as cur:
-            cur.execute(f"""
-            SELECT datname FROM pg_catalog.pg_database 
-            WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
-            nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
-            if nominatim_db_exists:
-                with connect(config.get_libpq_dsn()) as conn:
-                    postgis_ver: str = convert_version(conn.postgis_version_tuple())
-            else:
-                postgis_ver = "Unable to connect to database"
+            num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
+                             (parse_dsn(config.get_libpq_dsn())['dbname'], ))
+            nominatim_db_exists = num == 1 if isinstance(num, int) else False
+
+    if nominatim_db_exists:
+        with connect(config.get_libpq_dsn()) as conn:
+            postgis_ver: str = convert_version(conn.postgis_version_tuple())
+    else:
+        postgis_ver = "Unable to connect to database"
  
      postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))
author	Sarah Hoffmann <lonvia@denofr.de>
	Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
SECURITY.md		patch \| blob \| history
docs/library/Getting-Started.md		patch \| blob \| history
lib-sql/functions/address_lookup.sql		patch \| blob \| history
nominatim/api/logging.py		patch \| blob \| history
nominatim/api/search/db_searches.py		patch \| blob \| history
nominatim/api/search/icu_tokenizer.py		patch \| blob \| history
nominatim/api/types.py		patch \| blob \| history
nominatim/db/connection.py		patch \| blob \| history
nominatim/db/utils.py		patch \| blob \| history
nominatim/tools/collect_os_info.py		patch \| blob \| history