From: Sarah Hoffmann Date: Thu, 23 Nov 2023 11:06:17 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~35 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/8c82f6ceb3b77ebadc0bfc08422652902fc437a8?hp=1d051e813a1f7604feaea563ad3209a192780df9 Merge remote-tracking branch 'upstream/master' --- diff --git a/SECURITY.md b/SECURITY.md index f6215f64..2cb351ce 100644 --- a/SECURITY.md +++ b/SECURITY.md @@ -12,7 +12,6 @@ versions. | 4.3.x | 2025-09-07 | | 4.2.x | 2024-11-24 | | 4.1.x | 2024-08-05 | -| 4.0.x | 2023-11-02 | ## Reporting a Vulnerability @@ -36,5 +35,6 @@ incident. Announcements will also be published at the ## List of Previous Incidents -* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html) +* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html) * 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html) +* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html) diff --git a/docs/library/Getting-Started.md b/docs/library/Getting-Started.md index 6b0dad75..dd16b11d 100644 --- a/docs/library/Getting-Started.md +++ b/docs/library/Getting-Started.md @@ -14,7 +14,7 @@ in the database. The library also misses a proper installation routine, so some manipulation of the PYTHONPATH is required. At the moment, use is only recommended for - developers wit some experience in Python. + developers with some experience in Python. ## Installation diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql index a32bfe71..cba11dbf 100644 --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@ -262,7 +262,7 @@ BEGIN -- If the place had a postcode assigned, take this one only -- into consideration when it is an area and the place does not have -- a postcode itself. - IF location.fromarea AND location.isaddress + IF location.fromarea AND location_isaddress AND (place.address is null or not place.address ? 'postcode') THEN place.postcode := null; -- remove the less exact postcode diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py index 5b6d0e4d..37ae7f5f 100644 --- a/nominatim/api/logging.py +++ b/nominatim/api/logging.py @@ -235,6 +235,10 @@ class TextLogger(BaseLogger): self.buffer = io.StringIO() + def _timestamp(self) -> None: + self._write(f'[{dt.datetime.now()}]\n') + + def get_buffer(self) -> str: return self.buffer.getvalue() @@ -247,6 +251,7 @@ class TextLogger(BaseLogger): def section(self, heading: str) -> None: + self._timestamp() self._write(f"\n# {heading}\n\n") @@ -283,6 +288,7 @@ class TextLogger(BaseLogger): def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None: + self._timestamp() self._write(f'{heading}:\n') total = 0 for rank, res in results: @@ -298,6 +304,7 @@ class TextLogger(BaseLogger): def sql(self, conn: AsyncConnection, statement: 'sa.Executable', params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None: + self._timestamp() sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78)) self._write(f"| {sqlstr}\n\n") diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 97c4292e..41434f06 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + + def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: """ Create a dictionary from search parameters that can be used as bind parameter for SQL execute. @@ -107,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: orexpr: List[SaExpression] = [] if layers & DataLayer.ADDRESS and layers & DataLayer.POI: - orexpr.append(table.c.rank_address.between(1, 30)) + orexpr.append(no_index(table.c.rank_address).between(1, 30)) elif layers & DataLayer.ADDRESS: - orexpr.append(table.c.rank_address.between(1, 29)) - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(no_index(table.c.rank_address).between(1, 29)) + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, sa.or_(table.c.housenumber != None, table.c.address.has_key('addr:housename')))) elif layers & DataLayer.POI: - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, table.c.class_.not_in(('place', 'building')))) if layers & DataLayer.MANMADE: @@ -124,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if not layers & DataLayer.NATURAL: exclude.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) else: include = [] if layers & DataLayer.RAILWAY: @@ -132,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if layers & DataLayer.NATURAL: include.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.in_(tuple(include)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) if len(orexpr) == 1: return orexpr[0] @@ -295,7 +302,7 @@ class NearSearch(AbstractSearch): else_ = tgeom.c.centroid.ST_Expand(0.05))))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) - sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) + sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index d2cdd96e..14203e00 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -101,10 +101,16 @@ class ICUToken(qmod.Token): penalty = 0.0 if row.type == 'w': penalty = 0.3 + elif row.type == 'W': + if len(row.word_token) == 1 and row.word_token == row.word: + penalty = 0.2 if row.word.isdigit() else 0.3 elif row.type == 'H': penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit()) if all(not c.isdigit() for c in row.word_token): penalty += 0.2 * (len(row.word_token) - 1) + elif row.type == 'C': + if len(row.word_token) == 1: + penalty = 0.3 if row.info is None: lookup_word = row.word diff --git a/nominatim/api/types.py b/nominatim/api/types.py index 3ca023e7..5767fe16 100644 --- a/nominatim/api/types.py +++ b/nominatim/api/types.py @@ -538,7 +538,9 @@ class SearchDetails(LookupDetails): or (self.bounded_viewbox and self.viewbox is not None and self.near is not None and self.viewbox.contains(self.near)) - or self.layers is not None and not self.layers) + or (self.layers is not None and not self.layers) + or (self.max_rank <= 4 and + self.layers is not None and not self.layers & DataLayer.ADDRESS)) def layer_enabled(self, layer: DataLayer) -> bool: diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index a64cbfaf..fce897bc 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -31,7 +31,7 @@ class Cursor(psycopg2.extras.DictCursor): """ Query execution that logs the SQL query when debugging is enabled. """ if LOG.isEnabledFor(logging.DEBUG): - LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[arg-type] + LOG.debug(self.mogrify(query, args).decode('utf-8')) super().execute(query, args) diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index d9154ed9..e3f0712a 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -118,4 +118,4 @@ class CopyBuffer: """ if self.buffer.tell() > 0: self.buffer.seek(0) - cur.copy_from(self.buffer, table, columns=columns) # type: ignore[arg-type] + cur.copy_from(self.buffer, table, columns=columns) diff --git a/nominatim/tools/collect_os_info.py b/nominatim/tools/collect_os_info.py index 29e1cd53..c8fda908 100644 --- a/nominatim/tools/collect_os_info.py +++ b/nominatim/tools/collect_os_info.py @@ -12,14 +12,13 @@ import os import subprocess import sys from pathlib import Path -from typing import List, Optional, Tuple, Union, cast +from typing import List, Optional, Tuple, Union import psutil from psycopg2.extensions import make_dsn, parse_dsn from nominatim.config import Configuration from nominatim.db.connection import connect -from nominatim.typing import DictCursorResults from nominatim.version import NOMINATIM_VERSION @@ -107,15 +106,15 @@ def report_system_information(config: Configuration) -> None: postgresql_ver: str = convert_version(conn.server_version_tuple()) with conn.cursor() as cur: - cur.execute(f""" - SELECT datname FROM pg_catalog.pg_database - WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""") - nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall()) - if nominatim_db_exists: - with connect(config.get_libpq_dsn()) as conn: - postgis_ver: str = convert_version(conn.postgis_version_tuple()) - else: - postgis_ver = "Unable to connect to database" + num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s", + (parse_dsn(config.get_libpq_dsn())['dbname'], )) + nominatim_db_exists = num == 1 if isinstance(num, int) else False + + if nominatim_db_exists: + with connect(config.get_libpq_dsn()) as conn: + postgis_ver: str = convert_version(conn.postgis_version_tuple()) + else: + postgis_ver = "Unable to connect to database" postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))