From b93ec2522e25102e8a36ca78423a53b4579c68c2 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 23 Feb 2021 19:05:51 +0100 Subject: [PATCH] use psql for executing sql files This allows to run larger files without needing to keep them in memory. --- nominatim/db/utils.py | 37 +++++++++++++++++++++++++++++------- nominatim/tools/refresh.py | 8 ++++---- test/python/test_db_utils.py | 33 +++++++++++++++++++++----------- 3 files changed, 56 insertions(+), 22 deletions(-) diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index abd72519..1a104e51 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -1,12 +1,35 @@ """ Helper functions for handling DB accesses. """ +import subprocess +import logging -def execute_file(conn, fname): - """ Read an SQL file and run its contents against the given connection. +from .connection import get_pg_env +from ..errors import UsageError + +LOG = logging.getLogger() + +def execute_file(dsn, fname, ignore_errors=False): + """ Read an SQL file and run its contents against the given database + using psql. """ - with fname.open('r') as fdesc: - sql = fdesc.read() - with conn.cursor() as cur: - cur.execute(sql) - conn.commit() + cmd = ['psql'] + if not ignore_errors: + cmd.extend(('-v', 'ON_ERROR_STOP=1')) + proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) + + if not LOG.isEnabledFor(logging.INFO): + proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8')) + + with fname.open('rb') as fdesc: + chunk = fdesc.read(2048) + while chunk and proc.poll() is None: + proc.stdin.write(chunk) + chunk = fdesc.read(2048) + + proc.stdin.close() + + ret = proc.wait() + print(ret, chunk) + if ret != 0 or chunk: + raise UsageError("Failed to execute SQL file.") diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index f09c0ced..33efe8f8 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -12,17 +12,17 @@ from ..db.utils import execute_file LOG = logging.getLogger() -def update_postcodes(conn, sql_dir): +def update_postcodes(dsn, sql_dir): """ Recalculate postcode centroids and add, remove and update entries in the location_postcode table. `conn` is an opne connection to the database. """ - execute_file(conn, sql_dir / 'update-postcodes.sql') + execute_file(dsn, sql_dir / 'update-postcodes.sql') -def recompute_word_counts(conn, sql_dir): +def recompute_word_counts(dsn, sql_dir): """ Compute the frequency of full-word search terms. """ - execute_file(conn, sql_dir / 'words_from_search_name.sql') + execute_file(dsn, sql_dir / 'words_from_search_name.sql') def _add_address_level_rows_from_entry(rows, entry): diff --git a/test/python/test_db_utils.py b/test/python/test_db_utils.py index e756f2c4..b2586ed0 100644 --- a/test/python/test_db_utils.py +++ b/test/python/test_db_utils.py @@ -5,26 +5,37 @@ import psycopg2 import pytest import nominatim.db.utils as db_utils +from nominatim.errors import UsageError -def test_execute_file_success(temp_db_conn, tmp_path): +@pytest.fixture +def dsn(temp_db): + return 'dbname=' + temp_db + +def test_execute_file_success(dsn, temp_db_cursor, tmp_path): tmpfile = tmp_path / 'test.sql' tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') - db_utils.execute_file(temp_db_conn, tmpfile) + db_utils.execute_file(dsn, tmpfile) - with temp_db_conn.cursor() as cur: - cur.execute('SELECT * FROM test') + temp_db_cursor.execute('SELECT * FROM test') - assert cur.rowcount == 1 - assert cur.fetchone()[0] == 56 + assert temp_db_cursor.rowcount == 1 + assert temp_db_cursor.fetchone()[0] == 56 -def test_execute_file_bad_file(temp_db_conn, tmp_path): +def test_execute_file_bad_file(dsn, tmp_path): with pytest.raises(FileNotFoundError): - db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql') + db_utils.execute_file(dsn, tmp_path / 'test2.sql') + +def test_execute_file_bad_sql(dsn, tmp_path): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('CREATE STABLE test (id INT)') + + with pytest.raises(UsageError): + db_utils.execute_file(dsn, tmpfile) + -def test_execute_file_bad_sql(temp_db_conn, tmp_path): +def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path): tmpfile = tmp_path / 'test.sql' tmpfile.write_text('CREATE STABLE test (id INT)') - with pytest.raises(psycopg2.ProgrammingError): - db_utils.execute_file(temp_db_conn, tmpfile) + db_utils.execute_file(dsn, tmpfile, ignore_errors=True) -- 2.39.5