From: Sarah Hoffmann Date: Fri, 22 Jan 2021 22:25:37 +0000 (+0100) Subject: move update code for postcode and word count to Python X-Git-Tag: v3.7.0~46^2~11 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/e6c2842b66c607400a0a95b7b8e8de8cd5b12d51?ds=sidebyside move update code for postcode and word count to Python Adds also tests for the new function to execute a SQL script. --- diff --git a/lib/admin/update.php b/lib/admin/update.php index fe9658b5..48609c3e 100644 --- a/lib/admin/update.php +++ b/lib/admin/update.php @@ -104,14 +104,12 @@ if ($fPostgresVersion >= 11.0) { ); } - -$oIndexCmd = (new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) - ->addParams('index'); +$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); if ($aResult['quiet']) { - $oIndexCmd->addParams('--quiet'); + $oNominatimCmd->addParams('--quiet'); } if ($aResult['verbose']) { - $oIndexCmd->addParams('--verbose'); + $oNominatimCmd->addParams('--verbose'); } $sPyosmiumBin = getSetting('PYOSMIUM_BINARY'); @@ -220,9 +218,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { } if ($aResult['calculate-postcodes']) { - info('Update postcodes centroids'); - $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql'); - runSQLScript($sTemplate, true, true); + (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run(); } $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; @@ -271,15 +267,11 @@ if ($bHaveDiff) { } if ($aResult['recompute-word-counts']) { - info('Recompute frequency of full-word search terms'); - $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql'); - runSQLScript($sTemplate, true, true); + (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run(); } if ($aResult['index']) { - $oCmd = (clone $oIndexCmd) - ->addParams('--minrank', $aResult['index-rank']); - $oCmd->run(); + (clone $oNominatimCmd)->addParams('index', '--minrank', $aResult['index-rank'])->run(); 
} if ($aResult['update-address-levels']) { @@ -421,7 +413,8 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { if (!$aResult['no-index']) { $fCMDStartTime = time(); - $oThisIndexCmd = clone($oIndexCmd); + $oThisIndexCmd = clone($oNominatimCmd); + $oThisIndexCmd->addParams('index'); echo $oThisIndexCmd->escapedCmd()."\n"; $iErrorLevel = $oThisIndexCmd->run(); if ($iErrorLevel) { diff --git a/nominatim/cli.py b/nominatim/cli.py index c558eb84..4388902d 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -8,9 +8,13 @@ import argparse import logging from pathlib import Path +import psycopg2 + from .config import Configuration from .tools.exec_utils import run_legacy_script, run_api_script +LOG = logging.getLogger() + def _num_system_cpus(): try: cpus = len(os.sched_getaffinity(0)) @@ -366,32 +370,35 @@ class UpdateRefresh: @staticmethod def run(args): - if args.postcodes: - run_legacy_script('update.php', '--calculate-postcodes', - nominatim_env=args, throw_on_fail=True) - if args.word_counts: - run_legacy_script('update.php', '--recompute-word-counts', - nominatim_env=args, throw_on_fail=True) - if args.address_levels: - run_legacy_script('update.php', '--update-address-levels', - nominatim_env=args, throw_on_fail=True) - if args.functions: - params = ['setup.php', '--create-functions', '--create-partition-functions'] - if args.diffs: - params.append('--enable-diff-updates') - if args.enable_debug_statements: - params.append('--enable-debug-statements') - run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) - if args.wiki_data: - run_legacy_script('setup.php', '--import-wikipedia-articles', - nominatim_env=args, throw_on_fail=True) - # Attention: importance MUST come after wiki data import. 
- if args.importance: - run_legacy_script('update.php', '--recompute-importance', - nominatim_env=args, throw_on_fail=True) - if args.website: - run_legacy_script('setup.php', '--setup-website', - nominatim_env=args, throw_on_fail=True) + import nominatim.tools.refresh + + with psycopg2.connect(args.config.get_libpq_dsn()) as conn: + if args.postcodes: + LOG.warning("Update postcodes centroid") + nominatim.tools.refresh.update_postcodes(conn, args.data_dir) + if args.word_counts: + LOG.warning('Recompute frequency of full-word search terms') + nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir) + if args.address_levels: + run_legacy_script('update.php', '--update-address-levels', + nominatim_env=args, throw_on_fail=True) + if args.functions: + params = ['setup.php', '--create-functions', '--create-partition-functions'] + if args.diffs: + params.append('--enable-diff-updates') + if args.enable_debug_statements: + params.append('--enable-debug-statements') + run_legacy_script(*params, nominatim_env=args, throw_on_fail=True) + if args.wiki_data: + run_legacy_script('setup.php', '--import-wikipedia-articles', + nominatim_env=args, throw_on_fail=True) + # Attention: importance MUST come after wiki data import. + if args.importance: + run_legacy_script('update.php', '--recompute-importance', + nominatim_env=args, throw_on_fail=True) + if args.website: + run_legacy_script('setup.php', '--setup-website', + nominatim_env=args, throw_on_fail=True) return 0 diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py new file mode 100644 index 00000000..1a39746e --- /dev/null +++ b/nominatim/db/utils.py @@ -0,0 +1,11 @@ +""" +Helper functions for handling DB accesses. +""" + +def execute_file(conn, fname): + """ Read an SQL file and run its contents against the given connection. 
+ """ + with fname.open('r') as fdesc: + sql = fdesc.read() + with conn.cursor() as cur: + cur.execute(sql) diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py new file mode 100644 index 00000000..859b5646 --- /dev/null +++ b/nominatim/tools/refresh.py @@ -0,0 +1,16 @@ +""" +Functions for bringing auxiliary data in the database up-to-date. +""" +from ..db.utils import execute_file + +def update_postcodes(conn, datadir): + """ Recalculate postcode centroids and add, remove and update entries in the + location_postcode table. `conn` is an open connection to the database. + """ + execute_file(conn, datadir / 'sql' / 'update-postcodes.sql') + + +def recompute_word_counts(conn, datadir): + """ Compute the frequency of full-word search terms. + """ + execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql') diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 9ac62973..33c65ade 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -6,6 +6,7 @@ import pytest import nominatim.cli import nominatim.indexer.indexer +import nominatim.tools.refresh def call_nominatim(*args): return nominatim.cli.nominatim(module_dir='build/module', @@ -99,21 +100,30 @@ def test_index_command(monkeypatch, temp_db, params, do_bnds, do_ranks): @pytest.mark.parametrize("command,params", [ - ('postcodes', ('update.php', '--calculate-postcodes')), - ('word-counts', ('update.php', '--recompute-word-counts')), ('address-levels', ('update.php', '--update-address-levels')), ('functions', ('setup.php',)), ('wiki-data', ('setup.php', '--import-wikipedia-articles')), ('importance', ('update.php', '--recompute-importance')), ('website', ('setup.php', '--setup-website')), ]) -def test_refresh_command(mock_run_legacy, command, params): +def test_refresh_legacy_command(mock_run_legacy, command, params): assert 0 == call_nominatim('refresh', '--' + command) assert mock_run_legacy.called == 1 assert len(mock_run_legacy.last_args) >= len(params) assert 
mock_run_legacy.last_args[:len(params)] == params +@pytest.mark.parametrize("command,func", [ + ('postcodes', 'update_postcodes'), + ('word-counts', 'recompute_word_counts'), + ]) +def test_refresh_command(monkeypatch, command, func): + func_mock = MockParamCapture() + monkeypatch.setattr(nominatim.tools.refresh, func, func_mock) + + assert 0 == call_nominatim('refresh', '--' + command) + + assert func_mock.called == 1 def test_refresh_importance_computed_after_wiki_import(mock_run_legacy): assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') diff --git a/test/python/test_db_utils.py b/test/python/test_db_utils.py new file mode 100644 index 00000000..3210721e --- /dev/null +++ b/test/python/test_db_utils.py @@ -0,0 +1,33 @@ +""" +Tests for DB utility functions in db.utils +""" +import psycopg2 +import pytest + +import nominatim.db.utils as db_utils + +def test_execute_file_success(temp_db, tmp_path): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') + + with psycopg2.connect('dbname=' + temp_db) as conn: + db_utils.execute_file(conn, tmpfile) + + with conn.cursor() as cur: + cur.execute('SELECT * FROM test') + + assert cur.rowcount == 1 + assert cur.fetchone()[0] == 56 + +def test_execute_file_bad_file(temp_db, tmp_path): + with psycopg2.connect('dbname=' + temp_db) as conn: + with pytest.raises(FileNotFoundError): + db_utils.execute_file(conn, tmp_path / 'test2.sql') + +def test_execute_file_bad_sql(temp_db, tmp_path): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('CREATE STABLE test (id INT)') + + with psycopg2.connect('dbname=' + temp_db) as conn: + with pytest.raises(psycopg2.ProgrammingError): + db_utils.execute_file(conn, tmpfile)