From b169e4c88cd99075c7e932aeb6587c97f51e7ed1 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 18 Feb 2021 17:32:30 +0100 Subject: [PATCH] port check-database function to python This change also adapts the hints to use the nominatim tool. Slightly changed checks, so that they are just as effective on a frozen database. --- lib-php/DB.php | 70 ------ lib-php/admin/check_import_finished.php | 196 +---------------- nominatim/clicmd/admin.py | 12 +- nominatim/db/connection.py | 29 ++- nominatim/tools/check_database.py | 269 ++++++++++++++++++++++++ test/php/Nominatim/DBTest.php | 8 - test/python/test_cli.py | 16 +- 7 files changed, 322 insertions(+), 278 deletions(-) create mode 100644 nominatim/tools/check_database.py diff --git a/lib-php/DB.php b/lib-php/DB.php index c25c5ca9..abd23179 100644 --- a/lib-php/DB.php +++ b/lib-php/DB.php @@ -252,76 +252,6 @@ class DB return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0; } - /** - * Check if an index exists in the database. Optional filtered by tablename - * - * @param string $sTableName - * - * @return boolean - */ - public function indexExists($sIndexName, $sTableName = null) - { - return in_array($sIndexName, $this->getListOfIndices($sTableName)); - } - - /** - * Returns a list of index names in the database, optional filtered by tablename - * - * @param string $sTableName - * - * @return array - */ - public function getListOfIndices($sTableName = null) - { - // table_name | index_name | column_name - // -----------------------+---------------------------------+-------------- - // country_name | idx_country_name_country_code | country_code - // country_osm_grid | idx_country_osm_grid_geometry | geometry - // import_polygon_delete | idx_import_polygon_delete_osmid | osm_id - // import_polygon_delete | idx_import_polygon_delete_osmid | osm_type - // import_polygon_error | idx_import_polygon_error_osmid | osm_id - // import_polygon_error | idx_import_polygon_error_osmid | osm_type - $sSql = <<< END -SELECT - t.relname as table_name, - i.relname as index_name, - a.attname as column_name -FROM - pg_class t, - pg_class i, - pg_index ix, - pg_attribute a -WHERE - t.oid = ix.indrelid - and i.oid = ix.indexrelid - and a.attrelid = t.oid - and a.attnum = ANY(ix.indkey) - and t.relkind = 'r' - and i.relname NOT LIKE 'pg_%' - FILTERS - ORDER BY - t.relname, - i.relname, - a.attname -END; - - $aRows = null; - if ($sTableName) { - $sSql = str_replace('FILTERS', 'and t.relname = :tablename', $sSql); - $aRows = $this->getAll($sSql, array(':tablename' => $sTableName)); - } else { - $sSql = str_replace('FILTERS', '', $sSql); - $aRows = $this->getAll($sSql); - } - - $aIndexNames = array_unique(array_map(function ($aRow) { - return $aRow['index_name']; - }, $aRows)); - sort($aIndexNames); - - return $aIndexNames; - } - /** * Tries to connect to the database but on failure doesn't throw an exception. * diff --git a/lib-php/admin/check_import_finished.php b/lib-php/admin/check_import_finished.php index f189fc9a..a1811af3 100644 --- a/lib-php/admin/check_import_finished.php +++ b/lib-php/admin/check_import_finished.php @@ -5,197 +5,7 @@ require_once(CONST_LibDir.'/init-cmd.php'); loadSettings(getcwd()); -$term_colors = array( - 'green' => "\033[92m", - 'red' => "\x1B[31m", - 'normal' => "\033[0m" -); +(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) + ->addParams('admin', '--check-database') + ->run(); -$print_success = function ($message = 'OK') use ($term_colors) { - echo $term_colors['green'].$message.$term_colors['normal']."\n"; -}; - -$print_fail = function ($message = 'Failed') use ($term_colors) { - echo $term_colors['red'].$message.$term_colors['normal']."\n"; -}; - -$oDB = new Nominatim\DB; - - -function isReverseOnlyInstallation() -{ - global $oDB; - return !$oDB->tableExists('search_name'); -} - -// Check (guess) if the setup.php included --drop -function isNoUpdateInstallation() -{ - global $oDB; - return $oDB->tableExists('placex') && !$oDB->tableExists('planet_osm_rels') ; -} - - -echo 'Checking database got created ... '; -if ($oDB->checkConnection()) { - $print_success(); -} else { - $print_fail(); - echo <<< END - Hints: - * Is the database server started? - * Check the NOMINATIM_DATABASE_DSN variable in your local .env - * Try connecting to the database with the same settings - -END; - exit(1); -} - - -echo 'Checking nominatim.so module installed ... '; -$sStandardWord = $oDB->getOne("SELECT make_standard_name('a')"); -if ($sStandardWord === 'a') { - $print_success(); -} else { - $print_fail(); - echo <<< END - The Postgresql extension nominatim.so was not found in the database. - Hints: - * Check the output of the CMmake/make installation step - * Does nominatim.so exist? - * Does nominatim.so exist on the database server? - * Can nominatim.so be accessed by the database user? - -END; - exit(1); -} - -if (!isNoUpdateInstallation()) { - echo 'Checking place table ... '; - if ($oDB->tableExists('place')) { - $print_success(); - } else { - $print_fail(); - echo <<< END - * The import didn't finish. - Hints: - * Check the output of the utils/setup.php you ran. - Usually the osm2pgsql step failed. Check for errors related to - * the file you imported not containing any places - * harddrive full - * out of memory (RAM) - * osm2pgsql killed by other scripts, for consuming to much memory - - END; - exit(1); - } -} - - -echo 'Checking indexing status ... '; -$iUnindexed = $oDB->getOne('SELECT count(*) FROM placex WHERE indexed_status > 0'); -if ($iUnindexed == 0) { - $print_success(); -} else { - $print_fail(); - echo <<< END - The indexing didn't finish. There is still $iUnindexed places. See the - question 'Can a stopped/killed import process be resumed?' in the - troubleshooting guide. - -END; - exit(1); -} - -echo "Search index creation\n"; -$aExpectedIndices = array( - // sql/indices.src.sql - 'idx_word_word_id', - 'idx_place_addressline_address_place_id', - 'idx_placex_rank_search', - 'idx_placex_rank_address', - 'idx_placex_parent_place_id', - 'idx_placex_geometry_reverse_lookuppolygon', - 'idx_placex_geometry_reverse_placenode', - 'idx_osmline_parent_place_id', - 'idx_osmline_parent_osm_id', - 'idx_postcode_id', - 'idx_postcode_postcode' -); -if (!isReverseOnlyInstallation()) { - $aExpectedIndices = array_merge($aExpectedIndices, array( - // sql/indices_search.src.sql - 'idx_search_name_nameaddress_vector', - 'idx_search_name_name_vector', - 'idx_search_name_centroid' - )); -} -if (!isNoUpdateInstallation()) { - $aExpectedIndices = array_merge($aExpectedIndices, array( - 'idx_placex_pendingsector', - 'idx_location_area_country_place_id', - 'idx_place_osm_unique', - )); -} - -foreach ($aExpectedIndices as $sExpectedIndex) { - echo "Checking index $sExpectedIndex ... "; - if ($oDB->indexExists($sExpectedIndex)) { - $print_success(); - } else { - $print_fail(); - echo <<< END - Hints: - * Run './utils/setup.php --create-search-indices --ignore-errors' to - create missing indices. - -END; - exit(1); - } -} - -echo 'Checking search indices are valid ... '; -$sSQL = <<< END - SELECT relname - FROM pg_class, pg_index - WHERE pg_index.indisvalid = false - AND pg_index.indexrelid = pg_class.oid; -END; -$aInvalid = $oDB->getCol($sSQL); -if (empty($aInvalid)) { - $print_success(); -} else { - $print_fail(); - echo <<< END - At least one index is invalid. That can happen, e.g. when index creation was - disrupted and later restarted. You should delete the affected indices and - run the index stage of setup again. - See the question 'Can a stopped/killed import process be resumed?' in the - troubleshooting guide. - Affected indices: -END; - echo join(', ', $aInvalid) . "\n"; - exit(1); -} - - - -if (getSettingBool('USE_US_TIGER_DATA')) { - echo 'Checking TIGER table exists ... '; - if ($oDB->tableExists('location_property_tiger')) { - $print_success(); - } else { - $print_fail(); - echo <<< END - Table 'location_property_tiger' does not exist. Run the TIGER data - import again. - -END; - exit(1); - } -} - - - - -exit(0); diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 8d34f386..e5863575 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -1,6 +1,8 @@ """ Implementation of the 'admin' subcommand. """ +import logging + from ..tools.exec_utils import run_legacy_script from ..db.connection import connect @@ -9,6 +11,8 @@ from ..db.connection import connect # Using non-top-level imports to avoid eventually unused imports. # pylint: disable=E0012,C0415 +LOG = logging.getLogger() + class AdminFuncs: """\ Analyse and maintain the database. @@ -39,14 +43,17 @@ class AdminFuncs: @staticmethod def run(args): - from ..tools import admin if args.warm: AdminFuncs._warm(args) if args.check_database: - run_legacy_script('check_import_finished.php', nominatim_env=args) + LOG.warning('Checking database') + from ..tools import check_database + return check_database.check_database(args.config) if args.analyse_indexing: + LOG.warning('Analysing performance of indexing function') + from ..tools import admin conn = connect(args.config.get_libpq_dsn()) admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id) conn.close() @@ -56,6 +63,7 @@ class AdminFuncs: @staticmethod def _warm(args): + LOG.warning('Warming database caches') params = ['warm.php'] if args.target == 'reverse': params.append('--reverse-only') diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index c7e22c98..b941f46f 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -7,6 +7,8 @@ import psycopg2 import psycopg2.extensions import psycopg2.extras +from ..errors import UsageError + class _Cursor(psycopg2.extras.DictCursor): """ A cursor returning dict-like objects and providing specialised execution functions. @@ -42,14 +44,34 @@ class _Connection(psycopg2.extensions.connection): """ return super().cursor(cursor_factory=cursor_factory, **kwargs) + def table_exists(self, table): """ Check that a table with the given name exists in the database. """ with self.cursor() as cur: num = cur.scalar("""SELECT count(*) FROM pg_tables - WHERE tablename = %s""", (table, )) + WHERE tablename = %s and schemaname = 'public'""", (table, )) return num == 1 + + def index_exists(self, index, table=None): + """ Check that an index with the given name exists in the database. + If table is not None then the index must relate to the given + table. + """ + with self.cursor() as cur: + cur.execute("""SELECT tablename FROM pg_indexes + WHERE indexname = %s and schemaname = 'public'""", (index, )) + if cur.rowcount == 0: + return False + + if table is not None: + row = cur.fetchone() + return row[0] == table + + return True + + def server_version_tuple(self): """ Return the server version as a tuple of (major, minor). Converts correctly for pre-10 and post-10 PostgreSQL versions. @@ -64,4 +86,7 @@ def connect(dsn): """ Open a connection to the database using the specialised connection factory. """ - return psycopg2.connect(dsn, connection_factory=_Connection) + try: + return psycopg2.connect(dsn, connection_factory=_Connection) + except psycopg2.OperationalError as err: + raise UsageError("Cannot connect to database: {}".format(err)) from err diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py new file mode 100644 index 00000000..e99a3572 --- /dev/null +++ b/nominatim/tools/check_database.py @@ -0,0 +1,269 @@ +""" +Collection of functions that check if the database is complete and functional. +""" +from enum import Enum +from textwrap import dedent + +import psycopg2 + +from ..db.connection import connect +from ..errors import UsageError + +CHECKLIST = [] + +class CheckState(Enum): + """ Possible states of a check. FATAL stops check execution entirely. + """ + OK = 0 + FAIL = 1 + FATAL = 2 + NOT_APPLICABLE = 3 + +def _check(hint=None): + """ Decorator for checks. It adds the function to the list of + checks to execute and adds the code for printing progress messages. + """ + def decorator(func): + title = func.__doc__.split('\n', 1)[0].strip() + def run_check(conn, config): + print(title, end=' ... ') + ret = func(conn, config) + if isinstance(ret, tuple): + ret, params = ret + else: + params = {} + if ret == CheckState.OK: + print('\033[92mOK\033[0m') + elif ret == CheckState.NOT_APPLICABLE: + print('not applicable') + else: + print('\x1B[31mFailed\033[0m') + if hint: + print(dedent(hint.format(**params))) + return ret + + CHECKLIST.append(run_check) + return run_check + + return decorator + +class _BadConnection: # pylint: disable=R0903 + + def __init__(self, msg): + self.msg = msg + + def close(self): + """ Dummy function to provide the implementation. + """ + +def check_database(config): + """ Run a number of checks on the database and return the status. + """ + try: + conn = connect(config.get_libpq_dsn()) + except UsageError as err: + conn = _BadConnection(str(err)) + + overall_result = 0 + for check in CHECKLIST: + ret = check(conn, config) + if ret == CheckState.FATAL: + conn.close() + return 1 + if ret != CheckState.OK: + overall_result = 1 + + conn.close() + return overall_result + + +def _get_indexes(conn): + indexes = ['idx_word_word_id', + 'idx_place_addressline_address_place_id', + 'idx_placex_rank_search', + 'idx_placex_rank_address', + 'idx_placex_parent_place_id', + 'idx_placex_geometry_reverse_lookuppolygon', + 'idx_placex_geometry_reverse_placenode', + 'idx_osmline_parent_place_id', + 'idx_osmline_parent_osm_id', + 'idx_postcode_id', + 'idx_postcode_postcode' + ] + if conn.table_exists('search_name'): + indexes.extend(('idx_search_name_nameaddress_vector', + 'idx_search_name_name_vector', + 'idx_search_name_centroid')) + if conn.table_exists('place'): + indexes.extend(('idx_placex_pendingsector', + 'idx_location_area_country_place_id', + 'idx_place_osm_unique' + )) + + return indexes + + +### CHECK FUNCTIONS +# +# Functions are exectured in the order they appear here. + +@_check(hint="""\ + {error} + + Hints: + * Is the database server started? + * Check the NOMINATIM_DATABASE_DSN variable in your local .env + * Try connecting to the database with the same settings + + Project directory: {config.project_dir} + Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} + """) +def check_connection(conn, config): + """ Checking database connection + """ + if isinstance(conn, _BadConnection): + return CheckState.FATAL, dict(error=conn.msg, config=config) + + return CheckState.OK + +@_check(hint="""\ + placex table not found + + Hints: + * Are you connecting to the right database? + * Did the import process finish without errors? + + Project directory: {config.project_dir} + Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} + """) +def check_placex_table(conn, config): + """ Checking for placex table + """ + if conn.table_exists('placex'): + return CheckState.OK + + return CheckState.FATAL, dict(config=config) + + +@_check(hint="""placex table has no data. Did the import finish sucessfully?""") +def check_placex_size(conn, config): # pylint: disable=W0613 + """ Checking for placex content + """ + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x') + + return CheckState.OK if cnt > 0 else CheckState.FATAL + +@_check(hint="""\ + The Postgresql extension nominatim.so was not correctly loaded. + + Error: {error} + + Hints: + * Check the output of the CMmake/make installation step + * Does nominatim.so exist? + * Does nominatim.so exist on the database server? + * Can nominatim.so be accessed by the database user? + """) +def check_module(conn, config): # pylint: disable=W0613 + """ Checking that nominatim.so module is installed + """ + with conn.cursor() as cur: + try: + out = cur.scalar("SELECT make_standard_name('a')") + except psycopg2.ProgrammingError as err: + return CheckState.FAIL, dict(error=str(err)) + + if out != 'a': + return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()') + + return CheckState.OK + + +@_check(hint="""\ + The indexing didn't finish. {count} entries are not yet indexed. + + To index the remaining entries, run: {index_cmd} + """) +def check_indexing(conn, config): # pylint: disable=W0613 + """ Checking indexing status + """ + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0') + + if cnt == 0: + return CheckState.OK + + if conn.index_exists('idx_word_word_id'): + # Likely just an interrupted update. + index_cmd = 'nominatim index' + else: + # Looks like the import process got interupted. + index_cmd = 'nominatim import --continue indexing' + + return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd) + + + +@_check(hint="""\ + The following indexes are missing: + {indexes} + + Rerun the index creation with: nominatim import --continue db-postprocess + """) +def check_database_indexes(conn, config): # pylint: disable=W0613 + """ Checking that database indexes are complete + """ + missing = [] + for index in _get_indexes(conn): + if not conn.index_exists(index): + missing.append(index) + + if missing: + return CheckState.FAIL, dict(indexes='\n '.join(missing)) + + return CheckState.OK + + +@_check(hint="""\ + At least one index is invalid. That can happen, e.g. when index creation was + disrupted and later restarted. You should delete the affected indices + and recreate them. + + Invalid indexes: + {indexes} + """) +def check_database_index_valid(conn, config): # pylint: disable=W0613 + """ Checking that all database indexes are valid + """ + with conn.cursor() as cur: + cur.execute(""" SELECT relname FROM pg_class, pg_index + WHERE pg_index.indisvalid = false + AND pg_index.indexrelid = pg_class.oid""") + + broken = list(cur) + + if broken: + return CheckState.FAIL, dict(indexes='\n '.join(broken)) + + return CheckState.OK + + +@_check(hint="""\ + {error} + Run TIGER import again: nominatim add-data --tiger-data + """) +def check_tiger_table(conn, config): + """ Checking TIGER external data table. + """ + if not config.get_bool('USE_US_TIGER_DATA'): + return CheckState.NOT_APPLICABLE + + if not conn.table_exists('location_property_tiger'): + return CheckState.FAIL, dict(error='TIGER data table not found.') + + with conn.cursor() as cur: + if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0: + return CheckState.FAIL, dict(error='TIGER data table is empty.') + + return CheckState.OK diff --git a/test/php/Nominatim/DBTest.php b/test/php/Nominatim/DBTest.php index c5f4f395..1a2ecc86 100644 --- a/test/php/Nominatim/DBTest.php +++ b/test/php/Nominatim/DBTest.php @@ -164,14 +164,6 @@ class DBTest extends \PHPUnit\Framework\TestCase $this->assertTrue($oDB->tableExists('table1')); $this->assertFalse($oDB->tableExists('table99')); $this->assertFalse($oDB->tableExists(null)); - - $this->assertEmpty($oDB->getListOfIndices()); - $oDB->exec('CREATE UNIQUE INDEX table1_index ON table1 (id)'); - $this->assertEquals( - array('table1_index'), - $oDB->getListOfIndices() - ); - $this->assertEmpty($oDB->getListOfIndices('table2')); } # select queries diff --git a/test/python/test_cli.py b/test/python/test_cli.py index e1df9478..10e57f8b 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -15,9 +15,11 @@ import nominatim.clicmd.api import nominatim.clicmd.refresh import nominatim.clicmd.admin import nominatim.indexer.indexer +import nominatim.tools.admin +import nominatim.tools.check_database +import nominatim.tools.freeze import nominatim.tools.refresh import nominatim.tools.replication -import nominatim.tools.freeze from nominatim.errors import UsageError from nominatim.db import status @@ -95,8 +97,7 @@ def test_freeze_command(mock_func_factory, temp_db): @pytest.mark.parametrize("params", [('--warm', ), ('--warm', '--reverse-only'), - ('--warm', '--search-only'), - ('--check-database', )]) + ('--warm', '--search-only')]) def test_admin_command_legacy(mock_func_factory, params): mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script') @@ -104,6 +105,7 @@ def test_admin_command_legacy(mock_func_factory, params): assert mock_run_legacy.called == 1 + @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))]) def test_admin_command_tool(temp_db, mock_func_factory, func, params): mock = mock_func_factory(nominatim.tools.admin, func) @@ -111,6 +113,14 @@ def test_admin_command_tool(temp_db, mock_func_factory, func, params): assert 0 == call_nominatim('admin', *params) assert mock.called == 1 + +def test_admin_command_check_database(mock_func_factory): + mock = mock_func_factory(nominatim.tools.check_database, 'check_database') + + assert 0 == call_nominatim('admin', '--check-database') + assert mock.called == 1 + + @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'), ('node', 12), ('way', 8), ('relation', 32)]) def test_add_data_command(mock_run_legacy, name, oid): -- 2.39.5