From: Sarah Hoffmann Date: Sat, 17 Apr 2021 09:51:21 +0000 (+0200) Subject: Merge pull request #2279 from lonvia/add-index-for-continued-indexing X-Git-Tag: v4.0.0~112 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/7aeae9da81d70d6a6d2fe8a8a66bba35888a227c?hp=9fabc5572db3cb5cb5f497c0a36e301d9d316dcc Merge pull request #2279 from lonvia/add-index-for-continued-indexing Add index for continued indexing --- diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index fe7c8dc1..fb7abdec 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -105,11 +105,11 @@ class SetupAll: LOG.error('Wikipedia importance dump file not found. ' 'Will be using default importances.') + if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Initialise tables') with connect(args.config.get_libpq_dsn()) as conn: database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) - if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Load data into placex table') database_import.load_data(args.config.get_libpq_dsn(), args.data_dir, @@ -119,6 +119,9 @@ class SetupAll: postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir) if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): + if args.continue_at is not None and args.continue_at != 'load-data': + with connect(args.config.get_libpq_dsn()) as conn: + SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX) LOG.warning('Indexing places') indexer = Indexer(args.config.get_libpq_dsn(), args.threads or psutil.cpu_count() or 1) @@ -148,3 +151,25 @@ class SetupAll: '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) return 0 + + + @staticmethod + def _create_pending_index(conn, tablespace): + """ Add a supporting index for finding places still to be indexed. + + This index is normally created at the end of the import process + for later updates. When indexing was partially done, then this + index can greatly improve speed going through already indexed data. + """ + if conn.index_exists('idx_placex_pendingsector'): + return + + with conn.cursor() as cur: + LOG.warning('Creating support index') + if tablespace: + tablespace = 'TABLESPACE ' + tablespace + cur.execute("""CREATE INDEX idx_placex_pendingsector + ON placex USING BTREE (rank_address,geometry_sector) + {} WHERE indexed_status > 0 + """.format(tablespace)) + conn.commit() diff --git a/test/python/test_cli.py b/test/python/test_cli.py index 38bbaefe..afa01e57 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -49,6 +49,7 @@ def mock_run_legacy(monkeypatch): def mock_func_factory(monkeypatch): def get_mock(module, func): mock = MockParamCapture() + mock.func_name = func monkeypatch.setattr(module, func, mock) return mock @@ -110,7 +111,61 @@ def test_import_full(temp_db, mock_func_factory): assert cf_mock.called > 1 for mock in mocks: - assert mock.called == 1 + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_load_data(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), + mock_func_factory(nominatim.tools.database_import, 'load_data'), + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'load-data') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'indexing') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + # Calling it again still works for the index + assert 0 == call_nominatim('import', '--continue', 'indexing') + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + +def test_import_continue_postprocess(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'db-postprocess') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + def test_freeze_command(mock_func_factory, temp_db): mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')