From: Mykola Golub Date: Wed, 10 Aug 2016 10:46:46 +0000 (+0300) Subject: rbd-mirror: option to automatically resync after journal client disconnect X-Git-Tag: v10.2.4~61^2~37 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=f7ffbfa71d6f8c417b140ce434b7714cd0053b09;p=ceph.git rbd-mirror: option to automatically resync after journal client disconnect Signed-off-by: Mykola Golub (cherry picked from commit 77fd6a1c2016262d734b0bb5387e6b6a41232e8b) Conflicts: src/common/config_opts.h: trivial resolution --- diff --git a/qa/workunits/rbd/rbd_mirror.sh b/qa/workunits/rbd/rbd_mirror.sh index d553e7b9fc3d..e6860397b74c 100755 --- a/qa/workunits/rbd/rbd_mirror.sh +++ b/qa/workunits/rbd/rbd_mirror.sh @@ -279,4 +279,20 @@ wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})" compare_images ${POOL} ${image} +testlog " - rbd_mirroring_resync_after_disconnect config option" +set_image_meta ${CLUSTER1} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect true +disconnect_image ${CLUSTER2} ${POOL} ${image} +wait_for_image_present ${CLUSTER1} ${POOL} ${image} 'deleted' +wait_for_image_replay_started ${CLUSTER1} ${POOL} ${image} +wait_for_replay_complete ${CLUSTER1} ${CLUSTER2} ${POOL} ${image} +test -n "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})" +compare_images ${POOL} ${image} +set_image_meta ${CLUSTER1} ${POOL} ${image} \ + conf_rbd_mirroring_resync_after_disconnect false +disconnect_image ${CLUSTER2} ${POOL} ${image} +test -z "$(get_mirror_position ${CLUSTER2} ${POOL} ${image})" +wait_for_image_replay_stopped ${CLUSTER1} ${POOL} ${image} +test_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'disconnected' + echo OK diff --git a/src/common/config_opts.h b/src/common/config_opts.h index ba1c48c40648..3795f0ec3e6c 100644 --- a/src/common/config_opts.h +++ b/src/common/config_opts.h @@ -1172,6 +1172,7 @@ OPTION(rbd_enable_alloc_hint, OPT_BOOL, true) // when writing a object, it will OPTION(rbd_tracing, OPT_BOOL, false) // true if LTTng-UST tracepoints should be enabled OPTION(rbd_validate_pool, OPT_BOOL, true) // true if empty pools should be validated for RBD compatibility OPTION(rbd_validate_names, OPT_BOOL, true) // true if image specs should be validated +OPTION(rbd_mirroring_resync_after_disconnect, OPT_BOOL, false) // automatically start image resync after mirroring is disconnected due to being laggy /* * The following options change the behavior for librbd's image creation methods that diff --git a/src/librbd/ImageCtx.cc b/src/librbd/ImageCtx.cc index c1d21f070cb4..4226d4274713 100644 --- a/src/librbd/ImageCtx.cc +++ b/src/librbd/ImageCtx.cc @@ -930,7 +930,8 @@ struct C_InvalidateCache : public Context { "rbd_journal_object_flush_age", false)( "rbd_journal_pool", false)( "rbd_journal_max_payload_bytes", false)( - "rbd_journal_max_concurrent_object_sets", false); + "rbd_journal_max_concurrent_object_sets", false)( + "rbd_mirroring_resync_after_disconnect", false); md_config_t local_config_t; std::map res; @@ -987,6 +988,7 @@ struct C_InvalidateCache : public Context { ASSIGN_OPTION(journal_pool); ASSIGN_OPTION(journal_max_payload_bytes); ASSIGN_OPTION(journal_max_concurrent_object_sets); + ASSIGN_OPTION(mirroring_resync_after_disconnect); } ExclusiveLock *ImageCtx::create_exclusive_lock() { diff --git a/src/librbd/ImageCtx.h b/src/librbd/ImageCtx.h index 552d8a8eb5d2..b8a3bf6cf24a 100644 --- a/src/librbd/ImageCtx.h +++ b/src/librbd/ImageCtx.h @@ -184,6 +184,7 @@ namespace librbd { std::string journal_pool; uint32_t journal_max_payload_bytes; int journal_max_concurrent_object_sets; + bool mirroring_resync_after_disconnect; LibrbdAdminSocketHook *asok_hook; diff --git a/src/test/librbd/mock/MockImageCtx.h b/src/test/librbd/mock/MockImageCtx.h index 3dd3962d169f..f7283a72bc6d 100644 --- a/src/test/librbd/mock/MockImageCtx.h +++ b/src/test/librbd/mock/MockImageCtx.h @@ -93,7 +93,9 @@ struct MockImageCtx { journal_pool(image_ctx.journal_pool), journal_max_payload_bytes(image_ctx.journal_max_payload_bytes), journal_max_concurrent_object_sets( - image_ctx.journal_max_concurrent_object_sets) + image_ctx.journal_max_concurrent_object_sets), + mirroring_resync_after_disconnect( + image_ctx.mirroring_resync_after_disconnect) { md_ctx.dup(image_ctx.md_ctx); data_ctx.dup(image_ctx.data_ctx); @@ -260,6 +262,7 @@ struct MockImageCtx { std::string journal_pool; uint32_t journal_max_payload_bytes; int journal_max_concurrent_object_sets; + bool mirroring_resync_after_disconnect; }; } // namespace librbd diff --git a/src/test/rbd_mirror/test_ImageReplayer.cc b/src/test/rbd_mirror/test_ImageReplayer.cc index bb95dff819d9..d484188a6006 100644 --- a/src/test/rbd_mirror/test_ImageReplayer.cc +++ b/src/test/rbd_mirror/test_ImageReplayer.cc @@ -829,6 +829,9 @@ TEST_F(TestImageReplayer, Disconnect) { bootstrap(); + // Make sure rbd_mirroring_resync_after_disconnect is not set + EXPECT_EQ(0, m_local_cluster->conf_set("rbd_mirroring_resync_after_disconnect", "false")); + // Test start fails if disconnected librbd::ImageCtx *ictx; @@ -889,4 +892,24 @@ TEST_F(TestImageReplayer, Disconnect) C_SaferCond cond4; m_replayer->start(&cond4); ASSERT_EQ(-ENOTCONN, cond4.wait()); + + // Test automatic resync if rbd_mirroring_resync_after_disconnect is set + + EXPECT_EQ(0, m_local_cluster->conf_set("rbd_mirroring_resync_after_disconnect", "true")); + + // Resync is flagged on first start attempt + C_SaferCond cond5; + m_replayer->start(&cond5); + ASSERT_EQ(-ENOTCONN, cond5.wait()); + C_SaferCond delete_cond1; + m_image_deleter->wait_for_scheduled_deletion( + m_local_ioctx.get_id(), m_replayer->get_global_image_id(), &delete_cond1); + EXPECT_EQ(0, delete_cond1.wait()); + + C_SaferCond cond6; + m_replayer->start(&cond6); + ASSERT_EQ(0, cond6.wait()); + wait_for_replay_complete(); + + stop(); } diff --git a/src/tools/rbd_mirror/ImageReplayer.cc b/src/tools/rbd_mirror/ImageReplayer.cc index 58df5d081b11..263eebc91b6b 100644 --- a/src/tools/rbd_mirror/ImageReplayer.cc +++ b/src/tools/rbd_mirror/ImageReplayer.cc @@ -531,6 +531,10 @@ void ImageReplayer::handle_init_remote_journaler(int r) { if (client.state != cls::journal::CLIENT_STATE_CONNECTED) { dout(5) << "client flagged disconnected, stopping image replay" << dendl; + if (m_local_image_ctx->mirroring_resync_after_disconnect) { + Mutex::Locker locker(m_lock); + m_stopping_for_resync = true; + } on_start_fail(-ENOTCONN, "disconnected"); return; }