git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
rbd-mirror: make 'rbd mirror image resync' work after split-brain 12212/head
author Mykola Golub <mgolub@mirantis.com>
Tue, 29 Nov 2016 09:44:05 +0000 (11:44 +0200)
committer Mykola Golub <mgolub@mirantis.com>
Sat, 3 Dec 2016 17:50:27 +0000 (19:50 +0200)
Fixes: http://tracker.ceph.com/issues/18051
Signed-off-by: Mykola Golub <mgolub@mirantis.com>
qa/workunits/rbd/rbd_mirror.sh
src/test/rbd_mirror/image_replayer/test_mock_BootstrapRequest.cc
src/test/rbd_mirror/test_mock_ImageReplayer.cc
src/tools/rbd_mirror/ImageReplayer.cc
src/tools/rbd_mirror/ImageReplayer.h
src/tools/rbd_mirror/image_replayer/BootstrapRequest.cc
src/tools/rbd_mirror/image_replayer/BootstrapRequest.h

index 848687a84c96ce7ee6da2547942f6b912ff8e3e2..2a26a5e6a1ee3c7ad1deadc755a8b9be4cff28b7 100755 (executable)
@@ -368,5 +368,7 @@ write_image ${CLUSTER1} ${POOL} ${image} 10
 demote_image ${CLUSTER1} ${POOL} ${image}
 promote_image ${CLUSTER2} ${POOL} ${image}
 wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+error' 'split-brain'
+request_resync_image ${CLUSTER1} ${POOL} ${image} image_id
+wait_for_status_in_pool_dir ${CLUSTER1} ${POOL} ${image} 'up+replaying' 'master_position'
 
 echo OK
index c681cbfce0a57004f50fdf5e8abfa19a37907e0f..471166b70fb22c822476c7ebc5308a39c54e1f2a 100644 (file)
@@ -402,14 +402,14 @@ public:
                                     remote_mirror_uuid,
                                     &mock_journaler,
                                     &m_mirror_peer_client_meta,
-                                    on_finish);
+                                    on_finish, &m_do_resync);
   }
 
   librbd::ImageCtx *m_remote_image_ctx;
   librbd::ImageCtx *m_local_image_ctx = nullptr;
   librbd::MockTestImageCtx *m_local_test_image_ctx = nullptr;
   librbd::journal::MirrorPeerClientMeta m_mirror_peer_client_meta;
-
+  bool m_do_resync;
 };
 
 TEST_F(TestMockImageReplayerBootstrapRequest, NonPrimaryRemoteSyncingState) {
index 2477e035bcdbe639bf1187452b2b7bc42aa9e909..a276899210dd662b9c58f95b16a9b2e063fabfcc 100644 (file)
@@ -75,6 +75,7 @@ struct BootstrapRequest<librbd::MockTestImageCtx> {
         ::journal::MockJournalerProxy *journaler,
         librbd::journal::MirrorPeerClientMeta *client_meta,
         Context *on_finish,
+        bool *do_resync,
         rbd::mirror::ProgressContext *progress_ctx = nullptr) {
     assert(s_instance != nullptr);
     s_instance->on_finish = on_finish;
index f6bad875fdfdba87c322be5881a2f4ee6485827b..ff2fa32623bad34153e15dd72ec8b10e459e2135 100644 (file)
@@ -398,7 +398,7 @@ void ImageReplayer<I>::bootstrap() {
     &m_local_image_ctx, m_local_image_name, m_remote_image_id,
     m_global_image_id, m_threads->work_queue, m_threads->timer,
     &m_threads->timer_lock, m_local_mirror_uuid, m_remote_mirror_uuid,
-    m_remote_journaler, &m_client_meta, ctx, &m_progress_cxt);
+    m_remote_journaler, &m_client_meta, ctx, &m_do_resync, &m_progress_cxt);
 
   {
     Mutex::Locker locker(m_lock);
@@ -439,7 +439,6 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
     return;
   }
 
-
   assert(m_local_journal == nullptr);
   {
     RWLock::RLocker snap_locker(m_local_image_ctx->snap_lock);
@@ -456,13 +455,8 @@ void ImageReplayer<I>::handle_bootstrap(int r) {
 
   {
     Mutex::Locker locker(m_lock);
-    bool do_resync = false;
-    r = m_local_image_ctx->journal->is_resync_requested(&do_resync);
-    if (r < 0) {
-      derr << "failed to check if a resync was requested" << dendl;
-    }
 
-    if (do_resync) {
+    if (m_do_resync) {
       Context *on_finish = m_on_start_finish;
       m_stopping_for_resync = true;
       FunctionContext *ctx = new FunctionContext([this, on_finish](int r) {
index 222ef024a5e4ef8ecbbcf3a9b0a1fdea68f06683..ee080f1c900e86cf5159e49b0e18cb0fabf415fd 100644 (file)
@@ -253,6 +253,7 @@ private:
   int m_last_r = 0;
   std::string m_state_desc;
   BootstrapProgressContext m_progress_cxt;
+  bool m_do_resync;
   image_replayer::EventPreprocessor<ImageCtxT> *m_event_preprocessor = nullptr;
   image_replayer::ReplayStatusFormatter<ImageCtxT> *m_replay_status_formatter =
     nullptr;
index b7ead13b04bb75f84816e288e70ad2f29460a6a8..20ddf90da613b1686c66e10340f50e04be0a12a8 100644 (file)
@@ -52,6 +52,7 @@ BootstrapRequest<I>::BootstrapRequest(
         Journaler *journaler,
         MirrorPeerClientMeta *client_meta,
         Context *on_finish,
+        bool *do_resync,
         rbd::mirror::ProgressContext *progress_ctx)
   : BaseRequest("rbd::mirror::image_replayer::BootstrapRequest",
                reinterpret_cast<CephContext*>(local_io_ctx.cct()), on_finish),
@@ -63,6 +64,7 @@ BootstrapRequest<I>::BootstrapRequest(
     m_local_mirror_uuid(local_mirror_uuid),
     m_remote_mirror_uuid(remote_mirror_uuid), m_journaler(journaler),
     m_client_meta(client_meta), m_progress_ctx(progress_ctx),
+    m_do_resync(do_resync),
     m_lock(unique_lock_name("BootstrapRequest::m_lock", this)) {
 }
 
@@ -73,6 +75,8 @@ BootstrapRequest<I>::~BootstrapRequest() {
 
 template <typename I>
 void BootstrapRequest<I>::send() {
+  *m_do_resync = false;
+
   get_local_image_id();
 }
 
@@ -372,7 +376,33 @@ void BootstrapRequest<I>::handle_open_local_image(int r) {
     m_ret_val = r;
     close_remote_image();
     return;
-  } if (m_client.state == cls::journal::CLIENT_STATE_DISCONNECTED) {
+  }
+
+  I *local_image_ctx = (*m_local_image_ctx);
+  {
+    RWLock::RLocker snap_locker(local_image_ctx->snap_lock);
+    if (local_image_ctx->journal == nullptr) {
+      derr << ": local image does not support journaling" << dendl;
+      m_ret_val = -EINVAL;
+      close_local_image();
+      return;
+    }
+
+    r = (*m_local_image_ctx)->journal->is_resync_requested(m_do_resync);
+    if (r < 0) {
+      derr << ": failed to check if a resync was requested" << dendl;
+      m_ret_val = r;
+      close_local_image();
+      return;
+    }
+  }
+
+  if (*m_do_resync) {
+    close_remote_image();
+    return;
+  }
+
+  if (m_client.state == cls::journal::CLIENT_STATE_DISCONNECTED) {
     dout(10) << ": client flagged disconnected -- skipping bootstrap" << dendl;
     // The caller is expected to detect disconnect initializing remote journal.
     m_ret_val = 0;
index 8926adf81677b8935417673d2a9abd71f2456032..51d394e3d15826faa60371c8bfbe36faf2a8ec8e 100644 (file)
@@ -52,14 +52,15 @@ public:
         Journaler *journaler,
         MirrorPeerClientMeta *client_meta,
         Context *on_finish,
+        bool *do_resync,
         ProgressContext *progress_ctx = nullptr) {
     return new BootstrapRequest(local_io_ctx, remote_io_ctx,
                                 image_sync_throttler, local_image_ctx,
                                 local_image_name, remote_image_id,
                                 global_image_id, work_queue, timer, timer_lock,
                                 local_mirror_uuid, remote_mirror_uuid,
-                                journaler, client_meta, on_finish,
-                               progress_ctx);
+                                journaler, client_meta, on_finish, do_resync,
+                                progress_ctx);
   }
 
   BootstrapRequest(librados::IoCtx &local_io_ctx,
@@ -73,7 +74,7 @@ public:
                    const std::string &local_mirror_uuid,
                    const std::string &remote_mirror_uuid, Journaler *journaler,
                    MirrorPeerClientMeta *client_meta, Context *on_finish,
-                  ProgressContext *progress_ctx = nullptr);
+                   bool *do_resync, ProgressContext *progress_ctx = nullptr);
   ~BootstrapRequest();
 
   void send();
@@ -158,6 +159,7 @@ private:
   Journaler *m_journaler;
   MirrorPeerClientMeta *m_client_meta;
   ProgressContext *m_progress_ctx;
+  bool *m_do_resync;
   Mutex m_lock;
   bool m_canceled = false;