git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: apply orphaned maintenance ops after journal replay 7785/head
author Jason Dillaman <dillaman@redhat.com>
Wed, 24 Feb 2016 23:07:45 +0000 (18:07 -0500)
committer Jason Dillaman <dillaman@redhat.com>
Tue, 1 Mar 2016 12:39:42 +0000 (07:39 -0500)
If a client recorded a maintenance op to the journal but crashed
before writing the op finish event, the image will be in an
inconsistent state.  Therefore, once the end of the journal is
reached, attempt to apply all queued ops.

Fixes: #14822
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/journal/Replay.cc
src/librbd/journal/Replay.h
src/test/librbd/journal/test_mock_Replay.cc

index fa53a86e3e56541fd9ff3f7f35e68fd371d4ff24..b65d51c39c14900bfd5f7971d7b3e241719a581a 100644 (file)
@@ -100,6 +100,7 @@ void Replay<I>::shut_down(bool cancel_ops, Context *on_finish) {
 
   AioCompletion *flush_comp = nullptr;
   OpTids cancel_op_tids;
+  Contexts op_finish_events;
   on_finish = util::create_async_context_callback(
     m_image_ctx, on_finish);
 
@@ -111,13 +112,23 @@ void Replay<I>::shut_down(bool cancel_ops, Context *on_finish) {
       flush_comp = create_aio_flush_completion(nullptr, nullptr);;
     }
 
-    // cancel ops that are waiting to start
-    if (cancel_ops) {
-      for (auto &op_event_pair : m_op_events) {
-        const OpEvent &op_event = op_event_pair.second;
+    for (auto &op_event_pair : m_op_events) {
+      OpEvent &op_event = op_event_pair.second;
+      if (cancel_ops) {
+        // cancel ops that are waiting to start (waiting for
+        // OpFinishEvent or waiting for ready)
         if (op_event.on_start_ready == nullptr) {
           cancel_op_tids.push_back(op_event_pair.first);
         }
+      } else if (op_event.on_op_finish_event != nullptr) {
+        // start ops waiting for OpFinishEvent
+        Context *on_op_finish_event = nullptr;
+        std::swap(on_op_finish_event, op_event.on_op_finish_event);
+        m_image_ctx.op_work_queue->queue(on_op_finish_event, 0);
+      } else {
+        // waiting for op ready
+        assert(op_event.on_start_ready != nullptr);
+        op_event_pair.second.finish_on_ready = true;
       }
     }
 
@@ -174,7 +185,7 @@ void Replay<I>::replay_op_ready(uint64_t op_tid, Context *on_resume) {
   on_start_ready->complete(0);
 
   // cancel has been requested -- send error to paused state machine
-  if (m_flush_ctx != nullptr) {
+  if (!op_event.finish_on_ready && m_flush_ctx != nullptr) {
     m_image_ctx.op_work_queue->queue(on_resume, -ERESTART);
     return;
   }
@@ -185,6 +196,11 @@ void Replay<I>::replay_op_ready(uint64_t op_tid, Context *on_resume) {
     [on_resume](int r) {
       on_resume->complete(r);
     });
+
+  // shut down request -- don't expect OpFinishEvent
+  if (op_event.finish_on_ready) {
+    m_image_ctx.op_work_queue->queue(on_resume, 0);
+  }
 }
 
 template <typename I>
@@ -598,6 +614,7 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
 
   OpEvent op_event;
   Context *on_flush = nullptr;
+  bool shutting_down = false;
   {
     Mutex::Locker locker(m_lock);
     auto op_it = m_op_events.find(op_tid);
@@ -606,6 +623,7 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
     op_event = std::move(op_it->second);
     m_op_events.erase(op_it);
 
+    shutting_down = (m_flush_ctx != nullptr);
     if (m_op_events.empty() &&
         (m_in_flight_aio_flush + m_in_flight_aio_modify) == 0) {
       on_flush = m_flush_ctx;
@@ -622,7 +640,7 @@ void Replay<I>::handle_op_complete(uint64_t op_tid, int r) {
   } else {
     // event kicked off by OpFinishEvent
     assert((op_event.on_finish_ready != nullptr &&
-            op_event.on_finish_safe != nullptr) || r == -ERESTART);
+            op_event.on_finish_safe != nullptr) || shutting_down);
   }
 
   // skipped upon error -- so clean up if non-null
index d842a03ff5b41d4eaff4abf79b1f526fe5c9665f..c617dfb25d78f679fc52b8a64d838fba1c3f3e7f 100644 (file)
@@ -45,6 +45,7 @@ private:
 
   struct OpEvent {
     bool op_in_progress = false;
+    bool finish_on_ready = false;
     Context *on_op_finish_event = nullptr;
     Context *on_start_ready = nullptr;
     Context *on_start_safe = nullptr;
index 033ef3c5f9793d3279a8713d2c26b918d2c19890..ea3d0467b89050c32843008c40358214dc63f280 100644 (file)
@@ -529,15 +529,70 @@ TEST_F(TestMockJournalReplay, MissingOpFinishEvent) {
   expect_op_work_queue(mock_image_ctx);
 
   InSequence seq;
-  C_SaferCond on_ready;
-  C_SaferCond on_safe;
-  when_process(mock_journal_replay, EventEntry{SnapRemoveEvent(123, "snap")},
-               &on_ready, &on_safe);
+  Context *on_snap_create_finish = nullptr;
+  expect_snap_create(mock_image_ctx, &on_snap_create_finish, "snap", 123);
 
-  ASSERT_EQ(0, on_ready.wait());
+  Context *on_snap_remove_finish = nullptr;
+  expect_snap_remove(mock_image_ctx, &on_snap_remove_finish, "snap");
+
+  C_SaferCond on_snap_remove_ready;
+  C_SaferCond on_snap_remove_safe;
+  when_process(mock_journal_replay, EventEntry{SnapRemoveEvent(122, "snap")},
+               &on_snap_remove_ready, &on_snap_remove_safe);
+  ASSERT_EQ(0, on_snap_remove_ready.wait());
+
+  C_SaferCond on_snap_create_ready;
+  C_SaferCond on_snap_create_safe;
+  when_process(mock_journal_replay, EventEntry{SnapCreateEvent(123, "snap")},
+               &on_snap_create_ready, &on_snap_create_safe);
+
+  C_SaferCond on_shut_down;
+  mock_journal_replay.shut_down(false, &on_shut_down);
+
+  wait_for_op_invoked(&on_snap_remove_finish, 0);
+  ASSERT_EQ(0, on_snap_remove_safe.wait());
+
+  C_SaferCond on_snap_create_resume;
+  when_replay_op_ready(mock_journal_replay, 123, &on_snap_create_resume);
+  ASSERT_EQ(0, on_snap_create_resume.wait());
+
+  on_snap_create_finish->complete(0);
+  ASSERT_EQ(0, on_snap_create_ready.wait());
+  ASSERT_EQ(0, on_snap_create_safe.wait());
+
+  ASSERT_EQ(0, on_shut_down.wait());
+}
+
+TEST_F(TestMockJournalReplay, MissingOpFinishEventCancelOps) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockImageCtx mock_image_ctx(*ictx);
+  MockJournalReplay mock_journal_replay(mock_image_ctx);
+  expect_op_work_queue(mock_image_ctx);
+
+  InSequence seq;
+  Context *on_snap_create_finish = nullptr;
+  expect_snap_create(mock_image_ctx, &on_snap_create_finish, "snap", 123);
+
+  C_SaferCond on_snap_remove_ready;
+  C_SaferCond on_snap_remove_safe;
+  when_process(mock_journal_replay, EventEntry{SnapRemoveEvent(122, "snap")},
+               &on_snap_remove_ready, &on_snap_remove_safe);
+  ASSERT_EQ(0, on_snap_remove_ready.wait());
+
+  C_SaferCond on_snap_create_ready;
+  C_SaferCond on_snap_create_safe;
+  when_process(mock_journal_replay, EventEntry{SnapCreateEvent(123, "snap")},
+               &on_snap_create_ready, &on_snap_create_safe);
+
+  C_SaferCond on_resume;
+  when_replay_op_ready(mock_journal_replay, 123, &on_resume);
+  ASSERT_EQ(0, on_snap_create_ready.wait());
 
   ASSERT_EQ(0, when_shut_down(mock_journal_replay, true));
-  ASSERT_EQ(-ERESTART, on_safe.wait());
+  ASSERT_EQ(-ERESTART, on_snap_remove_safe.wait());
+  ASSERT_EQ(-ERESTART, on_snap_create_safe.wait());
 }
 
 TEST_F(TestMockJournalReplay, UnknownOpFinishEvent) {