git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: delay commit of overwritten journal event
author: Jason Dillaman <dillaman@redhat.com>
Thu, 19 May 2016 19:50:04 +0000 (15:50 -0400)
committer: Jason Dillaman <dillaman@redhat.com>
Fri, 20 May 2016 12:04:02 +0000 (08:04 -0400)
With the cache enabled and write-after-write IOs to the same
object extents, it was possible for the overwritten journal event
to be committed before the overwriting journal event was written
to disk.  If the client crashes before the overwriting event is
written, the image will be inconsistent on replay.

Fixes: http://tracker.ceph.com/issues/15938
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/LibrbdWriteback.cc

index eaa0a820506e0c65d504225906114f1ca3181350..8551820cf90f28830b6a4baf2eb397a7ce20f11c 100644 (file)
@@ -167,6 +167,27 @@ namespace librbd {
     }
   };
 
+  struct C_CommitIOEventExtent : public Context {  // completion that commits one extent of a journal IO event
+    ImageCtx *image_ctx;   // image whose journal receives the commit call
+    uint64_t journal_tid;  // journal event tid passed to commit_io_event_extent
+    uint64_t offset;       // extent offset passed to commit_io_event_extent
+    uint64_t length;       // extent length passed to commit_io_event_extent
+
+    C_CommitIOEventExtent(ImageCtx *image_ctx, uint64_t journal_tid,
+                          uint64_t offset, uint64_t length)
+      : image_ctx(image_ctx), journal_tid(journal_tid), offset(offset),
+        length(length) {  // stores the arguments only; no work is done until finish()
+    }
+
+    virtual void finish(int r) {  // r is forwarded unchanged to commit_io_event_extent
+      // all IO operations are flushed prior to closing the journal
+      assert(image_ctx->journal != nullptr);
+
+      image_ctx->journal->commit_io_event_extent(journal_tid, offset, length,
+                                                 r);
+    }
+  };
+
   LibrbdWriteback::LibrbdWriteback(ImageCtx *ictx, Mutex& lock)
     : m_tid(0), m_lock(lock), m_ictx(ictx) {
   }
@@ -248,8 +269,8 @@ namespace librbd {
     assert(journal_tid == 0 || m_ictx->journal != NULL);
     if (journal_tid != 0) {
       m_ictx->journal->flush_event(
-       journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
-                                             bl, snapc, req_comp,
+        journal_tid, new C_WriteJournalCommit(m_ictx, oid.name, object_no, off,
+                                              bl, snapc, req_comp,
                                              journal_tid));
     } else {
       AioObjectWrite *req = new AioObjectWrite(m_ictx, oid.name, object_no,
@@ -277,8 +298,17 @@ namespace librbd {
                            len, file_extents);
     for (Extents::iterator it = file_extents.begin();
         it != file_extents.end(); ++it) {
-      m_ictx->journal->commit_io_event_extent(original_journal_tid, it->first,
-                                             it->second, 0);
+      if (new_journal_tid != 0) {
+        // ensure new journal event is safely committed to disk before
+        // committing old event
+        m_ictx->journal->flush_event(
+          new_journal_tid, new C_CommitIOEventExtent(m_ictx,
+                                                     original_journal_tid,
+                                                     it->first, it->second));
+      } else {
+        m_ictx->journal->commit_io_event_extent(original_journal_tid, it->first,
+                                               it->second, 0);
+      }
     }
   }