]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: journal op requests
authorJason Dillaman <dillaman@redhat.com>
Wed, 26 Aug 2015 17:46:53 +0000 (13:46 -0400)
committerJason Dillaman <dillaman@redhat.com>
Thu, 19 Nov 2015 01:34:43 +0000 (20:34 -0500)
Any op request that has the potential to impact the layout or data
within an image needs to be journaled for RBD mirroring purposes.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
18 files changed:
src/librbd/AioCompletion.cc
src/librbd/AioImageRequest.cc
src/librbd/Journal.cc
src/librbd/Journal.h
src/librbd/LibrbdWriteback.cc
src/librbd/internal.cc
src/librbd/operation/FlattenRequest.h
src/librbd/operation/RenameRequest.h
src/librbd/operation/Request.cc
src/librbd/operation/Request.h
src/librbd/operation/ResizeRequest.cc
src/librbd/operation/ResizeRequest.h
src/librbd/operation/SnapshotCreateRequest.h
src/librbd/operation/SnapshotProtectRequest.h
src/librbd/operation/SnapshotRemoveRequest.h
src/librbd/operation/SnapshotRenameRequest.h
src/librbd/operation/SnapshotRollbackRequest.h
src/librbd/operation/SnapshotUnprotectRequest.h

index ac6754d19a097e69443171c6545ea1548f2ce6c5..053df429af298b70cac719c605a3a2411aeb19ec 100644 (file)
@@ -90,7 +90,7 @@ namespace librbd {
     // inform the journal that the op has successfully committed
     if (journal_tid != 0) {
       assert(ictx->journal != NULL);
-      ictx->journal->commit_event(journal_tid, rval);
+      ictx->journal->commit_io_event(journal_tid, rval);
     }
 
     // note: possible for image to be closed after op marked finished
index 49632b6d68dbf53595c53b22274432d04d5f2091..f627d9c4d8a96dea822864c12e30bbf7bb675344 100644 (file)
@@ -299,9 +299,9 @@ uint64_t AioImageWrite::append_journal_event(
   bl.append(m_buf, m_len);
 
   journal::EventEntry event_entry(journal::AioWriteEvent(m_off, m_len, bl));
-  uint64_t tid = m_image_ctx.journal->append_event(m_aio_comp, event_entry,
-                                                   requests, m_off, m_len,
-                                                   synchronous);
+  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry,
+                                                      requests, m_off, m_len,
+                                                      synchronous);
   if (m_image_ctx.object_cacher == NULL) {
     m_aio_comp->associate_journal_event(tid);
   }
@@ -359,9 +359,9 @@ void AioImageWrite::update_stats(size_t length) {
 uint64_t AioImageDiscard::append_journal_event(
     const AioObjectRequests &requests, bool synchronous) {
   journal::EventEntry event_entry(journal::AioDiscardEvent(m_off, m_len));
-  uint64_t tid = m_image_ctx.journal->append_event(m_aio_comp, event_entry,
-                                                   requests, m_off, m_len,
-                                                   synchronous);
+  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry,
+                                                      requests, m_off, m_len,
+                                                      synchronous);
   m_aio_comp->associate_journal_event(tid);
   return tid;
 }
@@ -420,7 +420,7 @@ void AioImageFlush::send_request() {
     RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
     if (m_image_ctx.journal != NULL &&
         !m_image_ctx.journal->is_journal_replaying()) {
-      uint64_t journal_tid = m_image_ctx.journal->append_event(
+      uint64_t journal_tid = m_image_ctx.journal->append_io_event(
         m_aio_comp, journal::EventEntry(journal::AioFlushEvent()),
         AioObjectRequests(), 0, 0, false);
 
index 37e311e46e2804d9c676694f60d4a3819e9b18c8..5b610988672859958057895ac12c00d6da36bfae 100644 (file)
@@ -11,6 +11,8 @@
 #include "journal/Journaler.h"
 #include "journal/ReplayEntry.h"
 #include "common/errno.h"
+#include <boost/utility/enable_if.hpp>
+#include <boost/type_traits/is_base_of.hpp>
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
@@ -32,6 +34,27 @@ struct C_DestroyJournaler : public Context {
   }
 };
 
+struct SetOpRequestTid : public boost::static_visitor<void> {
+  uint64_t tid;
+
+  SetOpRequestTid(uint64_t _tid) : tid(_tid) {
+  }
+
+  template <typename Event>
+  typename boost::enable_if<boost::is_base_of<journal::OpEventBase, Event>,
+                            void>::type
+  operator()(Event &event) const {
+    event.tid = tid;
+  }
+
+  template <typename Event>
+  typename boost::disable_if<boost::is_base_of<journal::OpEventBase, Event>,
+                            void>::type
+  operator()(Event &event) const {
+    assert(false);
+  }
+};
+
 } // anonymous namespace
 
 Journal::Journal(ImageCtx &image_ctx)
@@ -53,6 +76,7 @@ Journal::~Journal() {
   m_image_ctx.op_work_queue->drain();
   assert(m_journaler == NULL);
   assert(m_journal_replay == NULL);
+  assert(m_wait_for_state_contexts.empty());
 
   m_image_ctx.image_watcher->unregister_listener(&m_lock_listener);
 
@@ -124,12 +148,16 @@ bool Journal::is_journal_replaying() const {
   return (m_state == STATE_REPLAYING);
 }
 
-bool Journal::wait_for_journal_ready() {
+void Journal::wait_for_journal_ready(Context *on_ready) {
+  Mutex::Locker locker(m_lock);
+  schedule_wait_for_ready(on_ready);
+}
+
+void Journal::wait_for_journal_ready() {
   Mutex::Locker locker(m_lock);
-  while (m_state != STATE_UNINITIALIZED && m_state != STATE_RECORDING) {
+  while (m_state != STATE_RECORDING) {
     wait_for_state_transition();
   }
-  return (m_state == STATE_RECORDING);
 }
 
 void Journal::open() {
@@ -183,11 +211,11 @@ int Journal::close() {
   return 0;
 }
 
-uint64_t Journal::append_event(AioCompletion *aio_comp,
-                               const journal::EventEntry &event_entry,
-                               const AioObjectRequests &requests,
-                               uint64_t offset, size_t length,
-                               bool flush_entry) {
+uint64_t Journal::append_io_event(AioCompletion *aio_comp,
+                                  const journal::EventEntry &event_entry,
+                                  const AioObjectRequests &requests,
+                                  uint64_t offset, size_t length,
+                                  bool flush_entry) {
   assert(m_image_ctx.owner_lock.is_locked());
 
   bufferlist bl;
@@ -225,7 +253,7 @@ uint64_t Journal::append_event(AioCompletion *aio_comp,
   return tid;
 }
 
-void Journal::commit_event(uint64_t tid, int r) {
+void Journal::commit_io_event(uint64_t tid, int r) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": tid=" << tid << ", "
                  "r=" << r << dendl;
@@ -238,8 +266,8 @@ void Journal::commit_event(uint64_t tid, int r) {
   complete_event(it, r);
 }
 
-void Journal::commit_event_extent(uint64_t tid, uint64_t offset,
-                                  uint64_t length, int r) {
+void Journal::commit_io_event_extent(uint64_t tid, uint64_t offset,
+                                     uint64_t length, int r) {
   assert(length > 0);
 
   CephContext *cct = m_image_ctx.cct;
@@ -273,6 +301,47 @@ void Journal::commit_event_extent(uint64_t tid, uint64_t offset,
   complete_event(it, event.ret_val);
 }
 
+uint64_t Journal::append_op_event(journal::EventEntry &event_entry) {
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  uint64_t tid;
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_state == STATE_RECORDING);
+
+    Mutex::Locker event_locker(m_event_lock);
+    tid = ++m_event_tid;
+    assert(tid != 0);
+
+    // inject the generated tid into the provided event entry
+    boost::apply_visitor(SetOpRequestTid(tid), event_entry.event);
+
+    bufferlist bl;
+    ::encode(event_entry, bl);
+    m_journaler->append("", bl);
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " " << __func__ << ": "
+                 << "event=" << event_entry.get_event_type() << ", "
+                 << "tid=" << tid << dendl;
+  return tid;
+}
+
+void Journal::commit_op_event(uint64_t tid, int r) {
+  journal::EventEntry event_entry((journal::OpFinishEvent(tid, r)));
+
+  bufferlist bl;
+  ::encode(event_entry, bl);
+
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_state == STATE_RECORDING);
+
+    m_journaler->append("", bl);
+  }
+}
+
 void Journal::flush_event(uint64_t tid, Context *on_safe) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": tid=" << tid << ", "
@@ -622,6 +691,13 @@ void Journal::transition_state(State state) {
   assert(m_lock.is_locked());
   m_state = state;
   m_cond.Signal();
+
+  Contexts wait_for_state_contexts;
+  wait_for_state_contexts.swap(m_wait_for_state_contexts);
+  for (Contexts::iterator it = wait_for_state_contexts.begin();
+       it != wait_for_state_contexts.end(); ++it) {
+    (*it)->complete(0);
+  }
 }
 
 void Journal::wait_for_state_transition() {
@@ -632,4 +708,25 @@ void Journal::wait_for_state_transition() {
   }
 }
 
+void Journal::schedule_wait_for_ready(Context *on_ready) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << __func__ << ": on_ready=" << on_ready << dendl;
+
+  assert(m_lock.is_locked());
+  m_wait_for_state_contexts.push_back(new C_WaitForReady(this, on_ready));
+}
+
+void Journal::handle_wait_for_ready(Context *on_ready) {
+  assert(m_lock.is_locked());
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " " << __func__ << ": on_ready=" << on_ready << ", "
+                 << "state=" << m_state << dendl;
+
+  if (m_state == STATE_RECORDING) {
+    m_image_ctx.op_work_queue->queue(on_ready, 0);
+  } else {
+    schedule_wait_for_ready(on_ready);
+  }
+}
+
 } // namespace librbd
index f6368b27f136bca31ae0711e205b94e9276ea605..29f4d2a01645fd1071905811d8100ff3c87dd6fa 100644 (file)
@@ -48,20 +48,23 @@ public:
   bool is_journal_ready() const;
   bool is_journal_replaying() const;
 
-  bool wait_for_journal_ready();
+  void wait_for_journal_ready(Context *on_ready);
+  void wait_for_journal_ready();
 
   void open();
   int close();
 
-  uint64_t append_event(AioCompletion *aio_comp,
-                        const journal::EventEntry &event_entry,
-                        const AioObjectRequests &requests,
-                        uint64_t offset, size_t length,
-                        bool flush_entry);
+  uint64_t append_io_event(AioCompletion *aio_comp,
+                           const journal::EventEntry &event_entry,
+                           const AioObjectRequests &requests,
+                           uint64_t offset, size_t length,
+                           bool flush_entry);
+  void commit_io_event(uint64_t tid, int r);
+  void commit_io_event_extent(uint64_t tid, uint64_t offset, uint64_t length,
+                              int r);
 
-  void commit_event(uint64_t tid, int r);
-  void commit_event_extent(uint64_t tid, uint64_t offset, uint64_t length,
-                           int r);
+  uint64_t append_op_event(journal::EventEntry &event_entry);
+  void commit_op_event(uint64_t tid, int r);
 
   void flush_event(uint64_t tid, Context *on_safe);
   void wait_event(uint64_t tid, Context *on_safe);
@@ -137,6 +140,19 @@ private:
     }
   };
 
+  struct C_WaitForReady : public Context {
+    Journal *journal;
+    Context *on_ready;
+
+    C_WaitForReady(Journal *_journal, Context *_on_ready)
+      : journal(_journal), on_ready(_on_ready) {
+    }
+
+    virtual void finish(int r) {
+      journal->handle_wait_for_ready(on_ready);
+    }
+  };
+
   struct ReplayHandler : public ::journal::ReplayHandler {
     Journal *journal;
     ReplayHandler(Journal *_journal) : journal(_journal) {
@@ -164,6 +180,7 @@ private:
   Cond m_cond;
   State m_state;
 
+  Contexts m_wait_for_state_contexts;
   LockListener m_lock_listener;
 
   ReplayHandler m_replay_handler;
@@ -202,6 +219,8 @@ private:
   void flush_journal();
   void transition_state(State state);
   void wait_for_state_transition();
+  void schedule_wait_for_ready(Context *on_ready);
+  void handle_wait_for_ready(Context *on_ready);
 };
 
 } // namespace librbd
index 1e68214273bd65c9ca039c230cd11a69c3337911..b25dfca28ea822fbca27424a5b48e1e4f5096936 100644 (file)
@@ -120,7 +120,7 @@ namespace librbd {
 
     virtual void complete(int r) {
       if (request_sent || r < 0) {
-        commit_event_extent(r);
+        commit_io_event_extent(r);
         req_comp->complete(r);
         delete this;
       } else {
@@ -131,7 +131,7 @@ namespace librbd {
     virtual void finish(int r) {
     }
 
-    void commit_event_extent(int r) {
+    void commit_io_event_extent(int r) {
       CephContext *cct = image_ctx->cct;
       ldout(cct, 20) << this << " C_WriteJournalCommit: "
                      << "write committed: updating journal commit position"
@@ -145,8 +145,8 @@ namespace librbd {
                               bl.length(), file_extents);
       for (Extents::iterator it = file_extents.begin();
            it != file_extents.end(); ++it) {
-        image_ctx->journal->commit_event_extent(journal_tid, it->first,
-                                                it->second, r);
+        image_ctx->journal->commit_io_event_extent(journal_tid, it->first,
+                                                   it->second, r);
       }
     }
 
@@ -275,8 +275,8 @@ namespace librbd {
                             len, file_extents);
     for (Extents::iterator it = file_extents.begin();
          it != file_extents.end(); ++it) {
-      m_ictx->journal->commit_event_extent(journal_tid, it->first, it->second,
-                                           0);
+      m_ictx->journal->commit_io_event_extent(journal_tid, it->first,
+                                              it->second, 0);
     }
   }
 
index 86c520e8434659c4911e7e599ed191f4d2fb99d3..76b5fee113318f8bdc9bab08d43f25ca1c718c8d 100644 (file)
@@ -747,16 +747,17 @@ int invoke_async_request(ImageCtx *ictx, const std::string& request_type,
     if (r < 0)
       return r;
 
-    bool fast_diff_enabled = false;
+    bool proxy_op = false;
     {
       RWLock::RLocker snap_locker(ictx->snap_lock);
       if (ictx->get_snap_id(snap_name) == CEPH_NOSNAP) {
         return -ENOENT;
       }
-      fast_diff_enabled = ((ictx->features & RBD_FEATURE_FAST_DIFF) != 0);
+      proxy_op = ((ictx->features & RBD_FEATURE_FAST_DIFF) != 0 ||
+                  (ictx->features & RBD_FEATURE_JOURNALING) != 0);
     }
 
-    if (fast_diff_enabled) {
+    if (proxy_op) {
       r = invoke_async_request(ictx, "snap_remove", true,
                                boost::bind(&snap_remove_helper, ictx, _1,
                                            snap_name),
@@ -2537,6 +2538,10 @@ int invoke_async_request(ImageCtx *ictx, const std::string& request_type,
        return -EROFS;
       }
 
+      if (ictx->journal != NULL) {
+        ictx->journal->wait_for_journal_ready();
+      }
+
       ictx->snap_lock.get_read();
       new_size = ictx->get_image_size(snap_id);
       ictx->snap_lock.put_read();
index f8ef2670e61510849da333549fa7953def102405..7a2b86e35df6b098f65403495cae5eff6d70fb12 100644 (file)
@@ -30,6 +30,10 @@ protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::FlattenEvent(0);
+  }
+
 private:
   /**
    * Flatten goes through the following state machine to copyup objects
index b07293facff497d4f71432d35ef668c8a2451ce1..ab7a87b8c168e86f2529af820f41a4fc7f616e1c 100644 (file)
@@ -58,6 +58,10 @@ protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::RenameEvent(0, m_dest_name);
+  }
+
 private:
   std::string m_dest_name;
 
index 82017de05c2c613ea587a06e935fdf51fde951cf..a1fe76003cf221536e1ed95ff9a04ae3e4a1319b 100644 (file)
@@ -2,21 +2,55 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/operation/Request.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
 
 namespace librbd {
 namespace operation {
 
 Request::Request(ImageCtx &image_ctx, Context *on_finish)
-  : AsyncRequest(image_ctx, on_finish) {
+  : AsyncRequest(image_ctx, on_finish), m_tid(0) {
 }
 
 void Request::send() {
-  // TODO: record op start in journal
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  {
+    RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+    if (m_image_ctx.journal != NULL &&
+        !m_image_ctx.journal->is_journal_replaying()) {
+      // journal might be replaying -- wait for it to complete
+      if (!m_image_ctx.journal->is_journal_ready()) {
+        m_image_ctx.journal->wait_for_journal_ready(
+          new C_WaitForJournalReady(this));
+        return;
+      }
+
+      journal::EventEntry event_entry(create_event());
+      m_tid = m_image_ctx.journal->append_op_event(event_entry);
+    }
+  }
+
   send_op();
 }
 
 void Request::finish(int r) {
-  // TODO: record op finish in journal
+  AsyncRequest::finish(r);
+
+  RWLock::RLocker snap_locker(m_image_ctx.snap_lock);
+  if (m_tid != 0 &&
+      m_image_ctx.journal != NULL &&
+      !m_image_ctx.journal->is_journal_replaying()) {
+    // ops will be canceled / completed before closing journal
+    assert(m_image_ctx.journal->is_journal_ready());
+
+    m_image_ctx.journal->commit_op_event(m_tid, r);
+  }
+}
+
+void Request::handle_journal_ready() {
+  RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+  send();
 }
 
 } // namespace operation
index b4a805d57477f4dc754a933b04e4a9b453aeef58..705c4115d8ef1327dad181f315de84ef64eebac2 100644 (file)
@@ -5,6 +5,8 @@
 #define CEPH_LIBRBD_OPERATION_REQUEST_H
 
 #include "librbd/AsyncRequest.h"
+#include "include/Context.h"
+#include "librbd/JournalTypes.h"
 
 namespace librbd {
 namespace operation {
@@ -19,6 +21,23 @@ protected:
   virtual void finish(int r);
   virtual void send_op() = 0;
 
+  virtual journal::Event create_event() const = 0;
+
+private:
+  struct C_WaitForJournalReady : public Context {
+    Request *request;
+
+    C_WaitForJournalReady(Request *_request) : request(_request) {
+    }
+
+    virtual void finish(int r) {
+      request->handle_journal_ready();
+    }
+  };
+
+  uint64_t m_tid;
+
+  void handle_journal_ready();
 };
 
 } // namespace operation
index afe925bac8c9538b88b8bfb3f542d098b1dafee7..825b5442849305fca46fae4c490c5244a7be3b6d 100644 (file)
@@ -99,7 +99,7 @@ bool ResizeRequest::should_complete(int r) {
   return false;
 }
 
-void ResizeRequest::send_op() {
+void ResizeRequest::send() {
   assert(m_image_ctx.owner_lock.is_locked());
 
   {
@@ -116,6 +116,12 @@ void ResizeRequest::send_op() {
     compute_parent_overlap();
   }
 
+  Request::send();
+}
+
+void ResizeRequest::send_op() {
+  assert(m_image_ctx.owner_lock.is_locked());
+
   CephContext *cct = m_image_ctx.cct;
   if (is_canceled()) {
     complete(-ERESTART);
index dcef8fa22ac91bd4eb4897ac3f1d79d7cd18b35d..242a8d68be7a8bfd2c332399e4a359e6f6e5d477 100644 (file)
@@ -29,10 +29,16 @@ public:
     return m_new_size;
   }
 
+  virtual void send();
+
 protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::ResizeEvent(0, m_new_size);
+  }
+
 private:
   /**
    * Resize goes through the following state machine to resize the image
index 7031a43c46f9f0ac03588a5d6705978c7e73f572..21294423a122cabc082fa578c5f628ca765424a8 100644 (file)
@@ -82,6 +82,10 @@ protected:
     return r;
   }
 
+  virtual journal::Event create_event() const {
+    return journal::SnapCreateEvent(0, m_snap_name);
+  }
+
 private:
   std::string m_snap_name;
   State m_state;
index dd3496b96e4cf116d39f0f7cd3e8b71c73161741..e2953f9c97fe6f08ef3d71f0f1aab2d86399d9b1 100644 (file)
@@ -45,6 +45,10 @@ protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::SnapProtectEvent(0, m_snap_name);
+  }
+
 private:
   std::string m_snap_name;
   State m_state;
index 85a3561c769e9c55e0dba08ba203f47c5c6b9975..621f3ee2c763188afb0e2f432f07cc6dc07f7795 100644 (file)
@@ -61,6 +61,10 @@ protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::SnapRemoveEvent(0, m_snap_name);
+  }
+
 private:
   std::string m_snap_name;
   uint64_t m_snap_id;
index 5857816f3a144a32b1a24daf057ead84012a9f6b..8be3459086295bd9ef62d1db8aa29f62588dbed9 100644 (file)
@@ -41,6 +41,10 @@ public:
   SnapshotRenameRequest(ImageCtx &image_ctx, Context *on_finish,
                         uint64_t snap_id, const std::string &snap_name);
 
+  virtual journal::Event create_event() const {
+    return journal::SnapRenameEvent(0, m_snap_id, m_snap_name);
+  }
+
 protected:
   virtual void send_op();
   virtual bool should_complete(int r);
index 972547e8c2ac0ffd7c4af4b146388dbc99bc3105..f074e488465cfdad2f731d2d03648b3770dff262 100644 (file)
@@ -62,6 +62,10 @@ protected:
   virtual void send_op();
   virtual bool should_complete(int r);
 
+  virtual journal::Event create_event() const {
+    return journal::SnapRollbackEvent(0, m_snap_name);
+  }
+
 private:
   std::string m_snap_name;
   uint64_t m_snap_id;
index faf1e138ef494c1b3a039bdfd15097b78ce700d7..3eacc3e71bec0965d562a847c41252f8de379e6c 100644 (file)
@@ -63,6 +63,10 @@ protected:
     return 0;
   }
 
+  virtual journal::Event create_event() const {
+    return journal::SnapUnprotectEvent(0, m_snap_name);
+  }
+
 private:
   std::string m_snap_name;
   State m_state;