librbd: properly order maintenance ops journal events
author Jason Dillaman <dillaman@redhat.com>
Thu, 17 Dec 2015 04:17:22 +0000 (23:17 -0500)
committer Jason Dillaman <dillaman@redhat.com>
Fri, 15 Jan 2016 15:40:28 +0000 (10:40 -0500)
In general, the op should only start after the journal event is
safely committed.  Snapshot create and resize need special treatment
to record the event while no IO is in progress.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
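
For orientation, a minimal standalone sketch of the ordering described above: the op appends its journal event first and only starts once the journal reports the entry safe. ToyJournal and ToyOp below are simplified stand-ins, not the librbd classes touched by this commit.

#include <functional>
#include <iostream>
#include <utility>
#include <vector>

struct ToyJournal {
  using OnSafe = std::function<void(int)>;
  std::vector<OnSafe> pending;

  // record the op event; the callback fires once the entry is durable
  void append_op_event(OnSafe on_safe) {
    pending.push_back(std::move(on_safe));
  }

  // simulate the journal flushing its entries to stable storage
  void flush() {
    for (auto &cb : pending) {
      cb(0);
    }
    pending.clear();
  }
};

struct ToyOp {
  ToyJournal &journal;
  explicit ToyOp(ToyJournal &j) : journal(j) {}

  void send() {
    // journal the intent first ...
    journal.append_op_event([this](int r) { handle_op_event_safe(r); });
  }

  void handle_op_event_safe(int r) {
    if (r < 0) {
      std::cout << "journal append failed: " << r << "\n";
      return;
    }
    send_op();  // ... and only start the op once the event is safe
  }

  void send_op() {
    std::cout << "op started after its journal event was safe\n";
  }
};

int main() {
  ToyJournal journal;
  ToyOp op(journal);
  op.send();        // nothing runs yet: the event is not safe
  journal.flush();  // entry becomes safe, the op state machine starts
  return 0;
}
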
14 files changed:
src/librbd/AioImageRequest.cc
src/librbd/AsyncRequest.h
src/librbd/Journal.cc
src/librbd/Journal.h
src/librbd/internal.cc
src/librbd/object_map/SnapshotRemoveRequest.h
src/librbd/operation/RebuildObjectMapRequest.cc
src/librbd/operation/Request.cc
src/librbd/operation/Request.h
src/librbd/operation/ResizeRequest.cc
src/librbd/operation/ResizeRequest.h
src/librbd/operation/SnapshotCreateRequest.cc
src/librbd/operation/SnapshotCreateRequest.h
src/librbd/operation/TrimRequest.h

index 975096f3fbcd7b5957320191acabf6ca1bd94b06..ed93b34eb7501eb43fc8006639a5febc8d1168ca 100644 (file)
@@ -311,7 +311,8 @@ uint64_t AioImageWrite::append_journal_event(
   bl.append(m_buf, m_len);
 
   journal::EventEntry event_entry(journal::AioWriteEvent(m_off, m_len, bl));
-  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry,
+  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp,
+                                                      std::move(event_entry),
                                                       requests, m_off, m_len,
                                                       synchronous);
   if (m_image_ctx.object_cacher == NULL) {
@@ -371,7 +372,8 @@ void AioImageWrite::update_stats(size_t length) {
 uint64_t AioImageDiscard::append_journal_event(
     const AioObjectRequests &requests, bool synchronous) {
   journal::EventEntry event_entry(journal::AioDiscardEvent(m_off, m_len));
-  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp, event_entry,
+  uint64_t tid = m_image_ctx.journal->append_io_event(m_aio_comp,
+                                                      std::move(event_entry),
                                                       requests, m_off, m_len,
                                                       synchronous);
   m_aio_comp->associate_journal_event(tid);
index 96802f8114a2ca6e40b0fea83360c68b57404e3a..8ca84f534c8f7f138aac6b9a63855742c41d7874 100644 (file)
@@ -24,8 +24,6 @@ public:
     if (should_complete(r)) {
       r = filter_return_code(r);
       finish(r);
-      finish_request();
-      m_on_finish->complete(r);
       delete this;
     }
   }
@@ -41,7 +39,6 @@ public:
 
 protected:
   ImageCtxT &m_image_ctx;
-  Context *m_on_finish;
 
   librados::AioCompletion *create_callback_completion();
   Context *create_callback_context();
@@ -55,8 +52,12 @@ protected:
   }
 
   virtual void finish(int r) {
+    finish_request();
+    m_on_finish->complete(r);
   }
+
 private:
+  Context *m_on_finish;
   bool m_canceled;
   typename xlist<AsyncRequest<ImageCtxT> *>::item m_xlist_item;
 
index eb0de9f128cf1c04dbc556c4600ff9692e37fbb9..6b050556e7497a4b0f0c6c2fabde9dfa7f8e60fd 100644 (file)
@@ -239,7 +239,7 @@ bool Journal<I>::is_journal_replaying() const {
 
 template <typename I>
 void Journal<I>::wait_for_journal_ready(Context *on_ready) {
-  on_ready = util::create_async_context_callback(m_image_ctx, on_ready);
+  on_ready = create_async_context_callback(m_image_ctx, on_ready);
 
   Mutex::Locker locker(m_lock);
   if (m_state == STATE_READY) {
@@ -254,7 +254,7 @@ void Journal<I>::open(Context *on_finish) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << dendl;
 
-  on_finish = util::create_async_context_callback(m_image_ctx, on_finish);
+  on_finish = create_async_context_callback(m_image_ctx, on_finish);
 
   Mutex::Locker locker(m_lock);
   assert(m_state == STATE_UNINITIALIZED);
@@ -267,7 +267,7 @@ void Journal<I>::close(Context *on_finish) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << dendl;
 
-  on_finish = util::create_async_context_callback(m_image_ctx, on_finish);
+  on_finish = create_async_context_callback(m_image_ctx, on_finish);
 
   Mutex::Locker locker(m_lock);
   assert(m_state != STATE_UNINITIALIZED);
@@ -286,7 +286,7 @@ void Journal<I>::close(Context *on_finish) {
 
 template <typename I>
 uint64_t Journal<I>::append_io_event(AioCompletion *aio_comp,
-                                     const journal::EventEntry &event_entry,
+                                     journal::EventEntry &&event_entry,
                                      const AioObjectRequests &requests,
                                      uint64_t offset, size_t length,
                                      bool flush_entry) {
@@ -301,12 +301,11 @@ uint64_t Journal<I>::append_io_event(AioCompletion *aio_comp,
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_READY);
 
-    future = m_journaler->append("", bl);
-
     Mutex::Locker event_locker(m_event_lock);
     tid = ++m_event_tid;
     assert(tid != 0);
 
+    future = m_journaler->append("", bl);
     m_events[tid] = Event(future, aio_comp, requests, offset, length);
   }
 
@@ -318,7 +317,7 @@ uint64_t Journal<I>::append_io_event(AioCompletion *aio_comp,
                  << "length=" << length << ", "
                  << "flush=" << flush_entry << ", tid=" << tid << dendl;
 
-  Context *on_safe = new C_EventSafe(this, tid);
+  Context *on_safe = new C_IOEventSafe(this, tid);
   if (flush_entry) {
     future.flush(on_safe);
   } else {
@@ -379,17 +378,23 @@ void Journal<I>::commit_io_event_extent(uint64_t tid, uint64_t offset,
 
 template <typename I>
 void Journal<I>::append_op_event(uint64_t op_tid,
-                                 journal::EventEntry &event_entry) {
+                                 journal::EventEntry &&event_entry,
+                                 Context *on_safe) {
   assert(m_image_ctx.owner_lock.is_locked());
 
   bufferlist bl;
   ::encode(event_entry, bl);
+
+  Future future;
   {
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_READY);
-    m_journaler->committed(m_journaler->append("", bl));
+    future = m_journaler->append("", bl);
   }
 
+  on_safe = create_async_context_callback(m_image_ctx, on_safe);
+  future.flush(new C_OpEventSafe(this, op_tid, future, on_safe));
+
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 10) << this << " " << __func__ << ": "
                  << "op_tid=" << op_tid << ", "
@@ -397,21 +402,24 @@ void Journal<I>::append_op_event(uint64_t op_tid,
 }
 
 template <typename I>
-void Journal<I>::commit_op_event(uint64_t tid, int r) {
+void Journal<I>::commit_op_event(uint64_t op_tid, int r) {
   CephContext *cct = m_image_ctx.cct;
-  ldout(cct, 10) << this << " " << __func__ << ": tid=" << tid << ", "
+  ldout(cct, 10) << this << " " << __func__ << ": op_tid=" << op_tid << ", "
                  << "r=" << r << dendl;
 
-  journal::EventEntry event_entry((journal::OpFinishEvent(tid, r)));
+  journal::EventEntry event_entry((journal::OpFinishEvent(op_tid, r)));
 
   bufferlist bl;
   ::encode(event_entry, bl);
 
+  Future future;
   {
     Mutex::Locker locker(m_lock);
     assert(m_state == STATE_READY);
-    m_journaler->committed(m_journaler->append("", bl));
+    future = m_journaler->append("", bl);
   }
+
+  future.flush(new C_OpEventSafe(this, op_tid, future, nullptr));
 }
 
 template <typename I>
@@ -458,7 +466,8 @@ typename Journal<I>::Future Journal<I>::wait_event(Mutex &lock, uint64_t tid,
     return Future();
   }
 
-  event.on_safe_contexts.push_back(on_safe);
+  event.on_safe_contexts.push_back(create_async_context_callback(m_image_ctx,
+                                                                 on_safe));
   return event.future;
 }
 
@@ -705,11 +714,17 @@ void Journal<I>::handle_journal_destroyed(int r) {
 }
 
 template <typename I>
-void Journal<I>::handle_event_safe(int r, uint64_t tid) {
+void Journal<I>::handle_io_event_safe(int r, uint64_t tid) {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", "
                  << "tid=" << tid << dendl;
 
+  // journal will be flushed before closing
+  assert(m_state == STATE_READY);
+  if (r < 0) {
+    lderr(cct) << "failed to commit IO event: "  << cpp_strerror(r) << dendl;
+  }
+
   AioCompletion *aio_comp;
   AioObjectRequests aio_object_requests;
   Contexts on_safe_contexts;
@@ -758,6 +773,25 @@ void Journal<I>::handle_event_safe(int r, uint64_t tid) {
   }
 }
 
+template <typename I>
+void Journal<I>::handle_op_event_safe(int r, uint64_t tid, const Future &future,
+                                      Context *on_safe) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << this << " " << __func__ << ": r=" << r << ", "
+                 << "tid=" << tid << dendl;
+
+  // journal will be flushed before closing
+  assert(m_state == STATE_READY);
+  if (r < 0) {
+    lderr(cct) << "failed to commit op event: "  << cpp_strerror(r) << dendl;
+  }
+
+  m_journaler->committed(future);
+  if (on_safe != nullptr) {
+    on_safe->complete(r);
+  }
+}
+
 template <typename I>
 void Journal<I>::stop_recording() {
   assert(m_lock.is_locked());
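
A standalone sketch of the op-event lifecycle the Journal.cc hunks introduce: append the entry, flush it with a completion, and only mark it committed (and fire the optional on_safe callback) once the flush reports it safe. ToyFuture and ToyJournaler are simplified stand-ins, not the real Journaler API.

#include <functional>
#include <iostream>
#include <string>

// stand-in for the journaler Future: flush() runs the callback once the
// appended entry is safe (synchronously here, for simplicity)
struct ToyFuture {
  void flush(std::function<void(int)> on_safe) { on_safe(0); }
};

struct ToyJournaler {
  ToyFuture append(const std::string &payload) {
    std::cout << "appended: " << payload << "\n";
    return ToyFuture{};
  }
  void committed(const ToyFuture &) { std::cout << "entry marked committed\n"; }
};

// mirrors the shape of C_OpEventSafe: keep the future plus an optional user
// callback, mark the entry committed once it is safe, then notify the caller
void append_op_event(ToyJournaler &journaler, const std::string &event,
                     std::function<void(int)> on_safe) {
  ToyFuture future = journaler.append(event);
  future.flush([&journaler, future, on_safe](int r) {
    journaler.committed(future);
    if (on_safe) {
      on_safe(r);
    }
  });
}

int main() {
  ToyJournaler journaler;
  append_op_event(journaler, "SnapCreateEvent",
                  [](int r) { std::cout << "on_safe fired, r=" << r << "\n"; });
  return 0;
}
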
index bee4dbe98667d94fa95c2725bf3886d097fbb6e6..1740bc6a67616a317b00eb09146a83ce1eec16ce 100644 (file)
@@ -113,7 +113,7 @@ public:
   void close(Context *on_finish);
 
   uint64_t append_io_event(AioCompletion *aio_comp,
-                           const journal::EventEntry &event_entry,
+                           journal::EventEntry &&event_entry,
                            const AioObjectRequests &requests,
                            uint64_t offset, size_t length,
                            bool flush_entry);
@@ -121,8 +121,9 @@ public:
   void commit_io_event_extent(uint64_t tid, uint64_t offset, uint64_t length,
                               int r);
 
-  void append_op_event(uint64_t op_tid, journal::EventEntry &event_entry);
-  void commit_op_event(uint64_t op_tid, int r);
+  void append_op_event(uint64_t op_tid, journal::EventEntry &&event_entry,
+                       Context *on_safe);
+  void commit_op_event(uint64_t tid, int r);
 
   void flush_event(uint64_t tid, Context *on_safe);
   void wait_event(uint64_t tid, Context *on_safe);
@@ -168,16 +169,32 @@ private:
 
   typedef ceph::unordered_map<uint64_t, Event> Events;
 
-  struct C_EventSafe : public Context {
+  struct C_IOEventSafe : public Context {
     Journal *journal;
     uint64_t tid;
 
-    C_EventSafe(Journal *_journal, uint64_t _tid)
+    C_IOEventSafe(Journal *_journal, uint64_t _tid)
       : journal(_journal), tid(_tid) {
     }
 
     virtual void finish(int r) {
-      journal->handle_event_safe(r, tid);
+      journal->handle_io_event_safe(r, tid);
+    }
+  };
+
+  struct C_OpEventSafe : public Context {
+    Journal *journal;
+    uint64_t tid;
+    Future future;
+    Context *on_safe;
+
+    C_OpEventSafe(Journal *journal, uint64_t tid, const Future &future,
+                  Context *on_safe)
+      : journal(journal), tid(tid), future(future), on_safe(on_safe) {
+    }
+
+    virtual void finish(int r) {
+      journal->handle_op_event_safe(r, tid, future, on_safe);
     }
   };
 
@@ -241,7 +258,9 @@ private:
 
   void handle_journal_destroyed(int r);
 
-  void handle_event_safe(int r, uint64_t tid);
+  void handle_io_event_safe(int r, uint64_t tid);
+  void handle_op_event_safe(int r, uint64_t tid, const Future &future,
+                            Context *on_safe);
 
   void stop_recording();
 
index 4858df06a55417e52dbc0b1bc26ea9eadaa8fd6d..6f034f45352e1c3ebd4b3be6b48ad47c0967ade6 100644 (file)
@@ -351,7 +351,7 @@ int validate_pool(IoCtx &io_ctx, CephContext *cct) {
 
     C_SaferCond ctx;
     ictx->snap_lock.get_read();
-    operation::TrimRequest<> *req = new operation::TrimRequest<>(
+    operation::TrimRequest<> *req = operation::TrimRequest<>::create(
       *ictx, &ctx, ictx->size, newsize, prog_ctx);
     ictx->snap_lock.put_read();
     req->send();
index 3f2a2412a1df402a569bc51bda5e3b2808939b53..64696784130e9bf0715303e5919af826aba9d7f4 100644 (file)
@@ -64,10 +64,6 @@ protected:
     return r;
   }
 
-  virtual void finish() {
-  }
-  using AsyncRequest<>::finish;
-
 private:
   State m_state;
   ceph::BitVector<2> &m_object_map;
index ce7f911b99ea9c40616a9799127f9cbbc7ede396..cfb1c113faaa348845a769974a3fec5577b95ad4 100644 (file)
@@ -283,9 +283,9 @@ void RebuildObjectMapRequest<I>::send_trim_image() {
     orig_size = m_image_ctx.get_object_size() *
                 m_image_ctx.object_map->size();
   }
-  TrimRequest<I> *req = new TrimRequest<I>(m_image_ctx,
-                                           this->create_callback_context(),
-                                           orig_size, new_size, m_prog_ctx);
+  TrimRequest<I> *req = TrimRequest<I>::create(m_image_ctx,
+                                               this->create_callback_context(),
+                                               orig_size, new_size, m_prog_ctx);
   req->send();
 }
 
index e751360838e0cd03c4d7dc74d425ce0443c3b533..67eb950abfd66942b9f8bd8c75d9844dbff14aec 100644 (file)
@@ -2,8 +2,15 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/operation/Request.h"
+#include "common/dout.h"
+#include "common/errno.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/Journal.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::Request: "
 
 namespace librbd {
 namespace operation {
@@ -18,48 +25,97 @@ void Request<I>::send() {
   I &image_ctx = this->m_image_ctx;
   assert(image_ctx.owner_lock.is_locked());
 
-  {
-    RWLock::RLocker snap_locker(image_ctx.snap_lock);
-    if (image_ctx.journal != NULL &&
-        !image_ctx.journal->is_journal_replaying()) {
-      // journal might be replaying -- wait for it to complete
-      if (!image_ctx.journal->is_journal_ready()) {
-        image_ctx.journal->wait_for_journal_ready(
-          new C_WaitForJournalReady(this));
-        return;
-      }
-
-      m_op_tid = image_ctx.journal->allocate_op_tid();
-      journal::EventEntry event_entry(create_event(m_op_tid));
-      image_ctx.journal->append_op_event(m_op_tid, event_entry);
-    }
+  // automatically create the event if we don't need to worry
+  // about affecting concurrent IO ops
+  if (can_affect_io() || !append_op_event()) {
+    send_op();
   }
-
-  send_op();
 }
 
 template <typename I>
 void Request<I>::finish(int r) {
-  {
-    I &image_ctx = this->m_image_ctx;
-    RWLock::RLocker snap_locker(image_ctx.snap_lock);
-    if (m_op_tid != 0 && image_ctx.journal != NULL &&
-        !image_ctx.journal->is_journal_replaying()) {
-      // ops will be canceled / completed before closing journal
-      assert(image_ctx.journal->is_journal_ready());
-
-      image_ctx.journal->commit_op_event(m_op_tid, r);
-    }
+  // automatically commit the event if we don't need to worry
+  // about affecting concurrent IO ops
+  if (r < 0 || !can_affect_io()) {
+    commit_op_event(r);
   }
 
+  assert(!m_appended_op_event || m_committed_op_event);
   AsyncRequest<I>::finish(r);
 }
 
 template <typename I>
-void Request<I>::handle_journal_ready() {
+bool Request<I>::append_op_event() {
   I &image_ctx = this->m_image_ctx;
-  RWLock::RLocker owner_locker(image_ctx.owner_lock);
-  send();
+
+  assert(image_ctx.owner_lock.is_locked());
+  RWLock::RLocker snap_locker(image_ctx.snap_lock);
+  if (image_ctx.journal != NULL &&
+      !image_ctx.journal->is_journal_replaying()) {
+    append_op_event(util::create_context_callback<
+      Request<I>, &Request<I>::handle_op_event_safe>(this));
+    return true;
+  }
+  return false;
+}
+
+template <typename I>
+void Request<I>::commit_op_event(int r) {
+  I &image_ctx = this->m_image_ctx;
+  RWLock::RLocker snap_locker(image_ctx.snap_lock);
+
+  if (!m_appended_op_event) {
+    return;
+  }
+
+  assert(m_op_tid != 0);
+  assert(!m_committed_op_event);
+  m_committed_op_event = true;
+
+  if (image_ctx.journal != NULL &&
+      !image_ctx.journal->is_journal_replaying()) {
+    CephContext *cct = image_ctx.cct;
+    ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+    // ops will be canceled / completed before closing journal
+    assert(image_ctx.journal->is_journal_ready());
+    image_ctx.journal->commit_op_event(m_op_tid, r);
+  }
+}
+
+template <typename I>
+void Request<I>::append_op_event(Context *on_safe) {
+  I &image_ctx = this->m_image_ctx;
+  assert(image_ctx.owner_lock.is_locked());
+  assert(image_ctx.snap_lock.is_locked());
+
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << dendl;
+
+  m_op_tid = image_ctx.journal->allocate_op_tid();
+  image_ctx.journal->append_op_event(
+    m_op_tid, journal::EventEntry{create_event(m_op_tid)},
+    new C_OpEventSafe(this, on_safe));
+}
+
+template <typename I>
+void Request<I>::handle_op_event_safe(int r) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 10) << this << " " << __func__ << ": r=" << r << dendl;
+
+  if (r < 0) {
+    lderr(cct) << "failed to commit op event to journal: " << cpp_strerror(r)
+               << dendl;
+    this->finish(r);
+    delete this;
+  } else {
+    assert(!can_affect_io());
+
+    // haven't started the request state machine yet
+    RWLock::RLocker owner_locker(image_ctx.owner_lock);
+    send_op();
+  }
 }
 
 } // namespace operation
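
A toy sketch of the branch added to Request<I>::send() and finish() above: ops that cannot affect in-flight IO get their journal event appended and committed automatically, while snapshot create and resize (can_affect_io() returns true) drive the event from inside their own state machines. The types below are illustrative stand-ins only.

#include <iostream>

struct ToyRequest {
  virtual ~ToyRequest() = default;
  virtual bool can_affect_io() const { return false; }
  virtual void send_op() = 0;

  void send() {
    // ops that can affect in-flight IO journal the event themselves from
    // inside their own state machine; everything else is handled here
    if (can_affect_io() || !append_op_event()) {
      send_op();
    }
  }

  void finish(int r) {
    if (r < 0 || !can_affect_io()) {
      commit_op_event(r);  // automatic commit for the common case
    }
  }

  bool append_op_event() {
    std::cout << "append op event; send_op() runs once it is safe\n";
    return true;  // pretend journaling is enabled
  }

  void commit_op_event(int r) {
    std::cout << "commit op event, r=" << r << "\n";
  }
};

struct ToyFlatten : public ToyRequest {  // ordinary op: journaled automatically
  void send_op() override { std::cout << "flatten running\n"; }
};

struct ToyResize : public ToyRequest {   // affects IO: journals inside send_op()
  bool can_affect_io() const override { return true; }
  void send_op() override {
    std::cout << "resize: block writes, then append the op event itself\n";
  }
};

int main() {
  ToyFlatten flatten;
  flatten.send();    // event appended; send_op() would run once it is safe
  flatten.finish(0); // pretend the op completed: the event is committed here

  ToyResize resize;
  resize.send();     // starts immediately; journaling happens inside its steps
  return 0;
}
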
index 9432b26f0f4b5e78dfb04dda75e9672fdb81c4fa..83a8009b4f47a00b54298ca1c8abdaf88655a5fd 100644 (file)
@@ -6,6 +6,9 @@
 
 #include "librbd/AsyncRequest.h"
 #include "include/Context.h"
+#include "common/RWLock.h"
+#include "librbd/Utils.h"
+#include "librbd/Journal.h"
 #include "librbd/journal/Entries.h"
 
 namespace librbd {
@@ -22,26 +25,59 @@ public:
   virtual void send();
 
 protected:
-  virtual void finish(int r);
+  virtual void finish(int r) override;
   virtual void send_op() = 0;
 
+  virtual bool can_affect_io() const {
+    return false;
+  }
   virtual journal::Event create_event(uint64_t op_tid) const = 0;
 
-private:
-  struct C_WaitForJournalReady : public Context {
-    Request *request;
+  template <typename T, Context*(T::*MF)(int*)>
+  bool append_op_event(T *request) {
+    ImageCtxT &image_ctx = this->m_image_ctx;
 
-    C_WaitForJournalReady(Request *_request) : request(_request) {
+    RWLock::RLocker owner_locker(image_ctx.owner_lock);
+    RWLock::RLocker snap_locker(image_ctx.snap_lock);
+    if (image_ctx.journal != NULL &&
+        !image_ctx.journal->is_journal_replaying()) {
+      append_op_event(util::create_context_callback<T, MF>(request));
+      return true;
     }
+    return false;
+  }
+
+  bool append_op_event();
+  void commit_op_event(int r);
+
+  // NOTE: temporary until converted to new state machine format
+  Context *create_context_finisher() {
+    return util::create_context_callback<
+      Request<ImageCtxT>, &Request<ImageCtxT>::finish>(this);
+  }
 
-    virtual void finish(int r) {
-      request->handle_journal_ready();
+private:
+  struct C_OpEventSafe : public Context {
+    Request *request;
+    Context *on_safe;
+    C_OpEventSafe(Request *request, Context *on_safe)
+      : request(request), on_safe(on_safe) {
+    }
+    virtual void finish(int r) override {
+      if (r >= 0) {
+        request->m_appended_op_event = true;
+      }
+      on_safe->complete(r);
     }
   };
 
   uint64_t m_op_tid = 0;
+  bool m_appended_op_event = false;
+  bool m_committed_op_event = false;
+
+  void append_op_event(Context *on_safe);
+  void handle_op_event_safe(int r);
 
-  void handle_journal_ready();
 };
 
 } // namespace operation
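
The templated append_op_event<T, MF> above binds a member-function handler through util::create_context_callback. Below is a simplified, self-contained version of that adapter pattern; the handler signature is reduced to void(int) here, so it is a sketch of the idea rather than the exact librbd helper.

#include <iostream>

// minimal stand-in for the Ceph Context interface
struct ToyContext {
  virtual ~ToyContext() = default;
  void complete(int r) {
    finish(r);
    delete this;
  }
  virtual void finish(int r) = 0;
};

// adapter: wraps the member function MF of obj as a heap-allocated ToyContext
template <typename T, void (T::*MF)(int)>
ToyContext *create_context_callback(T *obj) {
  struct Adapter : public ToyContext {
    T *obj;
    explicit Adapter(T *o) : obj(o) {}
    void finish(int r) override { (obj->*MF)(r); }
  };
  return new Adapter(obj);
}

struct ToyResizeRequest {
  void handle_append_op_event(int r) {
    std::cout << "handle_append_op_event r=" << r << "\n";
  }
};

int main() {
  ToyResizeRequest req;
  ToyContext *ctx = create_context_callback<
    ToyResizeRequest, &ToyResizeRequest::handle_append_op_event>(&req);
  ctx->complete(0);  // invokes req.handle_append_op_event(0), frees the adapter
  return 0;
}
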
index dc92dbeef34b70e1e24e9cb3b699b364247b79c7..f6459dbe40f5bae72a181e3a2dec0060fb6fd47c 100644 (file)
@@ -2,11 +2,13 @@
 // vim: ts=8 sw=2 smarttab
 
 #include "librbd/operation/ResizeRequest.h"
+#include "librbd/AioImageRequestWQ.h"
 #include "librbd/ExclusiveLock.h"
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
 #include "librbd/internal.h"
 #include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
 #include "librbd/operation/TrimRequest.h"
 #include "common/dout.h"
 #include "common/errno.h"
 namespace librbd {
 namespace operation {
 
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_safe_callback;
+
 template <typename I>
 ResizeRequest<I>::ResizeRequest(I &image_ctx, Context *on_finish,
                                 uint64_t new_size, ProgressContext &prog_ctx)
@@ -45,64 +51,6 @@ ResizeRequest<I>::~ResizeRequest() {
   }
 }
 
-template <typename I>
-bool ResizeRequest<I>::should_complete(int r) {
-  I &image_ctx = this->m_image_ctx;
-  CephContext *cct = image_ctx.cct;
-  ldout(cct, 5) << this << " should_complete: " << " r=" << r << dendl;
-
-  if (r < 0) {
-    lderr(cct) << "resize encountered an error: " << cpp_strerror(r) << dendl;
-    return true;
-  }
-  if (m_state == STATE_FINISHED) {
-    ldout(cct, 5) << "FINISHED" << dendl;
-    return true;
-  }
-
-  RWLock::RLocker owner_lock(image_ctx.owner_lock);
-  switch (m_state) {
-  case STATE_FLUSH:
-    ldout(cct, 5) << "FLUSH" << dendl;
-    send_invalidate_cache();
-    break;
-
-  case STATE_INVALIDATE_CACHE:
-    ldout(cct, 5) << "INVALIDATE_CACHE" << dendl;
-    send_trim_image();
-    break;
-
-  case STATE_TRIM_IMAGE:
-    ldout(cct, 5) << "TRIM_IMAGE" << dendl;
-    send_update_header();
-    break;
-
-  case STATE_GROW_OBJECT_MAP:
-    ldout(cct, 5) << "GROW_OBJECT_MAP" << dendl;
-    send_update_header();
-    break;
-
-  case STATE_UPDATE_HEADER:
-    ldout(cct, 5) << "UPDATE_HEADER" << dendl;
-    if (send_shrink_object_map()) {
-      update_size_and_overlap();
-      return true;
-    }
-    break;
-
-  case STATE_SHRINK_OBJECT_MAP:
-    ldout(cct, 5) << "SHRINK_OBJECT_MAP" << dendl;
-    update_size_and_overlap();
-    return true;
-
-  default:
-    lderr(cct) << "invalid state: " << m_state << dendl;
-    assert(false);
-    break;
-  }
-  return false;
-}
-
 template <typename I>
 void ResizeRequest<I>::send() {
   I &image_ctx = this->m_image_ctx;
@@ -130,132 +78,257 @@ void ResizeRequest<I>::send_op() {
   I &image_ctx = this->m_image_ctx;
   assert(image_ctx.owner_lock.is_locked());
 
-  CephContext *cct = image_ctx.cct;
   if (this->is_canceled()) {
     this->async_complete(-ERESTART);
-  } else if (m_original_size == m_new_size) {
-    ldout(cct, 2) << this << " no change in size (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    m_state = STATE_FINISHED;
-    this->async_complete(0);
-  } else if (m_new_size > m_original_size) {
-    ldout(cct, 2) << this << " expanding image (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    send_grow_object_map();
   } else {
-    ldout(cct, 2) << this << " shrinking image (" << m_original_size
-                 << " -> " << m_new_size << ")" << dendl;
-    send_flush();
+    send_pre_block_writes();
   }
 }
 
 template <typename I>
-void ResizeRequest<I>::send_flush() {
+void ResizeRequest<I>::send_pre_block_writes() {
   I &image_ctx = this->m_image_ctx;
-  ldout(image_ctx.cct, 5) << this << " send_flush: "
-                          << " original_size=" << m_original_size
-                          << " new_size=" << m_new_size << dendl;
-  m_state = STATE_FLUSH;
-
-  // with clipping adjusted, ensure that write / copy-on-read operations won't
-  // (re-)create objects that we just removed. need async callback to ensure
-  // we don't have cache_lock already held
-  image_ctx.flush_async_operations(this->create_async_callback_context());
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+
+  image_ctx.aio_work_queue->block_writes(create_context_callback<
+    ResizeRequest<I>, &ResizeRequest<I>::handle_pre_block_writes>(this));
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_pre_block_writes(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+    image_ctx.aio_work_queue->unblock_writes();
+    return this->create_context_finisher();
+  }
+
+  return send_append_op_event();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::send_append_op_event() {
+  I &image_ctx = this->m_image_ctx;
+  if (!this->template append_op_event<
+        ResizeRequest<I>, &ResizeRequest<I>::handle_append_op_event>(this)) {
+    m_shrink_size_visible = true;
+    return send_grow_object_map();
+  }
+
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+  return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_append_op_event(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  m_shrink_size_visible = true;
+  if (*result < 0) {
+    lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+               << dendl;
+    image_ctx.aio_work_queue->unblock_writes();
+    return this->create_context_finisher();
+  }
+
+  return send_grow_object_map();
+}
+
+template <typename I>
+void ResizeRequest<I>::send_trim_image() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
+  TrimRequest<I> *req = TrimRequest<I>::create(
+    image_ctx, create_context_callback<
+      ResizeRequest<I>, &ResizeRequest<I>::handle_trim_image>(this),
+    m_original_size, m_new_size, m_prog_ctx);
+  req->send();
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_trim_image(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to trim image: " << cpp_strerror(*result) << dendl;
+    return this->create_context_finisher();
+  }
+
+  send_invalidate_cache();
+  return nullptr;
 }
 
 template <typename I>
 void ResizeRequest<I>::send_invalidate_cache() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
-  ldout(image_ctx.cct, 5) << this << " send_invalidate_cache: "
-                          << " original_size=" << m_original_size
-                          << " new_size=" << m_new_size << dendl;
-  m_state = STATE_INVALIDATE_CACHE;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
   // need to invalidate since we're deleting objects, and
   // ObjectCacher doesn't track non-existent objects
-  image_ctx.invalidate_cache(this->create_callback_context());
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
+  image_ctx.invalidate_cache(create_async_context_callback(
+    image_ctx, create_context_callback<
+      ResizeRequest<I>, &ResizeRequest<I>::handle_invalidate_cache>(this)));
 }
 
 template <typename I>
-void ResizeRequest<I>::send_trim_image() {
+Context *ResizeRequest<I>::handle_invalidate_cache(int *result) {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
-  ldout(image_ctx.cct, 5) << this << " send_trim_image: "
-                          << " original_size=" << m_original_size
-                          << " new_size=" << m_new_size << dendl;
-  m_state = STATE_TRIM_IMAGE;
-
-  TrimRequest<I> *req = new TrimRequest<I>(image_ctx,
-                                           this->create_callback_context(),
-                                          m_original_size, m_new_size,
-                                           m_prog_ctx);
-  req->send();
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to invalidate cache: " << cpp_strerror(*result)
+               << dendl;
+    return this->create_context_finisher();
+  }
+
+  send_post_block_writes();
+  return nullptr;
 }
 
 template <typename I>
-void ResizeRequest<I>::send_grow_object_map() {
+Context *ResizeRequest<I>::send_grow_object_map() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
 
+  image_ctx.aio_work_queue->unblock_writes();
+  if (m_original_size == m_new_size) {
+    this->commit_op_event(0);
+    return this->create_context_finisher();
+  } else if (m_new_size < m_original_size) {
+    send_trim_image();
+    return nullptr;
+  }
+
+  image_ctx.owner_lock.get_read();
   image_ctx.snap_lock.get_read();
   if (image_ctx.object_map == nullptr) {
     image_ctx.snap_lock.put_read();
-    send_update_header();
-    return;
+    image_ctx.owner_lock.put_read();
+
+    send_post_block_writes();
+    return nullptr;
   }
 
-  ldout(image_ctx.cct, 5) << this << " send_grow_object_map: "
-                          << " original_size=" << m_original_size
-                          << " new_size=" << m_new_size << dendl;
-  m_state = STATE_GROW_OBJECT_MAP;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
   // should have been canceled prior to releasing lock
   assert(image_ctx.exclusive_lock == nullptr ||
          image_ctx.exclusive_lock->is_lock_owner());
 
-  image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
-                                  this->create_callback_context());
+  image_ctx.object_map->aio_resize(
+    m_new_size, OBJECT_NONEXISTENT, create_context_callback<
+      ResizeRequest<I>, &ResizeRequest<I>::handle_grow_object_map>(this));
   image_ctx.snap_lock.put_read();
+  image_ctx.owner_lock.put_read();
+  return nullptr;
 }
 
 template <typename I>
-bool ResizeRequest<I>::send_shrink_object_map() {
+Context *ResizeRequest<I>::handle_grow_object_map(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  assert(*result == 0);
+  send_post_block_writes();
+  return nullptr;
+}
+
+template <typename I>
+Context *ResizeRequest<I>::send_shrink_object_map() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
 
+  image_ctx.owner_lock.get_read();
   image_ctx.snap_lock.get_read();
   if (image_ctx.object_map == nullptr || m_new_size > m_original_size) {
     image_ctx.snap_lock.put_read();
-    return true;
+    image_ctx.owner_lock.put_read();
+
+    update_size_and_overlap();
+    return this->create_context_finisher();
   }
 
-  ldout(image_ctx.cct, 5) << this << " send_shrink_object_map: "
-                           << " original_size=" << m_original_size
-                           << " new_size=" << m_new_size << dendl;
-  m_state = STATE_SHRINK_OBJECT_MAP;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << " "
+                << "original_size=" << m_original_size << ", "
+                << "new_size=" << m_new_size << dendl;
 
   // should have been canceled prior to releasing lock
   assert(image_ctx.exclusive_lock == nullptr ||
          image_ctx.exclusive_lock->is_lock_owner());
 
-  image_ctx.object_map->aio_resize(m_new_size, OBJECT_NONEXISTENT,
-                                  this->create_callback_context());
+  image_ctx.object_map->aio_resize(
+    m_new_size, OBJECT_NONEXISTENT, create_context_callback<
+      ResizeRequest<I>, &ResizeRequest<I>::handle_shrink_object_map>(this));
   image_ctx.snap_lock.put_read();
-  return false;
+  image_ctx.owner_lock.put_read();
+  return nullptr;
 }
 
 template <typename I>
-void ResizeRequest<I>::send_update_header() {
+Context *ResizeRequest<I>::handle_shrink_object_map(int *result) {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  ldout(image_ctx.cct, 5) << this << " send_update_header: "
-                            << " original_size=" << m_original_size
-                            << " new_size=" << m_new_size << dendl;
-  m_state = STATE_UPDATE_HEADER;
+  update_size_and_overlap();
+  assert(*result == 0);
+  return this->create_context_finisher();
+}
+
+template <typename I>
+void ResizeRequest<I>::send_post_block_writes() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
+  image_ctx.aio_work_queue->block_writes(create_context_callback<
+    ResizeRequest<I>, &ResizeRequest<I>::handle_post_block_writes>(this));
+}
+
+template <typename I>
+Context *ResizeRequest<I>::handle_post_block_writes(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    image_ctx.aio_work_queue->unblock_writes();
+    lderr(cct) << "failed to block writes prior to header update: "
+               << cpp_strerror(*result) << dendl;
+    return this->create_context_finisher();
+  }
+
+  send_update_header();
+  return nullptr;
+}
+
+template <typename I>
+void ResizeRequest<I>::send_update_header() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << " "
+                << "original_size=" << m_original_size << ", "
+                << "new_size=" << m_new_size << dendl;;
 
   // should have been canceled prior to releasing lock
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
   assert(image_ctx.exclusive_lock == nullptr ||
          image_ctx.exclusive_lock->is_lock_owner());
 
@@ -273,14 +346,31 @@ void ResizeRequest<I>::send_update_header() {
     cls_client::set_size(&op, m_new_size);
   }
 
-  librados::AioCompletion *rados_completion =
-    this->create_callback_completion();
+  librados::AioCompletion *rados_completion = create_rados_safe_callback<
+    ResizeRequest<I>, &ResizeRequest<I>::handle_update_header>(this);
   int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid,
                                       rados_completion, &op);
   assert(r == 0);
   rados_completion->release();
 }
 
+template <typename I>
+Context *ResizeRequest<I>::handle_update_header(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    lderr(cct) << "failed to update image header: " << cpp_strerror(*result)
+               << dendl;
+    image_ctx.aio_work_queue->unblock_writes();
+    return this->create_context_finisher();
+  }
+
+  this->commit_op_event(0);
+  return send_shrink_object_map();
+}
+
 template <typename I>
 void ResizeRequest<I>::compute_parent_overlap() {
   I &image_ctx = this->m_image_ctx;
@@ -295,13 +385,18 @@ void ResizeRequest<I>::compute_parent_overlap() {
 template <typename I>
 void ResizeRequest<I>::update_size_and_overlap() {
   I &image_ctx = this->m_image_ctx;
-  RWLock::WLocker snap_locker(image_ctx.snap_lock);
-  image_ctx.size = m_new_size;
+  {
+    RWLock::WLocker snap_locker(image_ctx.snap_lock);
+    image_ctx.size = m_new_size;
 
-  RWLock::WLocker parent_locker(image_ctx.parent_lock);
-  if (image_ctx.parent != NULL && m_new_size < m_original_size) {
-    image_ctx.parent_md.overlap = m_new_parent_overlap;
+    RWLock::WLocker parent_locker(image_ctx.parent_lock);
+    if (image_ctx.parent != NULL && m_new_size < m_original_size) {
+      image_ctx.parent_md.overlap = m_new_parent_overlap;
+    }
   }
+
+  // blocked by POST_BLOCK_WRITES state
+  image_ctx.aio_work_queue->unblock_writes();
 }
 
 } // namespace operation
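
The rewritten ResizeRequest follows a handler convention visible throughout the hunk above: each handle_*() returns nullptr when it has started the next asynchronous step, or a finisher context (create_context_finisher()) when the request is done. The sketch below assumes the returned context is completed by whoever invoked the handler; the types are simplified stand-ins, not the librbd helpers.

#include <iostream>

struct ToyContext {
  virtual ~ToyContext() = default;
  void complete(int r) {
    finish(r);
    delete this;
  }
  virtual void finish(int r) = 0;
};

struct ToyRequest {
  // invoked by a callback adapter once an async step completes
  void on_step_complete(int r) {
    ToyContext *finisher = handle_step(&r);
    if (finisher != nullptr) {
      finisher->complete(r);   // request finished (or failed): notify caller
    }                          // nullptr: the next async step was started
  }

  ToyContext *handle_step(int *result) {
    if (*result < 0) {
      std::cout << "step failed: " << *result << "\n";
      return create_context_finisher();
    }
    std::cout << "step ok, continuing\n";
    // a real request would start the next async step here
    return nullptr;
  }

  ToyContext *create_context_finisher() {
    struct Finisher : public ToyContext {
      void finish(int r) override {
        std::cout << "request complete, r=" << r << "\n";
      }
    };
    return new Finisher();
  }
};

int main() {
  ToyRequest req;
  req.on_step_complete(0);    // continues to the next step
  req.on_step_complete(-5);   // finishes with an error
  return 0;
}
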
index 0ec1fe018621be2d8a610949902c7edc8d4d11da..a34a7cf3ef3ed1cb977ce292ca6fbbe185959c30 100644 (file)
@@ -22,7 +22,7 @@ public:
   virtual ~ResizeRequest();
 
   inline bool shrinking() const {
-    return m_new_size < m_original_size;
+    return (m_shrink_size_visible && m_new_size < m_original_size);
   }
 
   inline uint64_t get_image_size() const {
@@ -33,8 +33,12 @@ public:
 
 protected:
   virtual void send_op();
-  virtual bool should_complete(int r);
-
+  virtual bool should_complete(int r) {
+    return true;
+  }
+  virtual bool can_affect_io() const override {
+    return true;
+  }
   virtual journal::Event create_event(uint64_t op_tid) const {
     return journal::ResizeEvent(op_tid, m_new_size);
   }
@@ -46,27 +50,44 @@ private:
    *
    * @verbatim
    *
-   * <start> -------------> STATE_FINISHED -----------------------------\
-   *  |  .    (no change)                                               |
-   *  |  .                                                              |
-   *  |  . . . . . . . . . . . . . . . . . . . . .                      |
-   *  |                                          .                      |
-   *  |                                          v                      |
-   *  |----------> STATE_GROW_OBJECT_MAP ---> STATE_UPDATE_HEADER ------|
-   *  | (grow)                                                          |
-   *  |                                                                 |
-   *  |                                                                 |
-   *  \----------> STATE_FLUSH -------------> STATE_INVALIDATE_CACHE    |
-   *    (shrink)                                 |                      |
-   *                                             |                      |
-   *                      /----------------------/                      |
-   *                      |                                             |
-   *                      v                                             |
-   *              STATE_TRIM_IMAGE --------> STATE_UPDATE_HEADER . . .  |
-   *                                             |                   .  |
-   *                                             |                   .  |
-   *                                             v                   v  v
-   *                                  STATE_SHRINK_OBJECT_MAP ---> <finish>
+   * <start>
+   *    |
+   *    v
+   * STATE_PRE_BLOCK_WRITES
+   *    |
+   *    v
+   * STATE_APPEND_OP_EVENT (skip if journaling
+   *    |                   disabled)
+   *    | (unblock writes)
+   *    |
+   *    |
+   *    | (grow)
+   *    |\--------> STATE_GROW_OBJECT_MAP (skip if object map
+   *    |                 |                disabled)
+   *    |                 v
+   *    |           STATE_POST_BLOCK_WRITES
+   *    |                 |
+   *    |                 v
+   *    |           STATE_UPDATE_HEADER ----------------------------\
+   *    |                                                           |
+   *    | (shrink)                                                  |
+   *    |\--------> STATE_TRIM_IMAGE                                |
+   *    |                 |                                         |
+   *    |                 v                                         |
+   *    |           STATE_INVALIDATE_CACHE                          |
+   *    |                 |                                         |
+   *    |                 v                                         |
+   *    |           STATE_POST_BLOCK_WRITES                         |
+   *    |                 |                                         |
+   *    |                 v                                         |
+   *    |           STATE_UPDATE_HEADER                             |
+   *    |                 |                                         |
+   *    |                 v                                         |
+   *    |           STATE_SHRINK_OBJECT_MAP (skip if object map     |
+   *    |                 |                  disabled)              |
+   *    |                 | (unblock writes)                        |
+   *    | (no change)     v                                         |
+   *    \------------> <finish> <-----------------------------------/
    *
    * @endverbatim
    *
@@ -74,30 +95,38 @@ private:
    * The state machine will immediately transition to _FINISHED if there
    * are no objects to trim.
    */
-  enum State {
-    STATE_FLUSH,
-    STATE_INVALIDATE_CACHE,
-    STATE_TRIM_IMAGE,
-    STATE_GROW_OBJECT_MAP,
-    STATE_UPDATE_HEADER,
-    STATE_SHRINK_OBJECT_MAP,
-    STATE_FINISHED
-  };
-
-  State m_state;
+
   uint64_t m_original_size;
   uint64_t m_new_size;
   ProgressContext &m_prog_ctx;
   uint64_t m_new_parent_overlap;
+  bool m_shrink_size_visible = false;
 
   typename xlist<ResizeRequest<ImageCtxT>*>::item m_xlist_item;
 
-  void send_flush();
+  void send_pre_block_writes();
+  Context *handle_pre_block_writes(int *result);
+
+  Context *send_append_op_event();
+  Context *handle_append_op_event(int *result);
+
   void send_invalidate_cache();
+  Context *handle_invalidate_cache(int *result);
+
   void send_trim_image();
-  void send_grow_object_map();
-  bool send_shrink_object_map();
+  Context *handle_trim_image(int *result);
+
+  Context *send_grow_object_map();
+  Context *handle_grow_object_map(int *result);
+
+  Context *send_shrink_object_map();
+  Context *handle_shrink_object_map(int *result);
+
+  void send_post_block_writes();
+  Context *handle_post_block_writes(int *result);
+
   void send_update_header();
+  Context *handle_update_header(int *result);
 
   void compute_parent_overlap();
   void update_size_and_overlap();
index 63f19d48385b05b8fd3af22b4f2eb158cc41a356..49f5825f386a6b7515e8a411d56d0eabe2eb0b70 100644 (file)
@@ -9,6 +9,7 @@
 #include "librbd/ImageCtx.h"
 #include "librbd/ImageWatcher.h"
 #include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
 
 #define dout_subsys ceph_subsys_rbd
 #undef dout_prefix
 namespace librbd {
 namespace operation {
 
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_safe_callback;
+
 namespace {
 
-template <typename I>
-std::ostream& operator<<(std::ostream& os,
-                         const typename SnapshotCreateRequest<I>::State& state) {
-  switch(state) {
-  case SnapshotCreateRequest<I>::STATE_SUSPEND_REQUESTS:
-    os << "SUSPEND_REQUESTS";
-    break;
-  case SnapshotCreateRequest<I>::STATE_SUSPEND_AIO:
-    os << "SUSPEND_AIO";
-    break;
-  case SnapshotCreateRequest<I>::STATE_ALLOCATE_SNAP_ID:
-    os << "ALLOCATE_SNAP_ID";
-    break;
-  case SnapshotCreateRequest<I>::STATE_CREATE_SNAP:
-    os << "CREATE_SNAP";
-    break;
-  case SnapshotCreateRequest<I>::STATE_CREATE_OBJECT_MAP:
-    os << "CREATE_OBJECT_MAP";
-    break;
-  case SnapshotCreateRequest<I>::STATE_RELEASE_SNAP_ID:
-    os << "RELEASE_SNAP_ID";
-    break;
-  default:
-    os << "UNKNOWN (" << static_cast<uint32_t>(state) << ")";
-    break;
+template <typename ImageCtxT>
+struct C_CreateSnapId: public Context {
+  ImageCtxT &image_ctx;
+  uint64_t *snap_id;
+  Context *on_finish;
+
+  C_CreateSnapId(ImageCtxT &image_ctx, uint64_t *snap_id, Context *on_finish)
+    : image_ctx(image_ctx), snap_id(snap_id), on_finish(on_finish) {
   }
-  return os;
-}
+
+  virtual void finish(int r) {
+    r = image_ctx.md_ctx.selfmanaged_snap_create(snap_id);
+    on_finish->complete(r);
+  }
+};
+
+template <typename ImageCtxT>
+struct C_RemoveSnapId: public Context {
+  ImageCtxT &image_ctx;
+  uint64_t snap_id;
+  Context *on_finish;
+
+  C_RemoveSnapId(ImageCtxT &image_ctx, uint64_t snap_id, Context *on_finish)
+    : image_ctx(image_ctx), snap_id(snap_id), on_finish(on_finish) {
+  }
+
+  virtual void finish(int r) {
+    r = image_ctx.md_ctx.selfmanaged_snap_remove(snap_id);
+    on_finish->complete(r);
+  }
+};
 
 } // anonymous namespace
 
@@ -55,8 +63,7 @@ SnapshotCreateRequest<I>::SnapshotCreateRequest(I &image_ctx,
                                                 Context *on_finish,
                                                 const std::string &snap_name)
   : Request<I>(image_ctx, on_finish), m_snap_name(snap_name), m_ret_val(0),
-    m_aio_suspended(false), m_requests_suspended(false),
-    m_snap_id(CEPH_NOSNAP), m_snap_created(false) {
+    m_snap_id(CEPH_NOSNAP) {
 }
 
 template <typename I>
@@ -65,128 +72,126 @@ void SnapshotCreateRequest<I>::send_op() {
 }
 
 template <typename I>
-bool SnapshotCreateRequest<I>::should_complete(int r) {
+void SnapshotCreateRequest<I>::send_suspend_requests() {
   I &image_ctx = this->m_image_ctx;
   CephContext *cct = image_ctx.cct;
-  ldout(cct, 5) << this << " " << __func__ << ": state=" << m_state << ", "
-                << "r=" << r << dendl;
-  int orig_result = r;
-  r = filter_state_return_code(r);
-  if (r < 0) {
-    lderr(cct) << "encountered error: " << cpp_strerror(r) << dendl;
-    if (m_ret_val == 0) {
-      m_ret_val = r;
-    }
-  }
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
-  if (m_ret_val < 0) {
-    return should_complete_error();
-  }
+  // TODO suspend (shrink) resize to ensure consistent RBD mirror
+  send_suspend_aio();
+}
 
-  RWLock::RLocker owner_lock(image_ctx.owner_lock);
-  bool finished = false;
-  switch (m_state) {
-  case STATE_SUSPEND_REQUESTS:
-    send_suspend_aio();
-    break;
-  case STATE_SUSPEND_AIO:
-    send_allocate_snap_id();
-    break;
-  case STATE_ALLOCATE_SNAP_ID:
-    send_create_snap();
-    break;
-  case STATE_CREATE_SNAP:
-    if (orig_result == 0) {
-      update_snap_context();
-      finished = send_create_object_map();
-    } else {
-      assert(orig_result == -ESTALE);
-      send_allocate_snap_id();
-    }
-    break;
-  case STATE_CREATE_OBJECT_MAP:
-    finished = true;
-    break;
-  default:
-    assert(false);
-    break;
-  }
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_suspend_requests(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  if (finished) {
-    resume_aio();
-    resume_requests();
-  }
-  return finished;
+  // TODO
+  send_suspend_aio();
+  return nullptr;
 }
 
 template <typename I>
-bool SnapshotCreateRequest<I>::should_complete_error() {
+void SnapshotCreateRequest<I>::send_suspend_aio() {
   I &image_ctx = this->m_image_ctx;
-  CephContext *cct = image_ctx.cct;
-  lderr(cct) << this << " " << __func__ << ": "
-             << "ret_val=" << m_ret_val << dendl;
+  assert(image_ctx.owner_lock.is_locked());
 
-  // only valid exit points during error recovery
-  bool finished = true;
-  if (m_state != STATE_RELEASE_SNAP_ID) {
-    finished = send_release_snap_id();
-  }
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
-  if (finished) {
-    resume_aio();
-    resume_requests();
-  }
-  return finished;
+  image_ctx.aio_work_queue->block_writes(create_context_callback<
+    SnapshotCreateRequest<I>,
+    &SnapshotCreateRequest<I>::handle_suspend_aio>(this));
 }
 
 template <typename I>
-void SnapshotCreateRequest<I>::send_suspend_requests() {
+Context *SnapshotCreateRequest<I>::handle_suspend_aio(int *result) {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  // TODO suspend (shrink) resize to ensure consistent RBD mirror
-  send_suspend_aio();
+  if (*result < 0) {
+    lderr(cct) << "failed to block writes: " << cpp_strerror(*result) << dendl;
+    image_ctx.aio_work_queue->unblock_writes();
+    return this->create_context_finisher();
+  }
+
+  send_append_op_event();
+  return nullptr;
 }
 
 template <typename I>
-void SnapshotCreateRequest<I>::send_suspend_aio() {
+void SnapshotCreateRequest<I>::send_append_op_event() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
+  if (!this->template append_op_event<
+        SnapshotCreateRequest<I>,
+        &SnapshotCreateRequest<I>::handle_append_op_event>(this)) {
+    send_allocate_snap_id();
+    return;
+  }
 
   CephContext *cct = image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << dendl;
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::handle_append_op_event(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-  m_state = STATE_SUSPEND_AIO;
-  m_aio_suspended = true;
+  if (*result < 0) {
+    image_ctx.aio_work_queue->unblock_writes();
+    lderr(cct) << "failed to commit journal entry: " << cpp_strerror(*result)
+               << dendl;
+    return this->create_context_finisher();
+  }
 
-  // can issue a re-entrant callback if no IO in-progress
-  image_ctx.aio_work_queue->block_writes(this->create_async_callback_context());
+  send_allocate_snap_id();
+  return nullptr;
 }
 
 template <typename I>
 void SnapshotCreateRequest<I>::send_allocate_snap_id() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
-
   CephContext *cct = image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << dendl;
-  m_state = STATE_ALLOCATE_SNAP_ID;
 
   // TODO create an async version of selfmanaged_snap_create
-  int r = image_ctx.md_ctx.selfmanaged_snap_create(&m_snap_id);
-  this->async_complete(r);
+  image_ctx.op_work_queue->queue(new C_CreateSnapId<I>(
+    image_ctx, &m_snap_id, create_context_callback<
+      SnapshotCreateRequest<I>,
+      &SnapshotCreateRequest<I>::handle_allocate_snap_id>(this)), 0);
 }
 
 template <typename I>
-void SnapshotCreateRequest<I>::send_create_snap() {
+Context *SnapshotCreateRequest<I>::handle_allocate_snap_id(int *result) {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
-  RWLock::RLocker snap_locker(image_ctx.snap_lock);
-  RWLock::RLocker parent_locker(image_ctx.parent_lock);
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result < 0) {
+    save_result(result);
+    finalize(*result);
+    lderr(cct) << "failed to allocate snapshot id: " << cpp_strerror(*result)
+               << dendl;
+    return this->create_context_finisher();
+  }
+
+  send_create_snap();
+  return nullptr;
+}
 
+template <typename I>
+void SnapshotCreateRequest<I>::send_create_snap() {
+  I &image_ctx = this->m_image_ctx;
   CephContext *cct = image_ctx.cct;
   ldout(cct, 5) << this << " " << __func__ << dendl;
-  m_state = STATE_CREATE_SNAP;
+
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
+  RWLock::RLocker snap_locker(image_ctx.snap_lock);
+  RWLock::RLocker parent_locker(image_ctx.parent_lock);
 
   // should have been canceled prior to releasing lock
   assert(image_ctx.exclusive_lock == nullptr ||
@@ -206,84 +211,118 @@ void SnapshotCreateRequest<I>::send_create_snap() {
     cls_client::snapshot_add(&op, m_snap_id, m_snap_name);
   }
 
-  librados::AioCompletion *rados_completion =
-    this->create_callback_completion();
+  librados::AioCompletion *rados_completion = create_rados_safe_callback<
+    SnapshotCreateRequest<I>,
+    &SnapshotCreateRequest<I>::handle_create_snap>(this);
   int r = image_ctx.md_ctx.aio_operate(image_ctx.header_oid,
-                                         rados_completion, &op);
+                                       rados_completion, &op);
   assert(r == 0);
   rados_completion->release();
 }
 
 template <typename I>
-bool SnapshotCreateRequest<I>::send_create_object_map() {
+Context *SnapshotCreateRequest<I>::handle_create_snap(int *result) {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  if (*result == -ESTALE) {
+    send_allocate_snap_id();
+    return nullptr;
+  } else if (*result < 0) {
+    save_result(result);
+    send_release_snap_id();
+    return nullptr;
+  }
+
+  return send_create_object_map();
+}
+
+template <typename I>
+Context *SnapshotCreateRequest<I>::send_create_object_map() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
+
+  update_snap_context();
+
+  image_ctx.snap_lock.get_read();
+  if (image_ctx.object_map == nullptr) {
+    image_ctx.snap_lock.put_read();
+
+    finalize(0);
+    return this->create_context_finisher();
+  }
+
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
 
   {
-    RWLock::RLocker snap_locker(image_ctx.snap_lock);
     RWLock::RLocker object_map_lock(image_ctx.object_map_lock);
-    if (image_ctx.object_map != nullptr) {
-      CephContext *cct = image_ctx.cct;
-      ldout(cct, 5) << this << " " << __func__ << dendl;
-      m_state = STATE_CREATE_OBJECT_MAP;
-
-      image_ctx.object_map->snapshot_add(m_snap_id,
-                                         this->create_callback_context());
-      return false;
-    }
+    image_ctx.object_map->snapshot_add(
+      m_snap_id, create_context_callback<
+        SnapshotCreateRequest<I>,
+        &SnapshotCreateRequest<I>::handle_create_object_map>(this));
   }
-  return true;
+  image_ctx.snap_lock.put_read();
+  return nullptr;
 }
 
 template <typename I>
-bool SnapshotCreateRequest<I>::send_release_snap_id() {
+Context *SnapshotCreateRequest<I>::handle_create_object_map(int *result) {
   I &image_ctx = this->m_image_ctx;
-  if (m_snap_id != CEPH_NOSNAP && !m_snap_created) {
-    CephContext *cct = image_ctx.cct;
-    ldout(cct, 5) << this << " " << __func__ << ": snap_id=" << m_snap_id
-                  << dendl;
-    m_state = STATE_RELEASE_SNAP_ID;
-
-    // TODO add async version of selfmanaged_snap_remove
-    int r = image_ctx.md_ctx.selfmanaged_snap_remove(m_snap_id);
-    m_snap_id = CEPH_NOSNAP;
-
-    this->async_complete(r);
-    return false;
-  }
-  return true;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
+
+  assert(*result == 0);
+
+  finalize(0);
+  return this->create_context_finisher();
+}
+
+template <typename I>
+void SnapshotCreateRequest<I>::send_release_snap_id() {
+  I &image_ctx = this->m_image_ctx;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << dendl;
+
+  assert(m_snap_id != CEPH_NOSNAP);
+
+  // TODO create an async version of selfmanaged_snap_remove
+  image_ctx.op_work_queue->queue(new C_RemoveSnapId<I>(
+    image_ctx, m_snap_id, create_context_callback<
+      SnapshotCreateRequest<I>,
+      &SnapshotCreateRequest<I>::handle_release_snap_id>(this)), 0);
 }
 
 template <typename I>
-void SnapshotCreateRequest<I>::resume_aio() {
+Context *SnapshotCreateRequest<I>::handle_release_snap_id(int *result) {
   I &image_ctx = this->m_image_ctx;
-  if (m_aio_suspended) {
-    CephContext *cct = image_ctx.cct;
-    ldout(cct, 5) << this << " " << __func__ << dendl;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << *result << dendl;
 
-    image_ctx.aio_work_queue->unblock_writes();
-    m_aio_suspended = false;
-  }
+  assert(m_ret_val < 0);
+  *result = m_ret_val;
+
+  finalize(m_ret_val);
+  return this->create_context_finisher();
 }
 
 template <typename I>
-void SnapshotCreateRequest<I>::resume_requests() {
+void SnapshotCreateRequest<I>::finalize(int r) {
   I &image_ctx = this->m_image_ctx;
-  if (m_requests_suspended) {
-    CephContext *cct = image_ctx.cct;
-    ldout(cct, 5) << this << " " << __func__ << dendl;
+  CephContext *cct = image_ctx.cct;
+  ldout(cct, 5) << this << " " << __func__ << ": r=" << r << dendl;
 
-    // TODO
-    m_requests_suspended = false;
+  if (r == 0) {
+    this->commit_op_event(0);
   }
+  image_ctx.aio_work_queue->unblock_writes();
 }
 
 template <typename I>
 void SnapshotCreateRequest<I>::update_snap_context() {
   I &image_ctx = this->m_image_ctx;
-  assert(image_ctx.owner_lock.is_locked());
-  m_snap_created = true;
 
+  RWLock::RLocker owner_locker(image_ctx.owner_lock);
   RWLock::WLocker snap_locker(image_ctx.snap_lock);
   if (image_ctx.old_format) {
     return;
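
C_CreateSnapId and C_RemoveSnapId above wrap the still-synchronous selfmanaged_snap_create()/selfmanaged_snap_remove() calls in contexts queued on op_work_queue, so the calling thread never blocks. A minimal sketch of that queue-the-blocking-call pattern follows, with toy types in place of the librados and work-queue classes.

#include <cstdint>
#include <deque>
#include <functional>
#include <iostream>

struct ToyWorkQueue {
  std::deque<std::function<void()>> items;
  void queue(std::function<void()> fn) { items.push_back(std::move(fn)); }
  void drain() {                         // normally a worker thread does this
    while (!items.empty()) {
      auto fn = std::move(items.front());
      items.pop_front();
      fn();
    }
  }
};

// stand-in for the blocking selfmanaged_snap_create() call
int blocking_snap_create(uint64_t *snap_id) {
  *snap_id = 42;
  return 0;
}

int main() {
  ToyWorkQueue op_work_queue;
  uint64_t snap_id = 0;

  // queue the blocking call; the completion runs with its return value
  op_work_queue.queue([&snap_id]() {
    int r = blocking_snap_create(&snap_id);
    std::cout << "handle_allocate_snap_id r=" << r
              << " snap_id=" << snap_id << "\n";
  });

  op_work_queue.drain();                 // caller's thread was never blocked
  return 0;
}
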
index abe5556c1c7d2caeb68e69e341003cdc7f598b11..b809a1a09341a1dc2d2012dd0439bf7c0faec1b8 100644 (file)
@@ -33,17 +33,20 @@ public:
    *               v
    *           STATE_SUSPEND_AIO * * * * * * * * * * * * *
    *               |                                     *
+   *               v                                     *
+   *           STATE_APPEND_OP_EVENT (skip if journal    *
+   *               |                  disabled)          *
    *   (retry)     v                                     *
-   *   . . . > STATE_ALLOCATE_SNAP_ID  * *               *
-   *   .           |                     *               *
-   *   .           v                     *               *
-   *   . . . . STATE_CREATE_SNAP * * * * *               *
-   *               |                     *               *
-   *               v                     *               *
-   *           STATE_CREATE_OBJECT_MAP   *               *
-   *               |                     *               *
-   *               |                     *               *
-   *               |                     v               *
+   *   . . . > STATE_ALLOCATE_SNAP_ID                    *
+   *   .           |                                     *
+   *   .           v                                     *
+   *   . . . . STATE_CREATE_SNAP * * * * * * * * * *     *
+   *               |                               *     *
+   *               v                               *     *
+   *           STATE_CREATE_OBJECT_MAP (skip if    *     *
+   *               |                    disabled)  *     *
+   *               |                               *     *
+   *               |                               v     *
    *               |              STATE_RELEASE_SNAP_ID  *
    *               |                     |               *
    *               |                     v               *
@@ -56,67 +59,59 @@ public:
    * to abort, the error path is followed to record the result in the journal
    * (if enabled) and bubble the originating error code back to the client.
    */
-  enum State {
-    STATE_SUSPEND_REQUESTS,
-    STATE_SUSPEND_AIO,
-    STATE_ALLOCATE_SNAP_ID,
-    STATE_CREATE_SNAP,
-    STATE_CREATE_OBJECT_MAP,
-    STATE_RELEASE_SNAP_ID
-  };
-
   SnapshotCreateRequest(ImageCtxT &image_ctx, Context *on_finish,
                        const std::string &snap_name);
 
 protected:
   virtual void send_op();
-  virtual bool should_complete(int r);
-
-  virtual int filter_return_code(int r) const {
-    if (m_ret_val < 0) {
-      return m_ret_val;
-    }
-    return r;
+  virtual bool should_complete(int r) {
+    return true;
+  }
+  virtual bool can_affect_io() const override {
+    return true;
   }
-
   virtual journal::Event create_event(uint64_t op_tid) const {
     return journal::SnapCreateEvent(op_tid, m_snap_name);
   }
 
 private:
   std::string m_snap_name;
-  State m_state;
 
   int m_ret_val;
 
-  bool m_aio_suspended;
-  bool m_requests_suspended;
-
   uint64_t m_snap_id;
-  bool m_snap_created;
-
   uint64_t m_size;
   parent_info m_parent_info;
 
-  int filter_state_return_code(int r) const {
-    if (m_state == STATE_CREATE_SNAP && r == -ESTALE) {
-      return 0;
-    }
-    return r;
-  }
-
-  bool should_complete_error();
-
   void send_suspend_requests();
+  Context *handle_suspend_requests(int *result);
+
   void send_suspend_aio();
+  Context *handle_suspend_aio(int *result);
+
+  void send_append_op_event();
+  Context *handle_append_op_event(int *result);
+
   void send_allocate_snap_id();
+  Context *handle_allocate_snap_id(int *result);
+
   void send_create_snap();
-  bool send_create_object_map();
-  bool send_release_snap_id();
+  Context *handle_create_snap(int *result);
 
-  void resume_aio();
-  void resume_requests();
+  Context *send_create_object_map();
+  Context *handle_create_object_map(int *result);
+
+  void send_release_snap_id();
+  Context *handle_release_snap_id(int *result);
+
+  void finalize(int r);
   void update_snap_context();
+
+  void save_result(int *result) {
+    if (m_ret_val == 0 && *result < 0) {
+      m_ret_val = *result;
+    }
+  }
 };
 
 } // namespace operation
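
A toy sketch of the error path in the diagram above: the first failure is saved via save_result(), the allocated snap id is released, and handle_release_snap_id() overwrites the cleanup's result with the saved error so the original failure is what bubbles back to the client. The type below is a simplified stand-in.

#include <cassert>
#include <iostream>

struct ToySnapCreate {
  int m_ret_val = 0;

  void save_result(int *result) {
    if (m_ret_val == 0 && *result < 0) {
      m_ret_val = *result;
    }
  }

  // cleanup handler: surface the original failure, not the cleanup outcome
  void handle_release_snap_id(int *result) {
    assert(m_ret_val < 0);
    *result = m_ret_val;
  }
};

int main() {
  ToySnapCreate req;

  int create_r = -22;                  // snapshot_add failed (e.g. -EINVAL)
  req.save_result(&create_r);          // remember the first error

  int release_r = 0;                   // releasing the snap id succeeded
  req.handle_release_snap_id(&release_r);
  std::cout << "reported to client: " << release_r << "\n";  // prints -22
  return 0;
}
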
index 6e6c50c9f4fc0b9c214005eed088618d0b2f7444..58312813dfc4a5b5d27da28a719699e23ae6520c 100644 (file)
@@ -17,9 +17,12 @@ template <typename ImageCtxT = ImageCtx>
 class TrimRequest : public AsyncRequest<ImageCtxT>
 {
 public:
-  TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
-             uint64_t original_size, uint64_t new_size,
-             ProgressContext &prog_ctx);
+  static TrimRequest *create(ImageCtxT &image_ctx, Context *on_finish,
+                             uint64_t original_size, uint64_t new_size,
+                             ProgressContext &prog_ctx) {
+    return new TrimRequest(image_ctx, on_finish, original_size, new_size,
+                           prog_ctx);
+  }
 
   virtual void send();
 
@@ -80,6 +83,10 @@ private:
   uint64_t m_new_size;
   ProgressContext &m_prog_ctx;
 
+  TrimRequest(ImageCtxT &image_ctx, Context *on_finish,
+             uint64_t original_size, uint64_t new_size,
+             ProgressContext &prog_ctx);
+
   void send_copyup_objects();
   void send_remove_objects();
   void send_pre_remove();
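
TrimRequest now exposes a static create() and hides its constructor. A toy sketch of that factory shape, with an illustrative class rather than the real request: construction is funneled through one place, and callers cannot instantiate the type directly.

#include <cstdint>
#include <iostream>

class ToyTrimRequest {
public:
  static ToyTrimRequest *create(uint64_t original_size, uint64_t new_size) {
    return new ToyTrimRequest(original_size, new_size);
  }

  void send() {
    std::cout << "trimming " << m_original_size << " -> " << m_new_size << "\n";
    delete this;  // toy requests clean themselves up when done
  }

private:
  ToyTrimRequest(uint64_t original_size, uint64_t new_size)
    : m_original_size(original_size), m_new_size(new_size) {}

  uint64_t m_original_size;
  uint64_t m_new_size;
};

int main() {
  // ToyTrimRequest req(1 << 20, 0);      // would not compile: ctor is private
  ToyTrimRequest::create(1 << 20, 0)->send();
  return 0;
}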