]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: separate exclusive lock handling into async state machines
authorJason Dillaman <dillaman@redhat.com>
Wed, 2 Dec 2015 20:21:58 +0000 (15:21 -0500)
committerJason Dillaman <dillaman@redhat.com>
Tue, 15 Dec 2015 01:31:31 +0000 (20:31 -0500)
Signed-off-by: Jason Dillaman <dillaman@redhat.com>
14 files changed:
src/CMakeLists.txt
src/librbd/AsyncRequest.cc
src/librbd/ExclusiveLock.cc [new file with mode: 0644]
src/librbd/ExclusiveLock.h [new file with mode: 0644]
src/librbd/ImageCtx.cc
src/librbd/ImageCtx.h
src/librbd/ImageWatcher.h
src/librbd/Journal.cc
src/librbd/Journal.h
src/librbd/Makefile.am
src/librbd/exclusive_lock/AcquireRequest.cc [new file with mode: 0644]
src/librbd/exclusive_lock/AcquireRequest.h [new file with mode: 0644]
src/librbd/exclusive_lock/ReleaseRequest.cc [new file with mode: 0644]
src/librbd/exclusive_lock/ReleaseRequest.h [new file with mode: 0644]

index bc7b491f82497af2533d2031bf0420f4ec393f04..c53b02d0276b1914b73077c0fa5042204bc4c89d 100644 (file)
@@ -878,6 +878,7 @@ if(${WITH_RBD})
     librbd/AsyncRequest.cc
     librbd/CopyupRequest.cc
     librbd/DiffIterate.cc
+    librbd/ExclusiveLock.cc
     librbd/ImageCtx.cc
     librbd/ImageWatcher.cc
     librbd/internal.cc
@@ -889,6 +890,8 @@ if(${WITH_RBD})
     librbd/LibrbdWriteback.cc
     librbd/ObjectMap.cc
     librbd/Utils.cc
+    librbd/exclusive_lock/AcquireRequest.cc
+    librbd/exclusive_lock/ReleaseRequest.cc
     librbd/object_map/InvalidateRequest.cc
     librbd/object_map/LockRequest.cc
     librbd/object_map/Request.cc
index ae0650f33e7d693d91d453e0eec919cf9f7c6048..a93eb502a21cad8a84c729ac4019af6780b6bc19 100644 (file)
@@ -51,9 +51,19 @@ void AsyncRequest<T>::start_request() {
 
 template <typename T>
 void AsyncRequest<T>::finish_request() {
-  Mutex::Locker async_ops_locker(m_image_ctx.async_ops_lock);
-  assert(m_xlist_item.remove_myself());
-  m_image_ctx.async_requests_cond.Signal();
+  decltype(m_image_ctx.async_requests_waiters) waiters;
+  {
+    Mutex::Locker async_ops_locker(m_image_ctx.async_ops_lock);
+    assert(m_xlist_item.remove_myself());
+
+    if (m_image_ctx.async_requests.empty()) {
+      waiters = std::move(m_image_ctx.async_requests_waiters);
+    }
+  }
+
+  for (auto ctx : waiters) {
+    ctx->complete(0);
+  }
 }
 
 } // namespace librbd
diff --git a/src/librbd/ExclusiveLock.cc b/src/librbd/ExclusiveLock.cc
new file mode 100644 (file)
index 0000000..9131223
--- /dev/null
@@ -0,0 +1,417 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/ExclusiveLock.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "librbd/AioImageRequestWQ.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/ImageWatcher.h"
+#include "librbd/Utils.h"
+#include "librbd/exclusive_lock/AcquireRequest.h"
+#include "librbd/exclusive_lock/ReleaseRequest.h"
+#include <sstream>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::ExclusiveLock: "
+
+namespace librbd {
+
+using namespace exclusive_lock;
+
+namespace {
+
+const std::string WATCHER_LOCK_COOKIE_PREFIX = "auto";
+
+} // anonymous namespace
+
+template <typename I>
+const std::string ExclusiveLock<I>::WATCHER_LOCK_TAG("internal");
+
+template <typename I>
+ExclusiveLock<I>::ExclusiveLock(I &image_ctx)
+  : m_image_ctx(image_ctx),
+    m_lock(util::unique_lock_name("librbd::ExclusiveLock::m_lock", this)),
+    m_state(STATE_UNINITIALIZED), m_watch_handle(0) {
+}
+
+template <typename I>
+ExclusiveLock<I>::~ExclusiveLock() {
+  assert(m_state == STATE_UNINITIALIZED || m_state == STATE_SHUTDOWN);
+}
+
+template <typename I>
+bool ExclusiveLock<I>::is_lock_owner() const {
+  ldout(m_image_ctx.cct, 20) << __func__ << dendl;
+  assert(m_image_ctx.owner_lock.is_locked());
+
+  Mutex::Locker locker(m_lock);
+  return (m_state == STATE_LOCKED);
+}
+
+template <typename I>
+void ExclusiveLock<I>::init(Context *on_init) {
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+
+  assert(m_image_ctx.owner_lock.is_wlocked());
+
+  Mutex::Locker locker(m_lock);
+  assert(m_state == STATE_UNINITIALIZED);
+  m_state = STATE_INITIALIZING;
+
+  m_image_ctx.aio_work_queue->block_writes(new C_InitComplete(this, on_init));
+}
+
+template <typename I>
+void ExclusiveLock<I>::shut_down(Context *on_shut_down) {
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+  assert(m_image_ctx.owner_lock.is_wlocked());
+
+  Mutex::Locker locker(m_lock);
+  assert(!is_shutdown());
+  execute_action(ACTION_SHUT_DOWN, on_shut_down);
+}
+
+template <typename I>
+void ExclusiveLock<I>::try_lock(Context *on_tried_lock) {
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_image_ctx.owner_lock.is_wlocked());
+    assert(!is_shutdown());
+
+    if (m_state != STATE_LOCKED || !m_actions_contexts.empty()) {
+      ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+      execute_action(ACTION_TRY_LOCK, on_tried_lock);
+      return;
+    }
+  }
+
+  on_tried_lock->complete(0);
+}
+
+template <typename I>
+void ExclusiveLock<I>::request_lock(Context *on_locked) {
+  {
+    Mutex::Locker locker(m_lock);
+    assert(!is_shutdown());
+    assert(m_image_ctx.owner_lock.is_wlocked());
+
+    if (m_state != STATE_LOCKED || !m_actions_contexts.empty()) {
+      ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+      execute_action(ACTION_REQUEST_LOCK, on_locked);
+      return;
+    }
+  }
+
+  if (on_locked != nullptr) {
+    on_locked->complete(0);
+  }
+}
+
+template <typename I>
+void ExclusiveLock<I>::release_lock(Context *on_released) {
+  {
+    Mutex::Locker locker(m_lock);
+    assert(m_image_ctx.owner_lock.is_wlocked());
+    assert(!is_shutdown());
+
+    if (m_state != STATE_UNLOCKED || !m_actions_contexts.empty()) {
+      ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+      execute_action(ACTION_RELEASE_LOCK, on_released);
+      return;
+    }
+  }
+
+  on_released->complete(0);
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_lock_released() {
+  Mutex::Locker locker(m_lock);
+  if (m_state != STATE_WAITING_FOR_PEER) {
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+  assert(get_active_action() == ACTION_REQUEST_LOCK);
+  execute_next_action();
+}
+
+template <typename I>
+void ExclusiveLock<I>::set_watch_handle(uint64_t watch_handle) {
+  Mutex::Locker locker(m_lock);
+  assert(m_watch_handle == 0 || watch_handle == 0);
+  m_watch_handle = watch_handle;
+}
+
+template <typename I>
+std::string ExclusiveLock<I>::encode_lock_cookie() const {
+  assert(m_lock.is_locked());
+
+  assert(m_watch_handle != 0);
+  std::ostringstream ss;
+  ss << WATCHER_LOCK_COOKIE_PREFIX << " " << m_watch_handle;
+  return ss.str();
+}
+
+template <typename I>
+bool ExclusiveLock<I>::decode_lock_cookie(const std::string &tag,
+                                          uint64_t *handle) {
+  std::string prefix;
+  std::istringstream ss(tag);
+  if (!(ss >> prefix >> *handle) || prefix != WATCHER_LOCK_COOKIE_PREFIX) {
+    return false;
+  }
+  return true;
+}
+
+template <typename I>
+bool ExclusiveLock<I>::is_transition_state() const {
+  switch (m_state) {
+  case STATE_INITIALIZING:
+  case STATE_ACQUIRING:
+  case STATE_WAITING_FOR_PEER:
+  case STATE_RELEASING:
+  case STATE_SHUTTING_DOWN:
+    return true;
+  case STATE_UNINITIALIZED:
+  case STATE_UNLOCKED:
+  case STATE_LOCKED:
+  case STATE_SHUTDOWN:
+    break;
+  }
+  return false;
+}
+
+template <typename I>
+void ExclusiveLock<I>::append_context(Action action, Context *ctx) {
+  assert(m_lock.is_locked());
+
+  for (auto &action_ctxs : m_actions_contexts) {
+    if (action == action_ctxs.first) {
+      if (ctx != nullptr) {
+        action_ctxs.second.push_back(ctx);
+      }
+      return;
+    }
+  }
+
+  Contexts contexts;
+  if (ctx != nullptr) {
+    contexts.push_back(ctx);
+  }
+  m_actions_contexts.push_back({action, std::move(contexts)});
+}
+
+template <typename I>
+void ExclusiveLock<I>::execute_action(Action action, Context *ctx) {
+  assert(m_lock.is_locked());
+
+  append_context(action, ctx);
+  if (!is_transition_state()) {
+    execute_next_action();
+  }
+}
+
+template <typename I>
+void ExclusiveLock<I>::execute_next_action() {
+  assert(m_lock.is_locked());
+  assert(!m_actions_contexts.empty());
+  switch (get_active_action()) {
+  case ACTION_TRY_LOCK:
+  case ACTION_REQUEST_LOCK:
+    send_acquire_lock();
+    break;
+  case ACTION_RELEASE_LOCK:
+    send_release_lock();
+    break;
+  case ACTION_SHUT_DOWN:
+    send_shutdown();
+    break;
+  default:
+    assert(false);
+    break;
+  }
+}
+
+template <typename I>
+typename ExclusiveLock<I>::Action ExclusiveLock<I>::get_active_action() const {
+  assert(m_lock.is_locked());
+  assert(!m_actions_contexts.empty());
+  return m_actions_contexts.front().first;
+}
+
+template <typename I>
+void ExclusiveLock<I>::complete_active_action(State next_state, int r) {
+  assert(m_lock.is_locked());
+  assert(!m_actions_contexts.empty());
+
+  ActionContexts action_contexts(std::move(m_actions_contexts.front()));
+  m_actions_contexts.pop_front();
+  m_state = next_state;
+
+  m_lock.Unlock();
+  for (auto ctx : action_contexts.second) {
+    ctx->complete(r);
+  }
+  m_lock.Lock();
+
+  if (!m_actions_contexts.empty()) {
+    execute_next_action();
+  }
+}
+
+template <typename I>
+bool ExclusiveLock<I>::is_shutdown() const {
+  assert(m_lock.is_locked());
+
+  return ((m_state == STATE_SHUTDOWN) ||
+          (!m_actions_contexts.empty() &&
+           m_actions_contexts.back().first == ACTION_SHUT_DOWN));
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_init_complete() {
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+
+  Mutex::Locker locker(m_lock);
+  m_state = STATE_UNLOCKED;
+}
+
+template <typename I>
+void ExclusiveLock<I>::send_acquire_lock() {
+  assert(m_lock.is_locked());
+  if (m_state == STATE_LOCKED) {
+    complete_active_action(STATE_LOCKED, 0);
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+  m_state = STATE_ACQUIRING;
+
+  using el = ExclusiveLock<I>;
+  AcquireRequest<I>* req = AcquireRequest<I>::create(
+    m_image_ctx, encode_lock_cookie(),
+    util::create_context_callback<el, &el::handle_acquire_lock>(this));
+  req->send();
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_acquire_lock(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << r << dendl;
+
+  if (r == -EBUSY) {
+    ldout(cct, 5) << "unable to acquire exclusive lock" << dendl;
+  } else if (r < 0) {
+    lderr(cct) << "failed to acquire exclusive lock:" << cpp_strerror(r)
+               << dendl;
+  } else {
+    ldout(cct, 5) << "successfully acquired exclusive lock" << dendl;
+  }
+
+  {
+    m_lock.Lock();
+    Action action = get_active_action();
+    assert(action == ACTION_TRY_LOCK || action == ACTION_REQUEST_LOCK);
+    if (action == ACTION_REQUEST_LOCK && r < 0 && r != -EBLACKLISTED) {
+      m_state = STATE_WAITING_FOR_PEER;
+      m_lock.Unlock();
+
+      // request the lock from a peer
+      m_image_ctx.image_watcher->notify_request_lock();
+      return;
+    }
+    m_lock.Unlock();
+  }
+
+  State next_state = (r < 0 ? STATE_UNLOCKED : STATE_LOCKED);
+  if (r == -EAGAIN) {
+    r = 0;
+  }
+
+  Mutex::Locker locker(m_lock);
+  if (next_state == STATE_LOCKED) {
+    m_image_ctx.aio_work_queue->unblock_writes();
+  }
+  complete_active_action(next_state, r);
+}
+
+template <typename I>
+void ExclusiveLock<I>::send_release_lock() {
+  assert(m_lock.is_locked());
+  if (m_state == STATE_UNLOCKED) {
+    complete_active_action(STATE_UNLOCKED, 0);
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+  m_state = STATE_RELEASING;
+
+  m_image_ctx.op_work_queue->queue(
+    new C_BlockWrites(m_image_ctx, new C_ReleaseBlockWrites(this)), 0);
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_release_blocked_writes(int r) {
+  if (r < 0) {
+    handle_release_lock(r);
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << __func__ << ": r=" << r << dendl;
+
+  Mutex::Locker locker(m_lock);
+  assert(m_state == STATE_RELEASING);
+
+  using el = ExclusiveLock<I>;
+  ReleaseRequest<I>* req = ReleaseRequest<I>::create(
+    m_image_ctx, encode_lock_cookie(),
+    util::create_context_callback<el, &el::handle_release_lock>(this));
+  req->send();
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_release_lock(int r) {
+  Mutex::Locker locker(m_lock);
+
+  ldout(m_image_ctx.cct, 10) << __func__ << ": r=" << r << dendl;
+
+  if (r < 0) {
+    m_image_ctx.aio_work_queue->unblock_writes();
+  }
+  complete_active_action(r < 0 ? STATE_LOCKED : STATE_UNLOCKED, r);
+}
+
+template <typename I>
+void ExclusiveLock<I>::send_shutdown() {
+  assert(m_lock.is_locked());
+  if (m_state == STATE_UNLOCKED) {
+    m_image_ctx.aio_work_queue->unblock_writes();
+    complete_active_action(STATE_SHUTDOWN, 0);
+    return;
+  }
+
+  ldout(m_image_ctx.cct, 10) << __func__ << dendl;
+  assert(m_state == STATE_LOCKED);
+  m_state = STATE_SHUTTING_DOWN;
+
+  using el = ExclusiveLock<I>;
+  ReleaseRequest<I>* req = ReleaseRequest<I>::create(
+    m_image_ctx, encode_lock_cookie(),
+    util::create_context_callback<el, &el::handle_shutdown>(this));
+  req->send();
+}
+
+template <typename I>
+void ExclusiveLock<I>::handle_shutdown(int r) {
+  Mutex::Locker locker(m_lock);
+  ldout(m_image_ctx.cct, 10) << __func__ << ": r=" << r << dendl;
+
+  complete_active_action(r == 0 ? STATE_SHUTDOWN : STATE_LOCKED, r);
+}
+
+} // namespace librbd
+
+template class librbd::ExclusiveLock<librbd::ImageCtx>;
diff --git a/src/librbd/ExclusiveLock.h b/src/librbd/ExclusiveLock.h
new file mode 100644 (file)
index 0000000..509d8d4
--- /dev/null
@@ -0,0 +1,159 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_H
+
+#include "include/int_types.h"
+#include "include/Context.h"
+#include "common/Mutex.h"
+#include "common/RWLock.h"
+#include <list>
+#include <string>
+#include <utility>
+
+namespace librbd {
+
+class ImageCtx;
+
+template <typename ImageCtxT = ImageCtx>
+class ExclusiveLock {
+public:
+  static const std::string WATCHER_LOCK_TAG;
+
+  ExclusiveLock(ImageCtxT &image_ctx);
+  ~ExclusiveLock();
+
+  bool is_lock_owner() const;
+
+  void init(Context *on_init);
+  void shut_down(Context *on_shutdown);
+
+  void try_lock(Context *on_tried_lock);
+  void request_lock(Context *on_locked);
+  void release_lock(Context *on_released);
+
+  void handle_lock_released();
+
+  void set_watch_handle(uint64_t watch_handle);
+  static bool decode_lock_cookie(const std::string &cookie, uint64_t *handle);
+
+private:
+
+  /**
+   * <start>                               WAITING_FOR_PEER -----------------\
+   *    |                                     ^                              |
+   *    |                                     *  (request_lock busy)         |
+   *    |                                     * * * * * * * * * * * *        |
+   *    |                                                           *        |
+   *    v            (init)            (try_lock/request_lock)      *        |
+   * UNINITIALIZED  -------> UNLOCKED ------------------------> ACQUIRING <--/
+   *                            ^                                   |
+   *                            |                                   |
+   *                            |          (release_lock)           v
+   *                         RELEASING <------------------------- LOCKED
+   *
+   * <UNLOCKED/LOCKED states>
+   *    |
+   *    |
+   *    v
+   * SHUTTING_DOWN ---> SHUTDOWN ---> <finish>
+   */
+  enum State {
+    STATE_UNINITIALIZED,
+    STATE_UNLOCKED,
+    STATE_LOCKED,
+    STATE_INITIALIZING,
+    STATE_ACQUIRING,
+    STATE_WAITING_FOR_PEER,
+    STATE_RELEASING,
+    STATE_SHUTTING_DOWN,
+    STATE_SHUTDOWN,
+  };
+
+  enum Action {
+    ACTION_TRY_LOCK,
+    ACTION_REQUEST_LOCK,
+    ACTION_RELEASE_LOCK,
+    ACTION_SHUT_DOWN
+  };
+
+  typedef std::list<Context *> Contexts;
+  typedef std::pair<Action, Contexts> ActionContexts;
+  typedef std::list<ActionContexts> ActionsContexts;
+
+  struct C_InitComplete : public Context {
+    ExclusiveLock *exclusive_lock;
+    Context *on_init;
+    C_InitComplete(ExclusiveLock *exclusive_lock, Context *on_init)
+      : exclusive_lock(exclusive_lock), on_init(on_init) {
+    }
+    virtual void finish(int r) override {
+      if (r == 0) {
+        exclusive_lock->handle_init_complete();
+      }
+      on_init->complete(r);
+    }
+  };
+
+  struct C_BlockWrites : public Context {
+    ImageCtxT &image_ctx;
+    Context *on_finish;
+    C_BlockWrites(ImageCtxT &image_ctx, Context *on_finish)
+      : image_ctx(image_ctx), on_finish(on_finish) {
+    }
+    virtual void finish(int r) override {
+      RWLock::RLocker owner_locker(image_ctx.owner_lock);
+      image_ctx.aio_work_queue->block_writes(on_finish);
+    }
+  };
+
+  struct C_ReleaseBlockWrites : public Context {
+    ExclusiveLock *exclusive_lock;
+    C_ReleaseBlockWrites(ExclusiveLock *exclusive_lock)
+      : exclusive_lock(exclusive_lock) {
+    }
+    virtual void finish(int r) override {
+      exclusive_lock->handle_release_blocked_writes(r);
+    }
+  };
+
+  ImageCtxT &m_image_ctx;
+
+  mutable Mutex m_lock;
+  State m_state;
+  uint64_t m_watch_handle;
+
+  ActionsContexts m_actions_contexts;
+
+  std::string encode_lock_cookie() const;
+
+  bool is_transition_state() const;
+
+  void append_context(Action action, Context *ctx);
+  void execute_action(Action action, Context *ctx);
+  void execute_next_action();
+
+  Action get_active_action() const;
+  void complete_active_action(State next_state, int r);
+
+  bool is_shutdown() const;
+
+  void handle_init_complete();
+
+  void send_acquire_lock();
+  void handle_acquire_lock(int r);
+
+  void send_release_lock();
+  void handle_release_blocked_writes(int r);
+  void handle_release_lock(int r);
+
+  void send_shutdown();
+  void handle_shutdown(int r);
+};
+
+} // namespace librbd
+
+extern template class librbd::ExclusiveLock<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_H
index 6ad3415dd7132209a26e29990cc12075c26b4c59..9871060fdc95c50b8db7e0b9c4f226cd64b2e74e 100644 (file)
@@ -151,7 +151,7 @@ struct C_InvalidateCache : public Context {
       object_cacher(NULL), writeback_handler(NULL), object_set(NULL),
       readahead(),
       total_bytes_read(0), copyup_finisher(NULL),
-      object_map(*this), aio_work_queue(NULL), op_work_queue(NULL),
+      object_map(*this), object_map_ptr(nullptr), aio_work_queue(NULL), op_work_queue(NULL),
       refresh_in_progress(false), asok_hook(new LibrbdAdminSocketHook(this))
   {
     md_ctx.dup(p);
@@ -860,19 +860,27 @@ struct C_InvalidateCache : public Context {
   }
 
   void ImageCtx::cancel_async_requests() {
-    Mutex::Locker l(async_ops_lock);
-    ldout(cct, 10) << "canceling async requests: count="
-                   << async_requests.size() << dendl;
-
-    for (xlist<AsyncRequest<>*>::iterator it = async_requests.begin();
-         !it.end(); ++it) {
-      ldout(cct, 10) << "canceling async request: " << *it << dendl;
-      (*it)->cancel();
-    }
+    C_SaferCond ctx;
+    cancel_async_requests(&ctx);
+    ctx.wait();
+  }
 
-    while (!async_requests.empty()) {
-      async_requests_cond.Wait(async_ops_lock);
+  void ImageCtx::cancel_async_requests(Context *on_finish) {
+    {
+      Mutex::Locker async_ops_locker(async_ops_lock);
+      if (!async_requests.empty()) {
+        ldout(cct, 10) << "canceling async requests: count="
+                       << async_requests.size() << dendl;
+        for (auto req : async_requests) {
+          ldout(cct, 10) << "canceling async request: " << req << dendl;
+          req->cancel();
+        }
+        async_requests_waiters.push_back(on_finish);
+        return;
+      }
     }
+
+    on_finish->complete(0);
   }
 
   void ImageCtx::clear_pending_completions() {
@@ -1014,6 +1022,14 @@ struct C_InvalidateCache : public Context {
     ASSIGN_OPTION(journal_pool);
   }
 
+  ObjectMap *ImageCtx::create_object_map() {
+    return new ObjectMap(*this);
+  }
+
+  Journal *ImageCtx::create_journal() {
+    return new Journal(*this);
+  }
+
   void ImageCtx::open_journal() {
     assert(journal == NULL);
     journal = new Journal(*this);
index 0a18d9b6fe2e229d91ac7d1172e17dda3033926d..1641de54b1787b3aa12e6bb1c0c72c069224b386 100644 (file)
@@ -5,6 +5,7 @@
 
 #include "include/int_types.h"
 
+#include <list>
 #include <map>
 #include <set>
 #include <string>
@@ -134,9 +135,10 @@ namespace librbd {
 
     xlist<AsyncOperation*> async_ops;
     xlist<AsyncRequest<>*> async_requests;
-    Cond async_requests_cond;
+    std::list<Context*> async_requests_waiters;
 
-    ObjectMap object_map;
+    ObjectMap object_map;         // TODO
+    ObjectMap *object_map_ptr;
 
     atomic_t async_request_seq;
 
@@ -264,10 +266,15 @@ namespace librbd {
     void flush(Context *on_safe);
 
     void cancel_async_requests();
+    void cancel_async_requests(Context *on_finish);
     void apply_metadata_confs();
 
-    void open_journal();
-    int close_journal(bool force);
+    ObjectMap *create_object_map();
+
+    Journal *create_journal();
+    void open_journal();            // TODO remove
+    int close_journal(bool force);  // TODO remove
+
     void clear_pending_completions();
   };
 }
index 829324a5abde4e075d46c5c756571deb123fce14..5e73cf1c90c1be53a7fe4cfcb05187ee76a52138 100644 (file)
@@ -75,6 +75,8 @@ public:
                                 ProgressContext &prog_ctx);
   int notify_rename(const std::string &image_name);
 
+  void notify_request_lock();
+
   void notify_lock_state();
   static void notify_header_update(librados::IoCtx &io_ctx,
                                    const std::string &oid);
@@ -267,7 +269,6 @@ private:
   void notify_released_lock();
 
   void schedule_request_lock(bool use_timer, int timer_delay = -1);
-  void notify_request_lock();
 
   int notify_lock_owner(bufferlist &bl);
 
index 8ef0a2f6fa67471becf82d29cdbdd7085f519f92..755a058a4477b706c3f29ad3d845349f426f539b 100644 (file)
@@ -248,6 +248,11 @@ void Journal::open() {
   create_journaler();
 }
 
+void Journal::open(Context *on_finish) {
+  open();
+  wait_for_journal_ready(on_finish);
+}
+
 int Journal::close() {
   CephContext *cct = m_image_ctx.cct;
   ldout(cct, 20) << this << " " << __func__ << ": state=" << m_state << dendl;
@@ -288,6 +293,11 @@ int Journal::close() {
   return 0;
 }
 
+void Journal::close(Context *on_finish) {
+  // TODO
+  assert(false);
+}
+
 uint64_t Journal::append_io_event(AioCompletion *aio_comp,
                                   const journal::EventEntry &event_entry,
                                   const AioObjectRequests &requests,
index 8823db09a9184295c783d61122387b8fc77f7692..a7d9e7201dd3ba93903e5a42079b18593d614b3d 100644 (file)
@@ -54,8 +54,10 @@ public:
   void wait_for_journal_ready(Context *on_ready);
   void wait_for_journal_ready();
 
-  void open();
-  int close();
+  void open();  // TODO remove
+  void open(Context *on_finish);
+  int close();  // TODO remove
+  void close(Context *on_finish);
 
   uint64_t append_io_event(AioCompletion *aio_comp,
                            const journal::EventEntry &event_entry,
index 5d7d082631f74ef05401c7203fd97b0253f82c62..d88973c54f86902688b19860695366837b956d75 100644 (file)
@@ -17,6 +17,7 @@ librbd_internal_la_SOURCES = \
        librbd/AsyncRequest.cc \
        librbd/CopyupRequest.cc \
        librbd/DiffIterate.cc \
+       librbd/ExclusiveLock.cc \
        librbd/ImageCtx.cc \
        librbd/ImageWatcher.cc \
        librbd/internal.cc \
@@ -26,6 +27,8 @@ librbd_internal_la_SOURCES = \
        librbd/LibrbdWriteback.cc \
        librbd/ObjectMap.cc \
        librbd/Utils.cc \
+       librbd/exclusive_lock/AcquireRequest.cc \
+       librbd/exclusive_lock/ReleaseRequest.cc \
        librbd/object_map/InvalidateRequest.cc \
        librbd/object_map/LockRequest.cc \
        librbd/object_map/Request.cc \
@@ -82,6 +85,7 @@ noinst_HEADERS += \
        librbd/AsyncRequest.h \
        librbd/CopyupRequest.h \
        librbd/DiffIterate.h \
+       librbd/ExclusiveLock.h \
        librbd/ImageCtx.h \
        librbd/ImageWatcher.h \
        librbd/internal.h \
@@ -96,6 +100,8 @@ noinst_HEADERS += \
        librbd/TaskFinisher.h \
        librbd/Utils.h \
        librbd/WatchNotifyTypes.h \
+       librbd/exclusive_lock/AcquireRequest.h \
+       librbd/exclusive_lock/ReleaseRequest.h \
        librbd/object_map/InvalidateRequest.h \
        librbd/object_map/LockRequest.h \
        librbd/object_map/Request.h \
diff --git a/src/librbd/exclusive_lock/AcquireRequest.cc b/src/librbd/exclusive_lock/AcquireRequest.cc
new file mode 100644 (file)
index 0000000..8c19282
--- /dev/null
@@ -0,0 +1,403 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/AcquireRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "cls/lock/cls_lock_types.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "include/stringify.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::AcquireRequest: "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_ack_callback;
+using util::create_rados_safe_callback;
+
+namespace {
+
+template <typename I>
+struct C_BlacklistClient : public Context {
+  I &image_ctx;
+  std::string locker_address;
+  Context *on_finish;
+
+  C_BlacklistClient(I &image_ctx, const std::string &locker_address,
+                    Context *on_finish)
+    : image_ctx(image_ctx), locker_address(locker_address),
+      on_finish(on_finish) {
+  }
+
+  virtual void finish(int r) override {
+    librados::Rados rados(image_ctx.md_ctx);
+    r = rados.blacklist_add(locker_address,
+                            image_ctx.blacklist_expire_seconds);
+    on_finish->complete(r);
+  }
+};
+
+} // anonymous namespace
+
+template <typename I>
+AcquireRequest<I>* AcquireRequest<I>::create(I &image_ctx,
+                                             const std::string &cookie,
+                                             Context *on_finish) {
+  return new AcquireRequest(image_ctx, cookie, on_finish);
+}
+
+template <typename I>
+AcquireRequest<I>::AcquireRequest(I &image_ctx, const std::string &cookie,
+                                  Context *on_finish)
+  : m_image_ctx(image_ctx), m_cookie(cookie),
+    m_on_finish(create_async_context_callback(image_ctx, on_finish)),
+    m_object_map(nullptr), m_journal(nullptr) {
+}
+
+template <typename I>
+void AcquireRequest<I>::send() {
+  send_lock();
+}
+
+template <typename I>
+void AcquireRequest<I>::send_lock() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::lock(&op, RBD_LOCK_NAME, LOCK_EXCLUSIVE, m_cookie,
+                         ExclusiveLock<I>::WATCHER_LOCK_TAG, "", utime_t(), 0);
+
+  using klass = AcquireRequest<I>;
+  librados::AioCompletion *rados_completion =
+    create_rados_safe_callback<klass, &klass::handle_lock>(this);
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_lock(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val == 0) {
+    return send_open_journal();
+  } else if (*ret_val != -EBUSY) {
+    lderr(cct) << "failed to lock: " << cpp_strerror(*ret_val) << dendl;
+    return m_on_finish;
+  }
+
+  send_get_lockers();
+  return nullptr;
+}
+
+template <typename I>
+Context *AcquireRequest<I>::send_open_journal() {
+  if (!m_image_ctx.test_features(RBD_FEATURE_JOURNALING)) {
+    return send_open_object_map();
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  using klass = AcquireRequest<I>;
+  Context *ctx = create_context_callback<klass, &klass::handle_open_journal>(
+    this);
+  m_journal = m_image_ctx.create_journal();
+  m_journal->open(ctx);
+  return nullptr;
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_open_journal(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val < 0) {
+    lderr(cct) << "failed to open journal: " << cpp_strerror(*ret_val) << dendl;
+
+    delete m_journal;
+    return m_on_finish;
+  }
+
+  assert(m_image_ctx.journal == nullptr);
+  return send_open_object_map();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::send_open_object_map() {
+  if (!m_image_ctx.test_features(RBD_FEATURE_OBJECT_MAP)) {
+    apply();
+    return m_on_finish;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  using klass = AcquireRequest<I>;
+  Context *ctx = create_context_callback<klass, &klass::handle_open_object_map>(
+    this);
+  m_object_map = m_image_ctx.create_object_map(CEPH_NOSNAP);
+  m_object_map->open(ctx);
+  return nullptr;
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_open_object_map(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  // object map should never result in an error
+  assert(*ret_val == 0);
+  return send_lock_object_map();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::send_lock_object_map() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  assert(m_object_map != nullptr);
+
+  using klass = AcquireRequest<I>;
+  Context *ctx = create_context_callback<klass, &klass::handle_lock_object_map>(
+    this);
+  m_object_map->lock(ctx);
+  return nullptr;
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_lock_object_map(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  // object map should never result in an error
+  assert(*ret_val == 0);
+
+  apply();
+  return m_on_finish;
+}
+
+template <typename I>
+void AcquireRequest<I>::send_get_lockers() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  librados::ObjectReadOperation op;
+  rados::cls::lock::get_lock_info_start(&op, RBD_LOCK_NAME);
+
+  using klass = AcquireRequest<I>;
+  librados::AioCompletion *rados_completion =
+    create_rados_ack_callback<klass, &klass::handle_get_lockers>(this);
+  m_out_bl.clear();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op, &m_out_bl);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_get_lockers(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  std::map<rados::cls::lock::locker_id_t,
+           rados::cls::lock::locker_info_t> lockers;
+  ClsLockType lock_type;
+  std::string lock_tag;
+  if (*ret_val == 0) {
+    bufferlist::iterator it = m_out_bl.begin();
+    *ret_val = rados::cls::lock::get_lock_info_finish(&it, &lockers,
+                                                      &lock_type, &lock_tag);
+  }
+
+  if (*ret_val < 0) {
+    lderr(cct) << "failed to retrieve lockers: " << cpp_strerror(*ret_val)
+               << dendl;
+    return m_on_finish;
+  }
+
+  if (lockers.empty()) {
+    ldout(cct, 20) << "no lockers detected" << dendl;
+    send_lock();
+    return nullptr;
+  }
+
+  if (lock_tag != ExclusiveLock<I>::WATCHER_LOCK_TAG) {
+    ldout(cct, 5) <<"locked by external mechanism: tag=" << lock_tag << dendl;
+    *ret_val = -EBUSY;
+    return m_on_finish;
+  }
+
+  if (lock_type == LOCK_SHARED) {
+    ldout(cct, 5) << "shared lock type detected" << dendl;
+    *ret_val = -EBUSY;
+    return m_on_finish;
+  }
+
+  std::map<rados::cls::lock::locker_id_t,
+           rados::cls::lock::locker_info_t>::iterator iter = lockers.begin();
+  if (!ExclusiveLock<I>::decode_lock_cookie(iter->first.cookie,
+                                            &m_locker_handle)) {
+    ldout(cct, 5) << "locked by external mechanism: "
+                  << "cookie=" << iter->first.cookie << dendl;
+    *ret_val = -EBUSY;
+    return m_on_finish;
+  }
+
+  m_locker_entity = iter->first.locker;
+  m_locker_cookie = iter->first.cookie;
+  m_locker_address = stringify(iter->second.addr);
+  if (m_locker_cookie.empty() || m_locker_address.empty()) {
+    ldout(cct, 20) << "no valid lockers detected" << dendl;
+    send_lock();
+    return nullptr;
+  }
+
+  ldout(cct, 10) << "retrieved exclusive locker: "
+                 << m_locker_entity << "@" << m_locker_address << dendl;
+  send_get_watchers();
+  return nullptr;
+}
+
+template <typename I>
+void AcquireRequest<I>::send_get_watchers() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  librados::ObjectReadOperation op;
+  op.list_watchers(&m_watchers, &m_watchers_ret_val);
+
+  using klass = AcquireRequest<I>;
+  librados::AioCompletion *rados_completion =
+    create_rados_ack_callback<klass, &klass::handle_get_watchers>(this);
+  m_out_bl.clear();
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op, &m_out_bl);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_get_watchers(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val == 0) {
+    *ret_val = m_watchers_ret_val;
+  }
+  if (*ret_val < 0) {
+    lderr(cct) << "failed to retrieve watchers: " << cpp_strerror(*ret_val)
+               << dendl;
+    return m_on_finish;
+  }
+
+  for (auto &watcher : m_watchers) {
+    if ((strncmp(m_locker_address.c_str(),
+                 watcher.addr, sizeof(watcher.addr)) == 0) &&
+        (m_locker_handle == watcher.cookie)) {
+      ldout(cct, 10) << "lock owner is still alive" << dendl;
+
+      *ret_val = -EAGAIN;
+      return m_on_finish;
+    }
+  }
+
+  send_blacklist();
+  return nullptr;
+}
+
+template <typename I>
+void AcquireRequest<I>::send_blacklist() {
+  if (!m_image_ctx.blacklist_on_break_lock) {
+    send_break_lock();
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  // TODO: need async version of RadosClient::blacklist_add
+  using klass = AcquireRequest<I>;
+  Context *ctx = create_context_callback<klass, &klass::handle_blacklist>(
+    this);
+  m_image_ctx.op_work_queue->queue(new C_BlacklistClient<I>(m_image_ctx,
+                                                            m_locker_address,
+                                                            ctx), 0);
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_blacklist(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val < 0) {
+    lderr(cct) << "failed to blacklist lock owner: " << cpp_strerror(*ret_val)
+               << dendl;
+    return m_on_finish;
+  }
+  send_break_lock();
+  return nullptr;
+}
+
+template <typename I>
+void AcquireRequest<I>::send_break_lock() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::break_lock(&op, RBD_LOCK_NAME, m_locker_cookie,
+                               m_locker_entity);
+
+  using klass = AcquireRequest<I>;
+  librados::AioCompletion *rados_completion =
+    create_rados_safe_callback<klass, &klass::handle_break_lock>(this);
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+Context *AcquireRequest<I>::handle_break_lock(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val == -ENOENT) {
+    *ret_val = 0;
+  } else if (*ret_val < 0) {
+    lderr(cct) << "failed to break lock: " << cpp_strerror(*ret_val) << dendl;
+    return m_on_finish;
+  }
+
+  send_lock();
+  return nullptr;
+}
+
+template <typename I>
+void AcquireRequest<I>::apply() {
+  RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+  assert(m_image_ctx.object_map == nullptr);
+  m_image_ctx.object_map = m_object_map;
+
+  assert(m_image_ctx.journal == nullptr);
+  m_image_ctx.journal = m_journal;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::AcquireRequest<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/AcquireRequest.h b/src/librbd/exclusive_lock/AcquireRequest.h
new file mode 100644 (file)
index 0000000..71d4a9d
--- /dev/null
@@ -0,0 +1,114 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_ACQUIRE_REQUEST_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_ACQUIRE_REQUEST_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+#include "include/rados/librados.hpp"
+#include "librbd/ImageCtx.h"
+#include "msg/msg_types.h"
+#include <map>
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class Journal;
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class AcquireRequest {
+public:
+  static AcquireRequest* create(ImageCtxT &image_ctx, const std::string &cookie,
+                                Context *on_finish);
+
+  void send();
+
+private:
+
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |     /---------------------------------------------------------\
+   *    |     |                                                         |
+   *    |     |             (no lockers)                                |
+   *    |     |   . . . . . . . . . . . . . . . . . . . . .             |
+   *    |     |   .                                       .             |
+   *    |     v   v      (EBUSY)                          .             |
+   *    \--> LOCK_IMAGE * * * * * * * > GET_LOCKERS . . . .             |
+   *          .   |                       |                             |
+   *    . . . .   |                       |                             |
+   *    .         v                       v                             |
+   *    .     OPEN_JOURNAL  . . .       GET_WATCHERS . . .              |
+   *    .         |             .         |              .              |
+   *    .         v             .         v              .              |
+   *    . . > OPEN_OBJECT_MAP   .       BLACKLIST        . (blacklist   |
+   *    .         |             .         |              .  disabled)   |
+   *    .         v             .         v              .              |
+   *    .     LOCK_OBJECT_MAP   .       BREAK_LOCK < . . .              |
+   *    .         |             .         |                             |
+   *    .         v             .         |                             |
+   *    . . > <finish>  < . . . .         |                             |
+   *                                      \-----------------------------/
+   *
+   * @endverbatim
+   */
+
+  AcquireRequest(ImageCtxT &image_ctx, const std::string &cookie,
+                 Context *on_finish);
+
+  ImageCtxT &m_image_ctx;
+  std::string m_cookie;
+  Context *m_on_finish;
+
+  bufferlist m_out_bl;
+
+  std::list<obj_watch_t> m_watchers;
+  int m_watchers_ret_val;
+
+  decltype(m_image_ctx.object_map) m_object_map;
+  decltype(m_image_ctx.journal) m_journal;
+
+  entity_name_t m_locker_entity;
+  std::string m_locker_cookie;
+  std::string m_locker_address;
+  uint64_t m_locker_handle;
+
+  void send_lock();
+  Context *handle_lock(int *ret_val);
+
+  Context *send_open_journal();
+  Context *handle_open_journal(int *ret_val);
+
+  Context *send_open_object_map();
+  Context *handle_open_object_map(int *ret_val);
+
+  Context *send_lock_object_map();
+  Context *handle_lock_object_map(int *ret_val);
+
+  void send_get_lockers();
+  Context *handle_get_lockers(int *ret_val);
+
+  void send_get_watchers();
+  Context *handle_get_watchers(int *ret_val);
+
+  void send_blacklist();
+  Context *handle_blacklist(int *ret_val);
+
+  void send_break_lock();
+  Context *handle_break_lock(int *ret_val);
+
+  void apply();
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::AcquireRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_ACQUIRE_REQUEST_H
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.cc b/src/librbd/exclusive_lock/ReleaseRequest.cc
new file mode 100644 (file)
index 0000000..af0cb5d
--- /dev/null
@@ -0,0 +1,175 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/exclusive_lock/ReleaseRequest.h"
+#include "cls/lock/cls_lock_client.h"
+#include "cls/lock/cls_lock_types.h"
+#include "common/dout.h"
+#include "common/errno.h"
+#include "common/WorkQueue.h"
+#include "include/stringify.h"
+#include "librbd/ExclusiveLock.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Journal.h"
+#include "librbd/ObjectMap.h"
+#include "librbd/Utils.h"
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::exclusive_lock::ReleaseRequest: "
+
+namespace librbd {
+namespace exclusive_lock {
+
+using util::create_async_context_callback;
+using util::create_context_callback;
+using util::create_rados_safe_callback;
+
+template <typename I>
+ReleaseRequest<I>* ReleaseRequest<I>::create(I &image_ctx,
+                                             const std::string &cookie,
+                                             Context *on_finish) {
+  return new ReleaseRequest(image_ctx, cookie, on_finish);
+}
+
+template <typename I>
+ReleaseRequest<I>::ReleaseRequest(I &image_ctx, const std::string &cookie,
+                                  Context *on_finish)
+  : m_image_ctx(image_ctx), m_cookie(cookie),
+    m_on_finish(create_async_context_callback(image_ctx, on_finish)),
+    m_object_map(nullptr), m_journal(nullptr) {
+}
+
+template <typename I>
+void ReleaseRequest<I>::send() {
+  send_cancel_op_requests();
+}
+
+template <typename I>
+void ReleaseRequest<I>::send_cancel_op_requests() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  using klass = ReleaseRequest<I>;
+  Context *ctx = create_context_callback<klass,
+                                         &klass::handle_cancel_op_requests>(this);
+  m_image_ctx.cancel_async_requests(ctx);
+}
+
+template <typename I>
+Context *ReleaseRequest<I>::handle_cancel_op_requests(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  assert(*ret_val == 0);
+  send_close_journal();
+  return nullptr;
+}
+
+template <typename I>
+void ReleaseRequest<I>::send_close_journal() {
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    std::swap(m_journal, m_image_ctx.journal);
+  }
+
+  if (m_journal == nullptr) {
+    send_unlock_object_map();
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  using klass = ReleaseRequest<I>;
+  Context *ctx = create_context_callback<klass, &klass::handle_close_journal>(
+    this);
+  m_journal->close(ctx);
+}
+
+template <typename I>
+Context *ReleaseRequest<I>::handle_close_journal(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val < 0) {
+    // error implies some journal events were not flushed -- continue
+    lderr(cct) << "failed to close journal: " << cpp_strerror(*ret_val)
+               << dendl;
+  }
+
+  delete m_journal;
+
+  send_unlock_object_map();
+  return nullptr;
+}
+
+template <typename I>
+void ReleaseRequest<I>::send_unlock_object_map() {
+  {
+    RWLock::WLocker snap_locker(m_image_ctx.snap_lock);
+    std::swap(m_object_map, m_image_ctx.object_map_ptr);
+  }
+
+  if (m_object_map == nullptr) {
+    send_unlock();
+    return;
+  }
+
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  using klass = ReleaseRequest<I>;
+  Context *ctx = create_context_callback<
+    klass, &klass::handle_unlock_object_map>(this);
+  m_object_map->unlock(ctx);
+}
+
+template <typename I>
+Context *ReleaseRequest<I>::handle_unlock_object_map(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  // object map shouldn't return errors
+  assert(*ret_val == 0);
+  delete m_object_map;
+
+  send_unlock();
+  return nullptr;
+}
+
+template <typename I>
+void ReleaseRequest<I>::send_unlock() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << dendl;
+
+  librados::ObjectWriteOperation op;
+  rados::cls::lock::unlock(&op, RBD_LOCK_NAME, m_cookie);
+
+  using klass = ReleaseRequest<I>;
+  librados::AioCompletion *rados_completion =
+    create_rados_safe_callback<klass, &klass::handle_unlock>(this);
+  int r = m_image_ctx.md_ctx.aio_operate(m_image_ctx.header_oid,
+                                         rados_completion, &op);
+  assert(r == 0);
+  rados_completion->release();
+}
+
+template <typename I>
+Context *ReleaseRequest<I>::handle_unlock(int *ret_val) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 10) << __func__ << ": r=" << *ret_val << dendl;
+
+  if (*ret_val < 0 && *ret_val != -ENOENT) {
+    lderr(cct) << "failed to unlock: " << cpp_strerror(*ret_val) << dendl;
+  }
+
+  // treat errors as the image is unlocked
+  *ret_val = 0;
+  return m_on_finish;
+}
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+template class librbd::exclusive_lock::ReleaseRequest<librbd::ImageCtx>;
diff --git a/src/librbd/exclusive_lock/ReleaseRequest.h b/src/librbd/exclusive_lock/ReleaseRequest.h
new file mode 100644 (file)
index 0000000..2523181
--- /dev/null
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_EXCLUSIVE_LOCK_RELEASE_REQUEST_H
+#define CEPH_LIBRBD_EXCLUSIVE_LOCK_RELEASE_REQUEST_H
+
+#include "include/int_types.h"
+#include "librbd/ImageCtx.h"
+#include <string>
+
+class Context;
+
+namespace librbd {
+
+class ImageCtx;
+class Journal;
+
+namespace exclusive_lock {
+
+template <typename ImageCtxT = ImageCtx>
+class ReleaseRequest {
+public:
+  static ReleaseRequest* create(ImageCtxT &image_ctx, const std::string &cookie,
+                                Context *on_finish);
+
+  void send();
+
+private:
+  /**
+   * @verbatim
+   *
+   * <start>
+   *    |
+   *    v
+   * CANCEL_OP_REQUESTS . . . . . . . . . . . .
+   *    |                                     .
+   *    v                                     .
+   * CLOSE_JOURNAL                            .
+   *    |                (journal disabled,   .
+   *    v                 object map enabled) .
+   * UNLOCK_OBJECT_MAP  < . . . . . . . . . . .
+   *    |                                     .
+   *    v               (object map disabled) .
+   * UNLOCK < . . . . . . . . . . . . . . . . .
+   *    |
+   *    v
+   * <finish>
+   *
+   * @endverbatim
+   */
+
+  ReleaseRequest(ImageCtxT &image_ctx, const std::string &cookie,
+                 Context *on_finish);
+
+  ImageCtxT &m_image_ctx;
+  std::string m_cookie;
+  Context *m_on_finish;
+
+  decltype(m_image_ctx.object_map_ptr) m_object_map;
+  decltype(m_image_ctx.journal) m_journal;
+
+  void send_cancel_op_requests();
+  Context *handle_cancel_op_requests(int *ret_val);
+
+  void send_close_journal();
+  Context *handle_close_journal(int *ret_val);
+
+  void send_unlock_object_map();
+  Context *handle_unlock_object_map(int *ret_val);
+
+  void send_unlock();
+  Context *handle_unlock(int *ret_val);
+
+};
+
+} // namespace exclusive_lock
+} // namespace librbd
+
+extern template class librbd::exclusive_lock::ReleaseRequest<librbd::ImageCtx>;
+
+#endif // CEPH_LIBRBD_EXCLUSIVE_LOCK_RELEASE_REQUEST_H