]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: use AIO notifications to prevent blocking ops
authorJason Dillaman <dillaman@redhat.com>
Tue, 16 Feb 2016 15:10:16 +0000 (10:10 -0500)
committerJason Dillaman <dillaman@redhat.com>
Thu, 18 Feb 2016 20:45:51 +0000 (15:45 -0500)
If two or more images share the same CephContext, notifications
from one image can block the work queue which will potentially
block acknowledging the notification until after it times out.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/ImageWatcher.cc
src/librbd/ImageWatcher.h
src/librbd/Makefile.am
src/librbd/image_watcher/Notifier.cc
src/librbd/image_watcher/NotifyLockOwner.cc [new file with mode: 0644]
src/librbd/image_watcher/NotifyLockOwner.h [new file with mode: 0644]
src/test/librados_test_stub/TestWatchNotify.cc

index a85b44375aed1d6f0c229d2db3016d5287adc94a..35b5bf9b1aa146a04a8359f4288ab6e0a0322622 100644 (file)
@@ -12,6 +12,7 @@
 #include "librbd/TaskFinisher.h"
 #include "librbd/Utils.h"
 #include "librbd/image_watcher/Notifier.h"
+#include "librbd/image_watcher/NotifyLockOwner.h"
 #include "include/encoding.h"
 #include "include/stringify.h"
 #include "common/errno.h"
@@ -28,6 +29,7 @@ namespace librbd {
 
 using namespace image_watcher;
 using namespace watch_notify;
+using util::create_context_callback;
 
 static const double    RETRY_DELAY_SECONDS = 1.0;
 
@@ -109,8 +111,7 @@ int ImageWatcher::notify_async_progress(const AsyncRequestId &request,
 
   bufferlist bl;
   ::encode(NotifyMessage(AsyncProgressPayload(request, offset, total)), bl);
-  m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl,
-                             Notifier::NOTIFY_TIMEOUT, NULL);
+  m_notifier.notify(bl, nullptr, nullptr);
   return 0;
 }
 
@@ -121,30 +122,31 @@ void ImageWatcher::schedule_async_complete(const AsyncRequestId &request,
   m_task_finisher->queue(ctx);
 }
 
-int ImageWatcher::notify_async_complete(const AsyncRequestId &request,
-                                       int r) {
+void ImageWatcher::notify_async_complete(const AsyncRequestId &request, int r) {
   ldout(m_image_ctx.cct, 20) << this << " remote async request finished: "
                             << request << " = " << r << dendl;
 
   bufferlist bl;
   ::encode(NotifyMessage(AsyncCompletePayload(request, r)), bl);
+  m_notifier.notify(bl, nullptr, new FunctionContext(
+    boost::bind(&ImageWatcher::handle_async_complete, this, request, r, _1)));
+}
 
-  if (r >= 0) {
-    m_image_ctx.notify_update();
-  }
-  int ret = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl,
-                                      Notifier::NOTIFY_TIMEOUT, NULL);
-  if (ret < 0) {
+void ImageWatcher::handle_async_complete(const AsyncRequestId &request, int r,
+                                         int ret_val) {
+  ldout(m_image_ctx.cct, 20) << this << " " << __func__ << ": "
+                             << "request=" << request << ", r=" << ret_val
+                             << dendl;
+  if (ret_val < 0) {
     lderr(m_image_ctx.cct) << this << " failed to notify async complete: "
-                          << cpp_strerror(ret) << dendl;
-    if (ret == -ETIMEDOUT) {
+                          << cpp_strerror(ret_val) << dendl;
+    if (ret_val == -ETIMEDOUT) {
       schedule_async_complete(request, r);
     }
   } else {
-    RWLock::WLocker l(m_async_request_lock);
+    RWLock::WLocker async_request_locker(m_async_request_lock);
     m_async_pending.erase(request);
   }
-  return 0;
 }
 
 int ImageWatcher::notify_flatten(uint64_t request_id, ProgressContext &prog_ctx) {
@@ -156,8 +158,7 @@ int ImageWatcher::notify_flatten(uint64_t request_id, ProgressContext &prog_ctx)
 
   bufferlist bl;
   ::encode(NotifyMessage(FlattenPayload(async_request_id)), bl);
-
-  return notify_async_request(async_request_id, bl, prog_ctx);
+  return notify_async_request(async_request_id, std::move(bl), prog_ctx);
 }
 
 int ImageWatcher::notify_resize(uint64_t request_id, uint64_t size,
@@ -170,8 +171,7 @@ int ImageWatcher::notify_resize(uint64_t request_id, uint64_t size,
 
   bufferlist bl;
   ::encode(NotifyMessage(ResizePayload(size, async_request_id)), bl);
-
-  return notify_async_request(async_request_id, bl, prog_ctx);
+  return notify_async_request(async_request_id, std::move(bl), prog_ctx);
 }
 
 int ImageWatcher::notify_snap_create(const std::string &snap_name) {
@@ -181,8 +181,7 @@ int ImageWatcher::notify_snap_create(const std::string &snap_name) {
 
   bufferlist bl;
   ::encode(NotifyMessage(SnapCreatePayload(snap_name)), bl);
-
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 
 int ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id,
@@ -193,8 +192,7 @@ int ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id,
 
   bufferlist bl;
   ::encode(NotifyMessage(SnapRenamePayload(src_snap_id, dst_snap_name)), bl);
-
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 int ImageWatcher::notify_snap_remove(const std::string &snap_name) {
   assert(m_image_ctx.owner_lock.is_locked());
@@ -203,8 +201,7 @@ int ImageWatcher::notify_snap_remove(const std::string &snap_name) {
 
   bufferlist bl;
   ::encode(NotifyMessage(SnapRemovePayload(snap_name)), bl);
-
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 
 int ImageWatcher::notify_snap_protect(const std::string &snap_name) {
@@ -214,7 +211,7 @@ int ImageWatcher::notify_snap_protect(const std::string &snap_name) {
 
   bufferlist bl;
   ::encode(NotifyMessage(SnapProtectPayload(snap_name)), bl);
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 
 int ImageWatcher::notify_snap_unprotect(const std::string &snap_name) {
@@ -224,7 +221,7 @@ int ImageWatcher::notify_snap_unprotect(const std::string &snap_name) {
 
   bufferlist bl;
   ::encode(NotifyMessage(SnapUnprotectPayload(snap_name)), bl);
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 
 int ImageWatcher::notify_rebuild_object_map(uint64_t request_id,
@@ -237,8 +234,7 @@ int ImageWatcher::notify_rebuild_object_map(uint64_t request_id,
 
   bufferlist bl;
   ::encode(NotifyMessage(RebuildObjectMapPayload(async_request_id)), bl);
-
-  return notify_async_request(async_request_id, bl, prog_ctx);
+  return notify_async_request(async_request_id, std::move(bl), prog_ctx);
 }
 
 int ImageWatcher::notify_rename(const std::string &image_name) {
@@ -248,7 +244,7 @@ int ImageWatcher::notify_rename(const std::string &image_name) {
 
   bufferlist bl;
   ::encode(NotifyMessage(RenamePayload(image_name)), bl);
-  return notify_lock_owner(bl);
+  return notify_lock_owner(std::move(bl));
 }
 
 void ImageWatcher::notify_header_update(Context *on_finish) {
@@ -297,8 +293,7 @@ void ImageWatcher::notify_acquired_lock() {
 
   bufferlist bl;
   ::encode(NotifyMessage(AcquiredLockPayload(client_id)), bl);
-  m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl,
-                             Notifier::NOTIFY_TIMEOUT, NULL);
+  m_notifier.notify(bl, nullptr, nullptr);
 }
 
 void ImageWatcher::notify_released_lock() {
@@ -311,8 +306,7 @@ void ImageWatcher::notify_released_lock() {
 
   bufferlist bl;
   ::encode(NotifyMessage(ReleasedLockPayload(get_client_id())), bl);
-  m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl,
-                             Notifier::NOTIFY_TIMEOUT, NULL);
+  m_notifier.notify(bl, nullptr, nullptr);
 }
 
 void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) {
@@ -349,8 +343,14 @@ void ImageWatcher::notify_request_lock() {
 
   bufferlist bl;
   ::encode(NotifyMessage(RequestLockPayload(get_client_id())), bl);
+  notify_lock_owner(std::move(bl), create_context_callback<
+    ImageWatcher, &ImageWatcher::handle_request_lock>(this));
+}
+
+void ImageWatcher::handle_request_lock(int r) {
+  RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
+  assert(!m_image_ctx.exclusive_lock->is_lock_owner());
 
-  int r = notify_lock_owner(bl);
   if (r == -ETIMEDOUT) {
     ldout(m_image_ctx.cct, 5) << this << " timed out requesting lock: retrying"
                               << dendl;
@@ -370,65 +370,17 @@ void ImageWatcher::notify_request_lock() {
   }
 }
 
-int ImageWatcher::notify_lock_owner(bufferlist &bl) {
-  assert(m_image_ctx.owner_lock.is_locked());
-
-  // since we need to ack our own notifications, release the owner lock just in
-  // case another notification occurs before this one and it requires the lock
-  bufferlist response_bl;
-  m_image_ctx.owner_lock.put_read();
-  int r = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl,
-                                     Notifier::NOTIFY_TIMEOUT, &response_bl);
-  m_image_ctx.owner_lock.get_read();
-
-  if (r < 0 && r != -ETIMEDOUT) {
-    lderr(m_image_ctx.cct) << this << " lock owner notification failed: "
-                          << cpp_strerror(r) << dendl;
-    return r;
-  }
-
-  typedef std::map<std::pair<uint64_t, uint64_t>, bufferlist> responses_t;
-  responses_t responses;
-  if (response_bl.length() > 0) {
-    try {
-      bufferlist::iterator iter = response_bl.begin();
-      ::decode(responses, iter);
-    } catch (const buffer::error &err) {
-      lderr(m_image_ctx.cct) << this << " failed to decode response" << dendl;
-      return -EINVAL;
-    }
-  }
-
-  bufferlist response;
-  bool lock_owner_responded = false;
-  for (responses_t::iterator i = responses.begin(); i != responses.end(); ++i) {
-    if (i->second.length() > 0) {
-      if (lock_owner_responded) {
-       lderr(m_image_ctx.cct) << this << " duplicate lock owners detected"
-                               << dendl;
-       return -EIO;
-      }
-      lock_owner_responded = true;
-      response.claim(i->second);
-    }
-  }
-
-  if (!lock_owner_responded) {
-    lderr(m_image_ctx.cct) << this << " no lock owners detected" << dendl;
-    return -ETIMEDOUT;
-  }
-
-  try {
-    bufferlist::iterator iter = response.begin();
-
-    ResponseMessage response_message;
-    ::decode(response_message, iter);
+int ImageWatcher::notify_lock_owner(bufferlist &&bl) {
+  C_SaferCond ctx;
+  notify_lock_owner(std::move(bl), &ctx);
+  return ctx.wait();
+}
 
-    r = response_message.result;
-  } catch (const buffer::error &err) {
-    r = -EINVAL;
-  }
-  return r;
+void ImageWatcher::notify_lock_owner(bufferlist &&bl, Context *on_finish) {
+  assert(m_image_ctx.owner_lock.is_locked());
+  NotifyLockOwner *notify_lock_owner = NotifyLockOwner::create(
+    m_image_ctx, m_notifier, std::move(bl), on_finish);
+  notify_lock_owner->send();
 }
 
 void ImageWatcher::schedule_async_request_timed_out(const AsyncRequestId &id) {
@@ -452,7 +404,7 @@ void ImageWatcher::async_request_timed_out(const AsyncRequestId &id) {
 }
 
 int ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id,
-                                      bufferlist &in,
+                                      bufferlist &&in,
                                       ProgressContext& prog_ctx) {
   assert(m_image_ctx.owner_lock.is_locked());
 
@@ -475,7 +427,7 @@ int ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id,
   } BOOST_SCOPE_EXIT_END
 
   schedule_async_request_timed_out(async_request_id);
-  int r = notify_lock_owner(in);
+  int r = notify_lock_owner(std::move(in));
   if (r < 0) {
     return r;
   }
index 76af12858a50526acd963d2e4d637ac32b06ac74..26738a6b728cbfbd21dd2cba75c870ab63cd1be1 100644 (file)
@@ -230,22 +230,25 @@ private:
   void set_owner_client_id(const watch_notify::ClientId &client_id);
   watch_notify::ClientId get_client_id();
 
+  void handle_request_lock(int r);
   void schedule_request_lock(bool use_timer, int timer_delay = -1);
 
-  int notify_lock_owner(bufferlist &bl);
+  int notify_lock_owner(bufferlist &&bl);
+  void notify_lock_owner(bufferlist &&bl, Context *on_finish);
 
   void schedule_async_request_timed_out(const watch_notify::AsyncRequestId &id);
   void async_request_timed_out(const watch_notify::AsyncRequestId &id);
   int notify_async_request(const watch_notify::AsyncRequestId &id,
-                           bufferlist &in, ProgressContext& prog_ctx);
-  void notify_request_leadership();
+                           bufferlist &&in, ProgressContext& prog_ctx);
 
   void schedule_async_progress(const watch_notify::AsyncRequestId &id,
                                uint64_t offset, uint64_t total);
   int notify_async_progress(const watch_notify::AsyncRequestId &id,
                             uint64_t offset, uint64_t total);
   void schedule_async_complete(const watch_notify::AsyncRequestId &id, int r);
-  int notify_async_complete(const watch_notify::AsyncRequestId &id, int r);
+  void notify_async_complete(const watch_notify::AsyncRequestId &id, int r);
+  void handle_async_complete(const watch_notify::AsyncRequestId &request, int r,
+                             int ret_val);
 
   int prepare_async_request(const watch_notify::AsyncRequestId& id,
                             bool* new_request, Context** ctx,
index 22a191ab76a5182b3bd5acd2f42ad57810564b48..2cc5fdf3bc7955c2a4cadd93d893788386d788d6 100644 (file)
@@ -36,6 +36,7 @@ librbd_internal_la_SOURCES = \
        librbd/image/RefreshRequest.cc \
        librbd/image/SetSnapRequest.cc \
        librbd/image_watcher/Notifier.cc \
+       librbd/image_watcher/NotifyLockOwner.cc \
        librbd/journal/Replay.cc \
        librbd/object_map/InvalidateRequest.cc \
        librbd/object_map/LockRequest.cc \
@@ -116,6 +117,7 @@ noinst_HEADERS += \
        librbd/image/RefreshRequest.h \
        librbd/image/SetSnapRequest.h \
        librbd/image_watcher/Notifier.h \
+       librbd/image_watcher/NotifyLockOwner.h \
        librbd/journal/Replay.h \
        librbd/journal/Types.h \
        librbd/object_map/InvalidateRequest.h \
index 29cd56f787a248e5232929d59d00df60df1d3937..fc3d07cbd71f7608ec6dcc3a587e8f4bc85d5aed 100644 (file)
@@ -50,7 +50,7 @@ void Notifier::notify(bufferlist &bl, bufferlist *out_bl, Context *on_finish) {
   C_AioNotify *ctx = new C_AioNotify(this, on_finish);
   librados::AioCompletion *comp = util::create_rados_ack_callback(ctx);
   int r = m_image_ctx.md_ctx.aio_notify(m_image_ctx.header_oid, comp, bl,
-                                        NOTIFY_TIMEOUT, nullptr);
+                                        NOTIFY_TIMEOUT, out_bl);
   assert(r == 0);
   comp->release();
 }
diff --git a/src/librbd/image_watcher/NotifyLockOwner.cc b/src/librbd/image_watcher/NotifyLockOwner.cc
new file mode 100644 (file)
index 0000000..9ee0ebe
--- /dev/null
@@ -0,0 +1,105 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include "librbd/image_watcher/NotifyLockOwner.h"
+#include "common/errno.h"
+#include "librbd/ImageCtx.h"
+#include "librbd/Utils.h"
+#include "librbd/WatchNotifyTypes.h"
+#include "librbd/image_watcher/Notifier.h"
+#include <map>
+
+#define dout_subsys ceph_subsys_rbd
+#undef dout_prefix
+#define dout_prefix *_dout << "librbd::image_watcher::NotifyLockOwner: " \
+                           << this << " " << __func__
+
+namespace librbd {
+namespace image_watcher {
+
+using namespace watch_notify;
+using util::create_context_callback;
+
+NotifyLockOwner::NotifyLockOwner(ImageCtx &image_ctx, Notifier &notifier,
+                                 bufferlist &&bl, Context *on_finish)
+  : m_image_ctx(image_ctx), m_notifier(notifier), m_bl(std::move(bl)),
+    m_on_finish(on_finish) {
+}
+
+void NotifyLockOwner::send() {
+  send_notify();
+}
+
+void NotifyLockOwner::send_notify() {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << dendl;
+
+  assert(m_image_ctx.owner_lock.is_locked());
+  m_notifier.notify(m_bl, &m_out_bl, create_context_callback<
+    NotifyLockOwner, &NotifyLockOwner::handle_notify>(this));
+}
+
+void NotifyLockOwner::handle_notify(int r) {
+  CephContext *cct = m_image_ctx.cct;
+  ldout(cct, 20) << ": r=" << r << dendl;
+
+  if (r < 0 && r != -ETIMEDOUT) {
+    lderr(cct) << ": lock owner notification failed: " << cpp_strerror(r)
+               << dendl;
+    finish(r);
+    return;
+  }
+
+  typedef std::map<std::pair<uint64_t, uint64_t>, bufferlist> responses_t;
+  responses_t responses;
+  if (m_out_bl.length() > 0) {
+    try {
+      bufferlist::iterator iter = m_out_bl.begin();
+      ::decode(responses, iter);
+    } catch (const buffer::error &err) {
+      lderr(cct) << ": failed to decode response" << dendl;
+      finish(-EINVAL);
+      return;
+    }
+  }
+
+  bufferlist response;
+  bool lock_owner_responded = false;
+  for (responses_t::iterator i = responses.begin(); i != responses.end(); ++i) {
+    if (i->second.length() > 0) {
+      if (lock_owner_responded) {
+        lderr(cct) << ": duplicate lock owners detected" << dendl;
+        finish(-EINVAL);
+        return;
+      }
+      lock_owner_responded = true;
+      response.claim(i->second);
+    }
+  }
+
+  if (!lock_owner_responded) {
+    lderr(cct) << ": no lock owners detected" << dendl;
+    finish(-ETIMEDOUT);
+    return;
+  }
+
+  try {
+    bufferlist::iterator iter = response.begin();
+
+    ResponseMessage response_message;
+    ::decode(response_message, iter);
+
+    r = response_message.result;
+  } catch (const buffer::error &err) {
+    r = -EINVAL;
+  }
+  finish(r);
+}
+
+void NotifyLockOwner::finish(int r) {
+  m_on_finish->complete(r);
+  delete this;
+}
+
+} // namespace image_watcher
+} // namespace librbd
diff --git a/src/librbd/image_watcher/NotifyLockOwner.h b/src/librbd/image_watcher/NotifyLockOwner.h
new file mode 100644 (file)
index 0000000..b4bdb2d
--- /dev/null
@@ -0,0 +1,49 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H
+#define CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H
+
+#include "include/int_types.h"
+#include "include/buffer.h"
+
+class Context;
+
+namespace librbd {
+
+struct ImageCtx;
+
+namespace image_watcher {
+
+class Notifier;
+
+class NotifyLockOwner {
+public:
+  static NotifyLockOwner *create(ImageCtx &image_ctx, Notifier &notifier,
+                                 bufferlist &&bl, Context *on_finish) {
+    return new NotifyLockOwner(image_ctx, notifier, std::move(bl), on_finish);
+  }
+
+  NotifyLockOwner(ImageCtx &image_ctx, Notifier &notifier, bufferlist &&bl,
+                  Context *on_finish);
+
+  void send();
+
+private:
+  ImageCtx &m_image_ctx;
+  Notifier &m_notifier;
+
+  bufferlist m_bl;
+  bufferlist m_out_bl;
+  Context *m_on_finish;
+
+  void send_notify();
+  void handle_notify(int r);
+
+  void finish(int r);
+};
+
+} // namespace image_watcher
+} // namespace librbd
+
+#endif // CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H
index a35d081ce2713a4481ff45232508267fca8255af..3e42690dbb7696db49f5186a8084aa2d006979ec 100644 (file)
@@ -7,6 +7,8 @@
 #include <boost/bind.hpp>
 #include <boost/function.hpp>
 
+#define dout_subsys ceph_subsys_rados
+
 namespace librados {
 
 TestWatchNotify::TestWatchNotify(CephContext *cct, Finisher *finisher)
@@ -81,6 +83,9 @@ int TestWatchNotify::notify(const std::string& oid, bufferlist& bl,
 void TestWatchNotify::notify_ack(const std::string& o, uint64_t notify_id,
                                  uint64_t handle, uint64_t gid,
                                  bufferlist& bl) {
+  ldout(m_cct, 20) << __func__ << ": notify_id=" << notify_id << ", "
+                   << "handle=" << handle << ", "
+                   << "gid=" << gid << dendl;
   Mutex::Locker lock(m_lock);
   WatcherID watcher_id = std::make_pair(gid, handle);
   ack_notify(o, notify_id, watcher_id, bl);