From bc2ae0ef569d7242c7a3fb34110a2bf76e047d9b Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 16 Feb 2016 10:10:16 -0500 Subject: [PATCH] librbd: use AIO notifications to prevent blocking ops If two or more images share the same CephContext, notifications from one image can block the work queue which will potentially block acknowledging the notification until after it times out. Signed-off-by: Jason Dillaman --- src/librbd/ImageWatcher.cc | 140 ++++++------------ src/librbd/ImageWatcher.h | 11 +- src/librbd/Makefile.am | 2 + src/librbd/image_watcher/Notifier.cc | 2 +- src/librbd/image_watcher/NotifyLockOwner.cc | 105 +++++++++++++ src/librbd/image_watcher/NotifyLockOwner.h | 49 ++++++ .../librados_test_stub/TestWatchNotify.cc | 5 + 7 files changed, 215 insertions(+), 99 deletions(-) create mode 100644 src/librbd/image_watcher/NotifyLockOwner.cc create mode 100644 src/librbd/image_watcher/NotifyLockOwner.h diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index a85b44375aed1..35b5bf9b1aa14 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -12,6 +12,7 @@ #include "librbd/TaskFinisher.h" #include "librbd/Utils.h" #include "librbd/image_watcher/Notifier.h" +#include "librbd/image_watcher/NotifyLockOwner.h" #include "include/encoding.h" #include "include/stringify.h" #include "common/errno.h" @@ -28,6 +29,7 @@ namespace librbd { using namespace image_watcher; using namespace watch_notify; +using util::create_context_callback; static const double RETRY_DELAY_SECONDS = 1.0; @@ -109,8 +111,7 @@ int ImageWatcher::notify_async_progress(const AsyncRequestId &request, bufferlist bl; ::encode(NotifyMessage(AsyncProgressPayload(request, offset, total)), bl); - m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, - Notifier::NOTIFY_TIMEOUT, NULL); + m_notifier.notify(bl, nullptr, nullptr); return 0; } @@ -121,30 +122,31 @@ void ImageWatcher::schedule_async_complete(const AsyncRequestId &request, m_task_finisher->queue(ctx); } -int ImageWatcher::notify_async_complete(const AsyncRequestId &request, - int r) { +void ImageWatcher::notify_async_complete(const AsyncRequestId &request, int r) { ldout(m_image_ctx.cct, 20) << this << " remote async request finished: " << request << " = " << r << dendl; bufferlist bl; ::encode(NotifyMessage(AsyncCompletePayload(request, r)), bl); + m_notifier.notify(bl, nullptr, new FunctionContext( + boost::bind(&ImageWatcher::handle_async_complete, this, request, r, _1))); +} - if (r >= 0) { - m_image_ctx.notify_update(); - } - int ret = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, - Notifier::NOTIFY_TIMEOUT, NULL); - if (ret < 0) { +void ImageWatcher::handle_async_complete(const AsyncRequestId &request, int r, + int ret_val) { + ldout(m_image_ctx.cct, 20) << this << " " << __func__ << ": " + << "request=" << request << ", r=" << ret_val + << dendl; + if (ret_val < 0) { lderr(m_image_ctx.cct) << this << " failed to notify async complete: " - << cpp_strerror(ret) << dendl; - if (ret == -ETIMEDOUT) { + << cpp_strerror(ret_val) << dendl; + if (ret_val == -ETIMEDOUT) { schedule_async_complete(request, r); } } else { - RWLock::WLocker l(m_async_request_lock); + RWLock::WLocker async_request_locker(m_async_request_lock); m_async_pending.erase(request); } - return 0; } int ImageWatcher::notify_flatten(uint64_t request_id, ProgressContext &prog_ctx) { @@ -156,8 +158,7 @@ int ImageWatcher::notify_flatten(uint64_t request_id, ProgressContext &prog_ctx) bufferlist bl; ::encode(NotifyMessage(FlattenPayload(async_request_id)), bl); - - return notify_async_request(async_request_id, bl, prog_ctx); + return notify_async_request(async_request_id, std::move(bl), prog_ctx); } int ImageWatcher::notify_resize(uint64_t request_id, uint64_t size, @@ -170,8 +171,7 @@ int ImageWatcher::notify_resize(uint64_t request_id, uint64_t size, bufferlist bl; ::encode(NotifyMessage(ResizePayload(size, async_request_id)), bl); - - return notify_async_request(async_request_id, bl, prog_ctx); + return notify_async_request(async_request_id, std::move(bl), prog_ctx); } int ImageWatcher::notify_snap_create(const std::string &snap_name) { @@ -181,8 +181,7 @@ int ImageWatcher::notify_snap_create(const std::string &snap_name) { bufferlist bl; ::encode(NotifyMessage(SnapCreatePayload(snap_name)), bl); - - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } int ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id, @@ -193,8 +192,7 @@ int ImageWatcher::notify_snap_rename(const snapid_t &src_snap_id, bufferlist bl; ::encode(NotifyMessage(SnapRenamePayload(src_snap_id, dst_snap_name)), bl); - - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } int ImageWatcher::notify_snap_remove(const std::string &snap_name) { assert(m_image_ctx.owner_lock.is_locked()); @@ -203,8 +201,7 @@ int ImageWatcher::notify_snap_remove(const std::string &snap_name) { bufferlist bl; ::encode(NotifyMessage(SnapRemovePayload(snap_name)), bl); - - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } int ImageWatcher::notify_snap_protect(const std::string &snap_name) { @@ -214,7 +211,7 @@ int ImageWatcher::notify_snap_protect(const std::string &snap_name) { bufferlist bl; ::encode(NotifyMessage(SnapProtectPayload(snap_name)), bl); - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } int ImageWatcher::notify_snap_unprotect(const std::string &snap_name) { @@ -224,7 +221,7 @@ int ImageWatcher::notify_snap_unprotect(const std::string &snap_name) { bufferlist bl; ::encode(NotifyMessage(SnapUnprotectPayload(snap_name)), bl); - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } int ImageWatcher::notify_rebuild_object_map(uint64_t request_id, @@ -237,8 +234,7 @@ int ImageWatcher::notify_rebuild_object_map(uint64_t request_id, bufferlist bl; ::encode(NotifyMessage(RebuildObjectMapPayload(async_request_id)), bl); - - return notify_async_request(async_request_id, bl, prog_ctx); + return notify_async_request(async_request_id, std::move(bl), prog_ctx); } int ImageWatcher::notify_rename(const std::string &image_name) { @@ -248,7 +244,7 @@ int ImageWatcher::notify_rename(const std::string &image_name) { bufferlist bl; ::encode(NotifyMessage(RenamePayload(image_name)), bl); - return notify_lock_owner(bl); + return notify_lock_owner(std::move(bl)); } void ImageWatcher::notify_header_update(Context *on_finish) { @@ -297,8 +293,7 @@ void ImageWatcher::notify_acquired_lock() { bufferlist bl; ::encode(NotifyMessage(AcquiredLockPayload(client_id)), bl); - m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, - Notifier::NOTIFY_TIMEOUT, NULL); + m_notifier.notify(bl, nullptr, nullptr); } void ImageWatcher::notify_released_lock() { @@ -311,8 +306,7 @@ void ImageWatcher::notify_released_lock() { bufferlist bl; ::encode(NotifyMessage(ReleasedLockPayload(get_client_id())), bl); - m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, - Notifier::NOTIFY_TIMEOUT, NULL); + m_notifier.notify(bl, nullptr, nullptr); } void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) { @@ -349,8 +343,14 @@ void ImageWatcher::notify_request_lock() { bufferlist bl; ::encode(NotifyMessage(RequestLockPayload(get_client_id())), bl); + notify_lock_owner(std::move(bl), create_context_callback< + ImageWatcher, &ImageWatcher::handle_request_lock>(this)); +} + +void ImageWatcher::handle_request_lock(int r) { + RWLock::RLocker owner_locker(m_image_ctx.owner_lock); + assert(!m_image_ctx.exclusive_lock->is_lock_owner()); - int r = notify_lock_owner(bl); if (r == -ETIMEDOUT) { ldout(m_image_ctx.cct, 5) << this << " timed out requesting lock: retrying" << dendl; @@ -370,65 +370,17 @@ void ImageWatcher::notify_request_lock() { } } -int ImageWatcher::notify_lock_owner(bufferlist &bl) { - assert(m_image_ctx.owner_lock.is_locked()); - - // since we need to ack our own notifications, release the owner lock just in - // case another notification occurs before this one and it requires the lock - bufferlist response_bl; - m_image_ctx.owner_lock.put_read(); - int r = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, - Notifier::NOTIFY_TIMEOUT, &response_bl); - m_image_ctx.owner_lock.get_read(); - - if (r < 0 && r != -ETIMEDOUT) { - lderr(m_image_ctx.cct) << this << " lock owner notification failed: " - << cpp_strerror(r) << dendl; - return r; - } - - typedef std::map, bufferlist> responses_t; - responses_t responses; - if (response_bl.length() > 0) { - try { - bufferlist::iterator iter = response_bl.begin(); - ::decode(responses, iter); - } catch (const buffer::error &err) { - lderr(m_image_ctx.cct) << this << " failed to decode response" << dendl; - return -EINVAL; - } - } - - bufferlist response; - bool lock_owner_responded = false; - for (responses_t::iterator i = responses.begin(); i != responses.end(); ++i) { - if (i->second.length() > 0) { - if (lock_owner_responded) { - lderr(m_image_ctx.cct) << this << " duplicate lock owners detected" - << dendl; - return -EIO; - } - lock_owner_responded = true; - response.claim(i->second); - } - } - - if (!lock_owner_responded) { - lderr(m_image_ctx.cct) << this << " no lock owners detected" << dendl; - return -ETIMEDOUT; - } - - try { - bufferlist::iterator iter = response.begin(); - - ResponseMessage response_message; - ::decode(response_message, iter); +int ImageWatcher::notify_lock_owner(bufferlist &&bl) { + C_SaferCond ctx; + notify_lock_owner(std::move(bl), &ctx); + return ctx.wait(); +} - r = response_message.result; - } catch (const buffer::error &err) { - r = -EINVAL; - } - return r; +void ImageWatcher::notify_lock_owner(bufferlist &&bl, Context *on_finish) { + assert(m_image_ctx.owner_lock.is_locked()); + NotifyLockOwner *notify_lock_owner = NotifyLockOwner::create( + m_image_ctx, m_notifier, std::move(bl), on_finish); + notify_lock_owner->send(); } void ImageWatcher::schedule_async_request_timed_out(const AsyncRequestId &id) { @@ -452,7 +404,7 @@ void ImageWatcher::async_request_timed_out(const AsyncRequestId &id) { } int ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id, - bufferlist &in, + bufferlist &&in, ProgressContext& prog_ctx) { assert(m_image_ctx.owner_lock.is_locked()); @@ -475,7 +427,7 @@ int ImageWatcher::notify_async_request(const AsyncRequestId &async_request_id, } BOOST_SCOPE_EXIT_END schedule_async_request_timed_out(async_request_id); - int r = notify_lock_owner(in); + int r = notify_lock_owner(std::move(in)); if (r < 0) { return r; } diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index 76af12858a505..26738a6b728cb 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -230,22 +230,25 @@ private: void set_owner_client_id(const watch_notify::ClientId &client_id); watch_notify::ClientId get_client_id(); + void handle_request_lock(int r); void schedule_request_lock(bool use_timer, int timer_delay = -1); - int notify_lock_owner(bufferlist &bl); + int notify_lock_owner(bufferlist &&bl); + void notify_lock_owner(bufferlist &&bl, Context *on_finish); void schedule_async_request_timed_out(const watch_notify::AsyncRequestId &id); void async_request_timed_out(const watch_notify::AsyncRequestId &id); int notify_async_request(const watch_notify::AsyncRequestId &id, - bufferlist &in, ProgressContext& prog_ctx); - void notify_request_leadership(); + bufferlist &&in, ProgressContext& prog_ctx); void schedule_async_progress(const watch_notify::AsyncRequestId &id, uint64_t offset, uint64_t total); int notify_async_progress(const watch_notify::AsyncRequestId &id, uint64_t offset, uint64_t total); void schedule_async_complete(const watch_notify::AsyncRequestId &id, int r); - int notify_async_complete(const watch_notify::AsyncRequestId &id, int r); + void notify_async_complete(const watch_notify::AsyncRequestId &id, int r); + void handle_async_complete(const watch_notify::AsyncRequestId &request, int r, + int ret_val); int prepare_async_request(const watch_notify::AsyncRequestId& id, bool* new_request, Context** ctx, diff --git a/src/librbd/Makefile.am b/src/librbd/Makefile.am index 22a191ab76a51..2cc5fdf3bc795 100644 --- a/src/librbd/Makefile.am +++ b/src/librbd/Makefile.am @@ -36,6 +36,7 @@ librbd_internal_la_SOURCES = \ librbd/image/RefreshRequest.cc \ librbd/image/SetSnapRequest.cc \ librbd/image_watcher/Notifier.cc \ + librbd/image_watcher/NotifyLockOwner.cc \ librbd/journal/Replay.cc \ librbd/object_map/InvalidateRequest.cc \ librbd/object_map/LockRequest.cc \ @@ -116,6 +117,7 @@ noinst_HEADERS += \ librbd/image/RefreshRequest.h \ librbd/image/SetSnapRequest.h \ librbd/image_watcher/Notifier.h \ + librbd/image_watcher/NotifyLockOwner.h \ librbd/journal/Replay.h \ librbd/journal/Types.h \ librbd/object_map/InvalidateRequest.h \ diff --git a/src/librbd/image_watcher/Notifier.cc b/src/librbd/image_watcher/Notifier.cc index 29cd56f787a24..fc3d07cbd71f7 100644 --- a/src/librbd/image_watcher/Notifier.cc +++ b/src/librbd/image_watcher/Notifier.cc @@ -50,7 +50,7 @@ void Notifier::notify(bufferlist &bl, bufferlist *out_bl, Context *on_finish) { C_AioNotify *ctx = new C_AioNotify(this, on_finish); librados::AioCompletion *comp = util::create_rados_ack_callback(ctx); int r = m_image_ctx.md_ctx.aio_notify(m_image_ctx.header_oid, comp, bl, - NOTIFY_TIMEOUT, nullptr); + NOTIFY_TIMEOUT, out_bl); assert(r == 0); comp->release(); } diff --git a/src/librbd/image_watcher/NotifyLockOwner.cc b/src/librbd/image_watcher/NotifyLockOwner.cc new file mode 100644 index 0000000000000..9ee0ebe8a2d97 --- /dev/null +++ b/src/librbd/image_watcher/NotifyLockOwner.cc @@ -0,0 +1,105 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include "librbd/image_watcher/NotifyLockOwner.h" +#include "common/errno.h" +#include "librbd/ImageCtx.h" +#include "librbd/Utils.h" +#include "librbd/WatchNotifyTypes.h" +#include "librbd/image_watcher/Notifier.h" +#include + +#define dout_subsys ceph_subsys_rbd +#undef dout_prefix +#define dout_prefix *_dout << "librbd::image_watcher::NotifyLockOwner: " \ + << this << " " << __func__ + +namespace librbd { +namespace image_watcher { + +using namespace watch_notify; +using util::create_context_callback; + +NotifyLockOwner::NotifyLockOwner(ImageCtx &image_ctx, Notifier ¬ifier, + bufferlist &&bl, Context *on_finish) + : m_image_ctx(image_ctx), m_notifier(notifier), m_bl(std::move(bl)), + m_on_finish(on_finish) { +} + +void NotifyLockOwner::send() { + send_notify(); +} + +void NotifyLockOwner::send_notify() { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << dendl; + + assert(m_image_ctx.owner_lock.is_locked()); + m_notifier.notify(m_bl, &m_out_bl, create_context_callback< + NotifyLockOwner, &NotifyLockOwner::handle_notify>(this)); +} + +void NotifyLockOwner::handle_notify(int r) { + CephContext *cct = m_image_ctx.cct; + ldout(cct, 20) << ": r=" << r << dendl; + + if (r < 0 && r != -ETIMEDOUT) { + lderr(cct) << ": lock owner notification failed: " << cpp_strerror(r) + << dendl; + finish(r); + return; + } + + typedef std::map, bufferlist> responses_t; + responses_t responses; + if (m_out_bl.length() > 0) { + try { + bufferlist::iterator iter = m_out_bl.begin(); + ::decode(responses, iter); + } catch (const buffer::error &err) { + lderr(cct) << ": failed to decode response" << dendl; + finish(-EINVAL); + return; + } + } + + bufferlist response; + bool lock_owner_responded = false; + for (responses_t::iterator i = responses.begin(); i != responses.end(); ++i) { + if (i->second.length() > 0) { + if (lock_owner_responded) { + lderr(cct) << ": duplicate lock owners detected" << dendl; + finish(-EINVAL); + return; + } + lock_owner_responded = true; + response.claim(i->second); + } + } + + if (!lock_owner_responded) { + lderr(cct) << ": no lock owners detected" << dendl; + finish(-ETIMEDOUT); + return; + } + + try { + bufferlist::iterator iter = response.begin(); + + ResponseMessage response_message; + ::decode(response_message, iter); + + r = response_message.result; + } catch (const buffer::error &err) { + r = -EINVAL; + } + finish(r); +} + +void NotifyLockOwner::finish(int r) { + m_on_finish->complete(r); + delete this; +} + +} // namespace image_watcher +} // namespace librbd diff --git a/src/librbd/image_watcher/NotifyLockOwner.h b/src/librbd/image_watcher/NotifyLockOwner.h new file mode 100644 index 0000000000000..b4bdb2da3fc1e --- /dev/null +++ b/src/librbd/image_watcher/NotifyLockOwner.h @@ -0,0 +1,49 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H +#define CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H + +#include "include/int_types.h" +#include "include/buffer.h" + +class Context; + +namespace librbd { + +struct ImageCtx; + +namespace image_watcher { + +class Notifier; + +class NotifyLockOwner { +public: + static NotifyLockOwner *create(ImageCtx &image_ctx, Notifier ¬ifier, + bufferlist &&bl, Context *on_finish) { + return new NotifyLockOwner(image_ctx, notifier, std::move(bl), on_finish); + } + + NotifyLockOwner(ImageCtx &image_ctx, Notifier ¬ifier, bufferlist &&bl, + Context *on_finish); + + void send(); + +private: + ImageCtx &m_image_ctx; + Notifier &m_notifier; + + bufferlist m_bl; + bufferlist m_out_bl; + Context *m_on_finish; + + void send_notify(); + void handle_notify(int r); + + void finish(int r); +}; + +} // namespace image_watcher +} // namespace librbd + +#endif // CEPH_LIBRBD_IMAGE_WATCHER_NOTIFY_LOCK_OWNER_H diff --git a/src/test/librados_test_stub/TestWatchNotify.cc b/src/test/librados_test_stub/TestWatchNotify.cc index a35d081ce2713..3e42690dbb769 100644 --- a/src/test/librados_test_stub/TestWatchNotify.cc +++ b/src/test/librados_test_stub/TestWatchNotify.cc @@ -7,6 +7,8 @@ #include #include +#define dout_subsys ceph_subsys_rados + namespace librados { TestWatchNotify::TestWatchNotify(CephContext *cct, Finisher *finisher) @@ -81,6 +83,9 @@ int TestWatchNotify::notify(const std::string& oid, bufferlist& bl, void TestWatchNotify::notify_ack(const std::string& o, uint64_t notify_id, uint64_t handle, uint64_t gid, bufferlist& bl) { + ldout(m_cct, 20) << __func__ << ": notify_id=" << notify_id << ", " + << "handle=" << handle << ", " + << "gid=" << gid << dendl; Mutex::Locker lock(m_lock); WatcherID watcher_id = std::make_pair(gid, handle); ack_notify(o, notify_id, watcher_id, bl); -- 2.39.5