From 6eeede997d20217d99cbe1b7627fb6aa8de5e8c3 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Tue, 1 May 2018 15:16:29 -0400 Subject: [PATCH] librbd: prevent watcher from unregistering with in-flight actions Fixes: http://tracker.ceph.com/issues/23955 Signed-off-by: Jason Dillaman --- src/librbd/ImageWatcher.cc | 2 +- src/librbd/Watcher.cc | 147 ++++++++++++++++++++++++------------- src/librbd/Watcher.h | 18 +++-- 3 files changed, 109 insertions(+), 58 deletions(-) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 939eb8f7346..cbd4be98d58 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -367,7 +367,7 @@ void ImageWatcher::schedule_request_lock(bool use_timer, int timer_delay) { !m_image_ctx.exclusive_lock->is_lock_owner()); RWLock::RLocker watch_locker(this->m_watch_lock); - if (this->m_watch_state == Watcher::WATCH_STATE_REGISTERED) { + if (this->is_registered(this->m_watch_lock)) { ldout(m_image_ctx.cct, 15) << this << " requesting exclusive lock" << dendl; FunctionContext *ctx = new FunctionContext( diff --git a/src/librbd/Watcher.cc b/src/librbd/Watcher.cc index 9c3ef192745..9a72fc13a76 100644 --- a/src/librbd/Watcher.cc +++ b/src/librbd/Watcher.cc @@ -93,23 +93,23 @@ Watcher::Watcher(librados::IoCtx& ioctx, ContextWQ *work_queue, m_cct(reinterpret_cast(ioctx.cct())), m_watch_lock(util::unique_lock_name("librbd::Watcher::m_watch_lock", this)), m_watch_handle(0), m_notifier(work_queue, ioctx, oid), - m_watch_state(WATCH_STATE_UNREGISTERED), m_watch_ctx(*this) { + m_watch_state(WATCH_STATE_IDLE), m_watch_ctx(*this) { } Watcher::~Watcher() { RWLock::RLocker l(m_watch_lock); - assert(m_watch_state != WATCH_STATE_REGISTERED); + assert(is_unregistered(m_watch_lock)); } void Watcher::register_watch(Context *on_finish) { ldout(m_cct, 10) << dendl; RWLock::RLocker watch_locker(m_watch_lock); - assert(m_watch_state == WATCH_STATE_UNREGISTERED); + assert(is_unregistered(m_watch_lock)); m_watch_state = WATCH_STATE_REGISTERING; librados::AioCompletion *aio_comp = create_rados_callback( - new C_RegisterWatch(this, on_finish)); + new C_RegisterWatch(this, on_finish)); int r = m_ioctx.aio_watch(m_oid, aio_comp, &m_watch_handle, &m_watch_ctx); assert(r == 0); aio_comp->release(); @@ -117,27 +117,35 @@ void Watcher::register_watch(Context *on_finish) { void Watcher::handle_register_watch(int r, Context *on_finish) { ldout(m_cct, 10) << "r=" << r << dendl; + + bool watch_error = false; Context *unregister_watch_ctx = nullptr; { RWLock::WLocker watch_locker(m_watch_lock); assert(m_watch_state == WATCH_STATE_REGISTERING); - std::swap(unregister_watch_ctx, m_unregister_watch_ctx); + m_watch_state = WATCH_STATE_IDLE; if (r < 0) { lderr(m_cct) << "failed to register watch: " << cpp_strerror(r) << dendl; m_watch_handle = 0; - m_watch_state = WATCH_STATE_UNREGISTERED; - } else { - m_watch_state = WATCH_STATE_REGISTERED; + } + + if (m_unregister_watch_ctx != nullptr) { + std::swap(unregister_watch_ctx, m_unregister_watch_ctx); + } else if (r == 0 && m_watch_error) { + lderr(m_cct) << "re-registering watch after error" << dendl; + m_watch_state = WATCH_STATE_REWATCHING; + watch_error = true; } } on_finish->complete(r); - // wake up pending unregister request if (unregister_watch_ctx != nullptr) { unregister_watch_ctx->complete(0); + } else if (watch_error) { + rewatch(); } } @@ -146,8 +154,7 @@ void Watcher::unregister_watch(Context *on_finish) { { RWLock::WLocker watch_locker(m_watch_lock); - if (m_watch_state == WATCH_STATE_REGISTERING || - m_watch_state == WATCH_STATE_REWATCHING) { + if (m_watch_state != WATCH_STATE_IDLE) { ldout(m_cct, 10) << "delaying unregister until register completed" << dendl; @@ -156,17 +163,13 @@ void Watcher::unregister_watch(Context *on_finish) { unregister_watch(on_finish); }); return; - } - - if (m_watch_state == WATCH_STATE_REGISTERED || - m_watch_state == WATCH_STATE_ERROR) { - m_watch_state = WATCH_STATE_UNREGISTERED; - + } else if (is_registered(m_watch_lock)) { librados::AioCompletion *aio_comp = create_rados_callback( new C_UnwatchAndFlush(m_ioctx, on_finish)); int r = m_ioctx.aio_unwatch(m_watch_handle, aio_comp); assert(r == 0); aio_comp->release(); + m_watch_handle = 0; return; } } @@ -208,8 +211,8 @@ std::string Watcher::get_oid() const { } void Watcher::set_oid(const string& oid) { - RWLock::WLocker l(m_watch_lock); - assert(m_watch_state == WATCH_STATE_UNREGISTERED); + RWLock::WLocker watch_locker(m_watch_lock); + assert(is_unregistered(m_watch_lock)); m_oid = oid; } @@ -217,9 +220,11 @@ void Watcher::set_oid(const string& oid) { void Watcher::handle_error(uint64_t handle, int err) { lderr(m_cct) << "handle=" << handle << ": " << cpp_strerror(err) << dendl; - RWLock::WLocker l(m_watch_lock); - if (m_watch_state == WATCH_STATE_REGISTERED) { - m_watch_state = WATCH_STATE_ERROR; + RWLock::WLocker watch_locker(m_watch_lock); + m_watch_error = true; + + if (is_registered(m_watch_lock)) { + m_watch_state = WATCH_STATE_REWATCHING; FunctionContext *ctx = new FunctionContext( boost::bind(&Watcher::rewatch, this)); @@ -235,55 +240,93 @@ void Watcher::acknowledge_notify(uint64_t notify_id, uint64_t handle, void Watcher::rewatch() { ldout(m_cct, 10) << dendl; - RWLock::WLocker l(m_watch_lock); - if (m_watch_state != WATCH_STATE_ERROR) { - return; + Context *unregister_watch_ctx = nullptr; + { + RWLock::WLocker watch_locker(m_watch_lock); + assert(m_watch_state == WATCH_STATE_REWATCHING); + + if (m_unregister_watch_ctx != nullptr) { + m_watch_state = WATCH_STATE_IDLE; + std::swap(unregister_watch_ctx, m_unregister_watch_ctx); + } else { + m_watch_error = false; + auto ctx = create_context_callback< + Watcher, &Watcher::handle_rewatch>(this); + auto req = RewatchRequest::create(m_ioctx, m_oid, m_watch_lock, + &m_watch_ctx, &m_watch_handle, ctx); + req->send(); + return; + } } - m_watch_state = WATCH_STATE_REWATCHING; - - Context *ctx = create_context_callback(this); - RewatchRequest *req = RewatchRequest::create(m_ioctx, m_oid, m_watch_lock, - &m_watch_ctx, - &m_watch_handle, ctx); - req->send(); + + unregister_watch_ctx->complete(0); } void Watcher::handle_rewatch(int r) { ldout(m_cct, 10) << "r=" << r << dendl; - WatchState next_watch_state = WATCH_STATE_REGISTERED; - if (r == -EBLACKLISTED) { - lderr(m_cct) << "client blacklisted" << dendl; - next_watch_state = WATCH_STATE_UNREGISTERED; - } else if (r == -ENOENT) { - lderr(m_cct) << "failed to unwatch: " << cpp_strerror(r) << dendl; - next_watch_state = WATCH_STATE_UNREGISTERED; - } else if (r < 0) { - next_watch_state = WATCH_STATE_ERROR; + bool watch_error = false; + Context *unregister_watch_ctx = nullptr; + { + RWLock::WLocker watch_locker(m_watch_lock); + assert(m_watch_state == WATCH_STATE_REWATCHING); + if (m_unregister_watch_ctx != nullptr) { ldout(m_cct, 10) << "image is closing, skip rewatch" << dendl; - } else { - rewatch(); - } + m_watch_state = WATCH_STATE_IDLE; + std::swap(unregister_watch_ctx, m_unregister_watch_ctx); + } else if (r == -EBLACKLISTED) { + lderr(m_cct) << "client blacklisted" << dendl; + } else if (r == -ENOENT) { + ldout(m_cct, 5) << "object does not exist" << dendl; + } else if (r < 0) { + lderr(m_cct) << "failed to rewatch: " << cpp_strerror(r) << dendl; + watch_error = true; + } else if (m_watch_error) { + lderr(m_cct) << "re-registering watch after error" << dendl; + watch_error = true; + } } + if (unregister_watch_ctx != nullptr) { + unregister_watch_ctx->complete(0); + return; + } else if (watch_error) { + rewatch(); + return; + } + + auto ctx = create_context_callback< + Watcher, &Watcher::handle_rewatch_callback>(this); + m_work_queue->queue(ctx, r); +} + +void Watcher::handle_rewatch_callback(int r) { + ldout(m_cct, 10) << "r=" << r << dendl; + handle_rewatch_complete(r); + + bool watch_error = false; Context *unregister_watch_ctx = nullptr; { RWLock::WLocker watch_locker(m_watch_lock); assert(m_watch_state == WATCH_STATE_REWATCHING); - m_watch_state = next_watch_state; - - std::swap(unregister_watch_ctx, m_unregister_watch_ctx); - m_work_queue->queue( - create_context_callback(this), r); + if (m_unregister_watch_ctx != nullptr) { + m_watch_state = WATCH_STATE_IDLE; + std::swap(unregister_watch_ctx, m_unregister_watch_ctx); + } else if (r == -EBLACKLISTED || r == -ENOENT) { + m_watch_state = WATCH_STATE_IDLE; + } else if (m_watch_error) { + watch_error = true; + } else { + m_watch_state = WATCH_STATE_IDLE; + } } - // wake up pending unregister request if (unregister_watch_ctx != nullptr) { unregister_watch_ctx->complete(0); + } else if (watch_error) { + rewatch(); } } diff --git a/src/librbd/Watcher.h b/src/librbd/Watcher.h index 39009027d9e..518fbdd015e 100644 --- a/src/librbd/Watcher.h +++ b/src/librbd/Watcher.h @@ -54,19 +54,17 @@ public: bool is_registered() const { RWLock::RLocker locker(m_watch_lock); - return m_watch_state == WATCH_STATE_REGISTERED; + return is_registered(m_watch_lock); } bool is_unregistered() const { RWLock::RLocker locker(m_watch_lock); - return m_watch_state == WATCH_STATE_UNREGISTERED; + return is_unregistered(m_watch_lock); } protected: enum WatchState { - WATCH_STATE_UNREGISTERED, + WATCH_STATE_IDLE, WATCH_STATE_REGISTERING, - WATCH_STATE_REGISTERED, - WATCH_STATE_ERROR, WATCH_STATE_REWATCHING }; @@ -80,6 +78,13 @@ protected: WatchState m_watch_state; AsyncOpTracker m_async_op_tracker; + bool is_registered(const RWLock&) const { + return (m_watch_state == WATCH_STATE_IDLE && m_watch_handle != 0); + } + bool is_unregistered(const RWLock&) const { + return (m_watch_state == WATCH_STATE_IDLE && m_watch_handle == 0); + } + void send_notify(bufferlist &payload, watcher::NotifyResponse *response = nullptr, Context *on_finish = nullptr); @@ -155,12 +160,15 @@ private: WatchCtx m_watch_ctx; Context *m_unregister_watch_ctx = nullptr; + bool m_watch_error = false; + uint32_t m_blocked_count = 0; void handle_register_watch(int r, Context *on_finish); void rewatch(); void handle_rewatch(int r); + void handle_rewatch_callback(int r); }; -- 2.39.5