From cd9d8eb79a1abf9b982d9236385ced396cb1339f Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Wed, 14 Jan 2015 15:56:15 -0500 Subject: [PATCH] librbd: add more robust retry handling to maintenance ops When image locking is enabled, snapshot create, resize, and flatten are coordinated with the lock owner. Previously, if the the lock owner changed during one of this operations, the operation would fail. Now librbd will attempt to restart the operation with the new lock owner (or become the owner itself). Signed-off-by: Jason Dillaman --- src/librbd/ImageWatcher.cc | 62 +++++++-- src/librbd/ImageWatcher.h | 1 - src/librbd/internal.cc | 216 ++++++++++++++++++------------- src/test/librbd/test_internal.cc | 16 +-- 4 files changed, 185 insertions(+), 110 deletions(-) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 9f95918ce278a..13cf69bee740a 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -267,12 +267,21 @@ int ImageWatcher::request_lock( } bool ImageWatcher::try_request_lock() { - RWLock::WLocker l(m_image_ctx.owner_lock); + assert(m_image_ctx.owner_lock.is_locked()); if (is_lock_owner()) { return true; } - int r = try_lock(); + int r = 0; + m_image_ctx.owner_lock.put_read(); + { + RWLock::WLocker l(m_image_ctx.owner_lock); + if (!is_lock_owner()) { + r = try_lock(); + } + } + m_image_ctx.owner_lock.get_read(); + if (r < 0) { ldout(m_image_ctx.cct, 5) << "failed to acquire exclusive lock:" << cpp_strerror(r) << dendl; @@ -292,8 +301,14 @@ bool ImageWatcher::try_request_lock() { void ImageWatcher::finalize_request_lock() { cancel_retry_aio_requests(); - if (try_request_lock()) { + bool owned_lock; + { + RWLock::RLocker l(m_image_ctx.owner_lock); + owned_lock = try_request_lock(); + } + if (owned_lock) { retry_aio_requests(); + } else { schedule_retry_aio_requests(); } @@ -450,6 +465,9 @@ int ImageWatcher::notify_async_complete(const RemoteAsyncRequest &request, } int ImageWatcher::notify_flatten(ProgressContext &prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; uint64_t async_request_id; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); @@ -461,6 +479,9 @@ int ImageWatcher::notify_flatten(ProgressContext &prog_ctx) { } int ImageWatcher::notify_resize(uint64_t size, ProgressContext &prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; uint64_t async_request_id; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); @@ -473,6 +494,9 @@ int ImageWatcher::notify_resize(uint64_t size, ProgressContext &prog_ctx) { } int ImageWatcher::notify_snap_create(const std::string &snap_name) { + assert(m_image_ctx.owner_lock.is_locked()); + assert(!is_lock_owner()); + bufferlist bl; ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, bl); ::encode(NOTIFY_OP_SNAP_CREATE, bl); @@ -599,10 +623,14 @@ uint64_t ImageWatcher::encode_async_request(bufferlist &bl) { int ImageWatcher::decode_response_code(bufferlist &bl) { int r; - bufferlist::iterator iter = bl.begin(); - DECODE_START(NOTIFY_VERSION, iter); - ::decode(r, iter); - DECODE_FINISH(iter); + try { + bufferlist::iterator iter = bl.begin(); + DECODE_START(NOTIFY_VERSION, iter); + ::decode(r, iter); + DECODE_FINISH(iter); + } catch (const buffer::error &err) { + r = -EINVAL; + } return r; } @@ -617,7 +645,9 @@ void ImageWatcher::notify_released_lock() { void ImageWatcher::notify_request_lock() { cancel_retry_aio_requests(); + m_image_ctx.owner_lock.get_read(); if (try_request_lock()) { + m_image_ctx.owner_lock.put_read(); retry_aio_requests(); return; } @@ -629,6 +659,8 @@ void ImageWatcher::notify_request_lock() { bufferlist response; int r = notify_lock_owner(bl, response); + m_image_ctx.owner_lock.put_read(); + if (r == -ETIMEDOUT) { ldout(m_image_ctx.cct, 5) << "timed out requesting lock: retrying" << dendl; retry_aio_requests(); @@ -640,9 +672,15 @@ void ImageWatcher::notify_request_lock() { } int ImageWatcher::notify_lock_owner(bufferlist &bl, bufferlist& response) { + assert(m_image_ctx.owner_lock.is_locked()); + + // since we need to ack our own notifications, release the owner lock just in + // case another notification occurs before this one and it requires the lock bufferlist response_bl; + m_image_ctx.owner_lock.put_read(); int r = m_image_ctx.md_ctx.notify2(m_image_ctx.header_oid, bl, NOTIFY_TIMEOUT, &response_bl); + m_image_ctx.owner_lock.get_read(); if (r < 0 && r != -ETIMEDOUT) { lderr(m_image_ctx.cct) << "lock owner notification failed: " << cpp_strerror(r) << dendl; @@ -683,6 +721,8 @@ int ImageWatcher::notify_lock_owner(bufferlist &bl, bufferlist& response) { int ImageWatcher::notify_async_request(uint64_t async_request_id, bufferlist &in, ProgressContext& prog_ctx) { + assert(m_image_ctx.owner_lock.is_locked()); + Mutex my_lock("librbd::ImageWatcher::notify_async_request::my_lock"); Cond cond; bool done = false; @@ -734,13 +774,16 @@ void ImageWatcher::handle_acquired_lock() { void ImageWatcher::handle_released_lock() { ldout(m_image_ctx.cct, 20) << "exclusive lock released" << dendl; + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::cancel_async_requests, this, -ERESTART)); + m_finisher->queue(ctx); Mutex::Locker l(m_aio_request_lock); if (!m_aio_requests.empty()) { ldout(m_image_ctx.cct, 20) << "queuing lock request" << dendl; - FunctionContext *ctx = new FunctionContext( + FunctionContext *req_ctx = new FunctionContext( boost::bind(&ImageWatcher::finalize_request_lock, this)); - m_finisher->queue(ctx); + m_finisher->queue(req_ctx); } } @@ -867,7 +910,6 @@ void ImageWatcher::handle_snap_create(bufferlist::iterator iter, bufferlist *out ::decode(snap_name, iter); ldout(m_image_ctx.cct, 20) << "remote snap_create request: " << snap_name << dendl; - int r = librbd::snap_create(&m_image_ctx, snap_name.c_str(), false); ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, *out); ::encode(r, *out); diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index b30ab5ffcf7fa..1ef02f140f845 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -151,7 +151,6 @@ namespace librbd { int notify_async_request(uint64_t async_request_id, bufferlist &in, ProgressContext& prog_ctx); void notify_request_leadership(); - int notify_leader(bufferlist &bl, bufferlist &response); void handle_header_update(); void handle_acquired_lock(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index 5087a92ed7f46..db2be22d5a746 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -157,6 +157,10 @@ namespace librbd { void trim_image(ImageCtx *ictx, uint64_t newsize, ProgressContext& prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + Mutex my_lock("librbd::trim_image::my_lock"); Cond cond; bool done; @@ -445,7 +449,7 @@ namespace librbd { { assert(ictx->owner_lock.is_locked() && !ictx->owner_lock.is_wlocked()); if (ictx->image_watcher == NULL) { - return -EROFS;; + return -EROFS; } else if (!ictx->image_watcher->is_lock_supported() || ictx->image_watcher->is_lock_owner()) { return 0; @@ -476,13 +480,19 @@ namespace librbd { if (r < 0) return r; - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_snap_create(snap_name); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } + + r = ictx->image_watcher->notify_snap_create(snap_name); + if (r != -ETIMEDOUT) { + return r; + } + ldout(ictx->cct, 5) << "snap_create timed out notifying lock owner" << dendl; } RWLock::RLocker l2(ictx->md_lock); @@ -1443,8 +1453,20 @@ reprotect_and_return_err: unknown_format = false; id = ictx->id; + ictx->owner_lock.get_read(); + if (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0 || !ictx->image_watcher->is_lock_owner()) { + lderr(cct) << "cannot obtain exclusive lock - not removing" << dendl; + ictx->owner_lock.put_read(); + close_image(ictx); + return -EBUSY; + } + } + if (ictx->snaps.size()) { lderr(cct) << "image has snapshots - not removing" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return -ENOTEMPTY; } @@ -1453,11 +1475,13 @@ reprotect_and_return_err: r = io_ctx.list_watchers(header_oid, &watchers); if (r < 0) { lderr(cct) << "error listing watchers" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return r; } if (watchers.size() > 1) { lderr(cct) << "image has watchers - not removing" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return -EBUSY; } @@ -1476,9 +1500,12 @@ reprotect_and_return_err: parent_info.spec, id); if (r < 0 && r != -ENOENT) { lderr(cct) << "error removing child from children list" << dendl; + ictx->owner_lock.put_read(); close_image(ictx); return r; } + + ictx->owner_lock.put_read(); close_image(ictx); ldout(cct, 2) << "removing header..." << dendl; @@ -1532,39 +1559,50 @@ reprotect_and_return_err: ldout(cct, 20) << "resize " << ictx << " " << ictx->size << " -> " << size << dendl; - { - RWLock::RLocker l(ictx->owner_lock); - int r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_resize(size, prog_ctx); - } - } + int r; + do { + Mutex my_lock("librbd::resize::my_lock"); + Cond cond; + bool done; + { + RWLock::RLocker l(ictx->owner_lock); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } - Mutex my_lock("librbd::resize::my_lock"); - Cond cond; - bool done; - int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + r = ictx->image_watcher->notify_resize(size, prog_ctx); + if (r != -ETIMEDOUT && r != -ERESTART) { + return r; + } + ldout(ictx->cct, 5) << "resize timed out notifying lock owner" << dendl; + } - ret = async_resize(ictx, ctx, size, prog_ctx); - if (ret < 0) { - delete ctx; - return ret; - } + Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r); + r = async_resize(ictx, ctx, size, prog_ctx); + if (r < 0) { + delete ctx; + return r; + } + } - my_lock.Lock(); - while (!done) { - cond.Wait(my_lock); - } - my_lock.Unlock(); + my_lock.Lock(); + while (!done) { + cond.Wait(my_lock); + } + my_lock.Unlock(); + + if (r == -ERESTART) { + ldout(ictx->cct, 5) << "resize interrupted: restarting" << dendl; + } + } while (r == -ERESTART); notify_change(ictx->md_ctx, ictx->header_oid, ictx); ldout(cct, 2) << "resize finished" << dendl; - return ret; + return r; } class AsyncResizeFinishContext : public Context { @@ -1588,6 +1626,10 @@ reprotect_and_return_err: int async_resize(ImageCtx *ictx, Context *ctx, uint64_t size, ProgressContext &prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + CephContext *cct = ictx->cct; ldout(cct, 20) << "async_resize " << ictx << " " << ictx->size << " -> " << size << dendl; @@ -1603,17 +1645,7 @@ reprotect_and_return_err: uint64_t original_size; { - RWLock::RLocker l(ictx->owner_lock); - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return -EROFS; - } - - RWLock::RLocker l2(ictx->md_lock); + RWLock::RLocker l(ictx->md_lock); original_size = ictx->size; if (size < ictx->size) { ictx->wait_for_pending_copyup(); @@ -1662,7 +1694,7 @@ reprotect_and_return_err: RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -1745,7 +1777,7 @@ reprotect_and_return_err: RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - return -EROFS; + return -ERESTART; } string oid = m_ictx->get_object_name(m_object_no); @@ -1782,7 +1814,7 @@ reprotect_and_return_err: RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -2588,7 +2620,7 @@ reprotect_and_return_err: RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - return -EROFS; + return -ERESTART; } RWLock::RLocker l2(m_ictx->md_lock); @@ -2661,7 +2693,7 @@ reprotect_and_return_err: RWLock::RLocker l(m_ictx->owner_lock); if (m_ictx->image_watcher->is_lock_supported() && !m_ictx->image_watcher->is_lock_owner()) { - r = -EROFS; + r = -ERESTART; return; } @@ -2713,43 +2745,58 @@ reprotect_and_return_err: return -EROFS; } - { - RWLock::RLocker l(ictx->owner_lock); - int r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - return ictx->image_watcher->notify_flatten(prog_ctx); - } - } + int r; + do { + Mutex my_lock("librbd::flatten:my_lock"); + Cond cond; + bool done; + { + RWLock::RLocker l(ictx->owner_lock); + while (ictx->image_watcher->is_lock_supported()) { + r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } else if (ictx->image_watcher->is_lock_owner()) { + break; + } - Mutex my_lock("librbd::flatten:my_lock"); - Cond cond; - bool done; - int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + r = ictx->image_watcher->notify_flatten(prog_ctx); + if (r != -ETIMEDOUT && r != -ERESTART) { + return r; + } + ldout(ictx->cct, 5) << "flatten timed out notifying lock owner" << dendl; + } - ret = async_flatten(ictx, ctx, prog_ctx); - if (ret < 0) { - delete ctx; - return ret; - } + Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &r); + r = async_flatten(ictx, ctx, prog_ctx); + if (r < 0) { + delete ctx; + return r; + } + } - my_lock.Lock(); - while (!done) { - cond.Wait(my_lock); - } - my_lock.Unlock(); + my_lock.Lock(); + while (!done) { + cond.Wait(my_lock); + } + my_lock.Unlock(); + + if (r == -ERESTART) { + ldout(ictx->cct, 5) << "flatten interrupted: restarting" << dendl; + } + } while (r == -ERESTART); notify_change(ictx->md_ctx, ictx->header_oid, ictx); ldout(cct, 20) << "flatten finished" << dendl; - return ret; + return r; } int async_flatten(ImageCtx *ictx, Context *ctx, ProgressContext &prog_ctx) { + assert(ictx->owner_lock.is_locked()); + assert(!ictx->image_watcher->is_lock_supported() || + ictx->image_watcher->is_lock_owner()); + CephContext *cct = ictx->cct; ldout(cct, 20) << "flatten" << dendl; @@ -2793,19 +2840,6 @@ reprotect_and_return_err: overlap_objects = Striper::get_num_objects(ictx->layout, overlap); } - { - RWLock::RLocker l(ictx->owner_lock); - r = prepare_image_update(ictx); - if (r < 0) { - return -EROFS; - } - if (ictx->image_watcher->is_lock_supported() && - !ictx->image_watcher->is_lock_owner()) { - // TODO: temporary until request proxied to lock owner - return -EROFS; - } - } - AsyncObjectThrottle::ContextFactory context_factory( boost::lambda::bind(boost::lambda::new_ptr(), boost::lambda::_1, ictx, object_size, snapc, diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc index 0d13fd68b5926..f74e83ec769e8 100644 --- a/src/test/librbd/test_internal.cc +++ b/src/test/librbd/test_internal.cc @@ -225,19 +225,19 @@ TEST_F(TestInternal, FlattenFailsToLockImage) { int order = ictx->order; ASSERT_EQ(0, librbd::clone(m_ioctx, m_image_name.c_str(), "snap1", m_ioctx, clone_name.c_str(), features, &order, 0, 0)); - BOOST_SCOPE_EXIT( (&m_ioctx) (clone_name) ) { + + TestInternal *parent = this; + librbd::ImageCtx *ictx2 = NULL; + BOOST_SCOPE_EXIT( (&m_ioctx) (clone_name) (parent) (&ictx2) ) { + if (ictx2 != NULL) { + parent->close_image(ictx2); + parent->unlock_image(); + } librbd::NoOpProgressContext no_op; ASSERT_EQ(0, librbd::remove(m_ioctx, clone_name.c_str(), no_op)); } BOOST_SCOPE_EXIT_END; - librbd::ImageCtx *ictx2; ASSERT_EQ(0, open_image(clone_name, &ictx2)); - - TestInternal *parent = this; - BOOST_SCOPE_EXIT( (parent) (ictx2) ) { - parent->close_image(ictx2); - } BOOST_SCOPE_EXIT_END; - ASSERT_EQ(0, lock_image(*ictx2, LOCK_EXCLUSIVE, "manually locked")); librbd::NoOpProgressContext no_op; -- 2.39.5