From 376c7e0f149b013154d6a7eeaa80a80449166508 Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 9 Oct 2014 00:00:17 -0400 Subject: [PATCH] librbd: Coordinate maintenance through exclusive lock leader When the exclusive lock feature is enabled, only a single client can modify the image. As a result, certain maintenance activities need to be proxied from the maintenance client to the active leader. Signed-off-by: Jason Dillaman --- src/librbd/ImageWatcher.cc | 14 ++++++++++- src/librbd/ImageWatcher.h | 1 + src/librbd/internal.cc | 41 +++++++++++++++++++++++++++----- src/librbd/internal.h | 2 +- src/librbd/librbd.cc | 4 ++-- src/test/librbd/test_internal.cc | 6 ++--- src/test/pybind/test_rbd.py | 30 +++++++++++++++++++---- 7 files changed, 80 insertions(+), 18 deletions(-) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index 1818582776982..b02c6828364d1 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -403,6 +403,11 @@ void ImageWatcher::release_lock() unlock(); } +void ImageWatcher::finalize_header_update() { + librbd::notify_change(m_image_ctx.md_ctx, m_image_ctx.header_oid, + &m_image_ctx); +} + int ImageWatcher::notify_async_progress(const RemoteAsyncRequest &request, uint64_t offset, uint64_t total) { ldout(m_image_ctx.cct, 20) << "remote async request progress: " @@ -857,10 +862,17 @@ void ImageWatcher::handle_snap_create(bufferlist::iterator iter, bufferlist *out ldout(m_image_ctx.cct, 20) << "remote snap_create request: " << snap_name << dendl; - int r = librbd::snap_create(&m_image_ctx, snap_name.c_str()); + int r = librbd::snap_create(&m_image_ctx, snap_name.c_str(), false); ENCODE_START(NOTIFY_VERSION, NOTIFY_VERSION, *out); ::encode(r, *out); ENCODE_FINISH(*out); + + if (r == 0) { + // cannot notify within a notificiation + FunctionContext *ctx = new FunctionContext( + boost::bind(&ImageWatcher::finalize_header_update, this)); + m_finisher->queue(ctx); + } } } diff --git a/src/librbd/ImageWatcher.h b/src/librbd/ImageWatcher.h index f098c6a947f9a..1cf4dee7c08a1 100644 --- a/src/librbd/ImageWatcher.h +++ b/src/librbd/ImageWatcher.h @@ -129,6 +129,7 @@ namespace librbd { void release_lock(); bool try_request_lock(); void finalize_request_lock(); + void finalize_header_update(); void schedule_retry_aio_requests(); void cancel_retry_aio_requests(); diff --git a/src/librbd/internal.cc b/src/librbd/internal.cc index b8e9cfa5c26c8..e089cfa16471b 100644 --- a/src/librbd/internal.cc +++ b/src/librbd/internal.cc @@ -464,7 +464,7 @@ namespace librbd { return r; } - int snap_create(ImageCtx *ictx, const char *snap_name) + int snap_create(ImageCtx *ictx, const char *snap_name, bool notify) { assert(ictx->owner_lock.is_locked()); ldout(ictx->cct, 20) << "snap_create " << ictx << " " << snap_name << dendl; @@ -482,8 +482,7 @@ namespace librbd { } if (ictx->image_watcher->is_lock_supported() && !ictx->image_watcher->is_lock_owner()) { - // TODO: temporary until request proxied to lock owner - return -EROFS; + return ictx->image_watcher->notify_snap_create(snap_name); } RWLock::RLocker l2(ictx->md_lock); @@ -494,7 +493,9 @@ namespace librbd { if (r < 0) return r; - notify_change(ictx->md_ctx, ictx->header_oid, ictx); + if (notify) { + notify_change(ictx->md_ctx, ictx->header_oid, ictx); + } ictx->perfcounter->inc(l_librbd_snap_create); return 0; @@ -1531,12 +1532,24 @@ reprotect_and_return_err: ldout(cct, 20) << "resize " << ictx << " " << ictx->size << " -> " << size << dendl; + { + RWLock::RLocker l(ictx->owner_lock); + int r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } + if (ictx->image_watcher->is_lock_supported() && + !ictx->image_watcher->is_lock_owner()) { + return ictx->image_watcher->notify_resize(size, prog_ctx); + } + } + Mutex my_lock("librbd::resize::my_lock"); Cond cond; bool done; int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + ret = async_resize(ictx, ctx, size, prog_ctx); if (ret < 0) { delete ctx; @@ -2681,12 +2694,28 @@ reprotect_and_return_err: CephContext *cct = ictx->cct; ldout(cct, 20) << "flatten" << dendl; + if (ictx->read_only || ictx->snap_id != CEPH_NOSNAP) { + return -EROFS; + } + + { + RWLock::RLocker l(ictx->owner_lock); + int r = prepare_image_update(ictx); + if (r < 0) { + return -EROFS; + } + if (ictx->image_watcher->is_lock_supported() && + !ictx->image_watcher->is_lock_owner()) { + return ictx->image_watcher->notify_flatten(prog_ctx); + } + } + Mutex my_lock("librbd::flatten:my_lock"); Cond cond; bool done; int ret; - Context *ctx = new C_SafeCond(&my_lock, &cond, &done, &ret); + ret = async_flatten(ictx, ctx, prog_ctx); if (ret < 0) { delete ctx; diff --git a/src/librbd/internal.h b/src/librbd/internal.h index 3b777e948284a..1578f00ccbdc7 100644 --- a/src/librbd/internal.h +++ b/src/librbd/internal.h @@ -107,7 +107,7 @@ namespace librbd { int remove(librados::IoCtx& io_ctx, const char *imgname, ProgressContext& prog_ctx); int resize(ImageCtx *ictx, uint64_t size, ProgressContext& prog_ctx); - int snap_create(ImageCtx *ictx, const char *snap_name); + int snap_create(ImageCtx *ictx, const char *snap_name, bool notify); int snap_list(ImageCtx *ictx, std::vector& snaps); bool snap_exists(ImageCtx *ictx, const char *snap_name); int snap_rollback(ImageCtx *ictx, const char *snap_name, diff --git a/src/librbd/librbd.cc b/src/librbd/librbd.cc index 769be219577d6..f2d3dd92d4054 100644 --- a/src/librbd/librbd.cc +++ b/src/librbd/librbd.cc @@ -501,7 +501,7 @@ namespace librbd { ImageCtx *ictx = (ImageCtx *)ctx; tracepoint(librbd, snap_create_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, snap_name); RWLock::RLocker owner_locker(ictx->owner_lock); - int r = librbd::snap_create(ictx, snap_name); + int r = librbd::snap_create(ictx, snap_name, true); tracepoint(librbd, snap_create_exit, r); return r; } @@ -1192,7 +1192,7 @@ extern "C" int rbd_snap_create(rbd_image_t image, const char *snap_name) librbd::ImageCtx *ictx = (librbd::ImageCtx *)image; tracepoint(librbd, snap_create_enter, ictx, ictx->name.c_str(), ictx->snap_name.c_str(), ictx->read_only, snap_name); RWLock::RLocker owner_locker(ictx->owner_lock); - int r = librbd::snap_create(ictx, snap_name); + int r = librbd::snap_create(ictx, snap_name, true); tracepoint(librbd, snap_create_exit, r); return r; } diff --git a/src/test/librbd/test_internal.cc b/src/test/librbd/test_internal.cc index 215478f5fa689..0d13fd68b5926 100644 --- a/src/test/librbd/test_internal.cc +++ b/src/test/librbd/test_internal.cc @@ -39,7 +39,7 @@ public: { RWLock::RLocker l(ictx->owner_lock); - r = librbd::snap_create(ictx, snap_name); + r = librbd::snap_create(ictx, snap_name, true); } if (r < 0) { return r; @@ -117,7 +117,7 @@ TEST_F(TestInternal, SnapCreateLocksImage) { { RWLock::RLocker l(ictx->owner_lock); - ASSERT_EQ(0, librbd::snap_create(ictx, "snap1")); + ASSERT_EQ(0, librbd::snap_create(ictx, "snap1", true)); } BOOST_SCOPE_EXIT( (ictx) ) { ASSERT_EQ(0, librbd::snap_remove(ictx, "snap1")); @@ -136,7 +136,7 @@ TEST_F(TestInternal, SnapCreateFailsToLockImage) { ASSERT_EQ(0, lock_image(*ictx, LOCK_EXCLUSIVE, "manually locked")); RWLock::RLocker l(ictx->owner_lock); - ASSERT_EQ(-EROFS, librbd::snap_create(ictx, "snap1")); + ASSERT_EQ(-EROFS, librbd::snap_create(ictx, "snap1", true)); } TEST_F(TestInternal, SnapRollbackLocksImage) { diff --git a/src/test/pybind/test_rbd.py b/src/test/pybind/test_rbd.py index f640f41adc309..c713ff3c27aa9 100644 --- a/src/test/pybind/test_rbd.py +++ b/src/test/pybind/test_rbd.py @@ -945,9 +945,18 @@ class TestExclusiveLock(object): RBD().clone(ioctx, image_name, 'snap', ioctx, 'clone', features) with nested(Image(ioctx, 'clone'), Image(ioctx2, 'clone')) as ( image1, image2): - image1.write('0'*256, 0) - assert_raises(ReadOnlyImage, image2.flatten) - image1.flatten() + data = rand_data(256) + image1.write(data, 0) + image2.flatten() + assert_raises(ImageNotFound, image1.parent_info) + parent = True + for x in xrange(30): + try: + image2.parent_info() + except ImageNotFound: + parent = False + break + eq(False, parent) finally: RBD().remove(ioctx, 'clone') with Image(ioctx, image_name) as image: @@ -959,8 +968,19 @@ class TestExclusiveLock(object): image1, image2): image1.write('0'*256, 0) for new_size in [IMG_SIZE * 2, IMG_SIZE / 2]: - assert_raises(ReadOnlyImage, image2.resize, new_size) - image1.resize(new_size); + image2.resize(new_size); + eq(new_size, image1.size()) + for x in xrange(30): + if new_size == image2.size(): + break + time.sleep(1) + eq(new_size, image2.size()) + + def test_follower_snap_create(self): + with nested(Image(ioctx, image_name), Image(ioctx2, image_name)) as ( + image1, image2): + image2.create_snap('snap1') + image1.remove_snap('snap1') def test_follower_snap_rollback(self): with nested(Image(ioctx, image_name), Image(ioctx2, image_name)) as ( -- 2.39.5