From f9279f8f7773f161483916bf758740207ef684ba Mon Sep 17 00:00:00 2001 From: Jason Dillaman Date: Thu, 6 Sep 2018 13:38:17 -0400 Subject: [PATCH] librbd: reacquire lock should properly handle failed watcher If the watch has been lost, assume the lock has been lost but attempt to reacquire it if and when the watch is re-established. Signed-off-by: Jason Dillaman (cherry picked from commit 2057d99f451e3007d4fd05a88faa968319d0ba90) Conflicts: src/librbd/ManagedLock.cc: trivial resolution --- src/librbd/ImageWatcher.cc | 2 +- src/librbd/ManagedLock.cc | 97 +++++++++++-------- src/librbd/ManagedLock.h | 3 +- src/test/librbd/mock/MockExclusiveLock.h | 2 +- src/test/librbd/test_mock_ManagedLock.cc | 71 +++++++++++++- .../rbd_mirror/test_mock_LeaderWatcher.cc | 2 +- src/tools/rbd_mirror/LeaderWatcher.cc | 2 +- 7 files changed, 129 insertions(+), 50 deletions(-) diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index daac18868f71d..d9ed8ee613c1c 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -979,7 +979,7 @@ void ImageWatcher::handle_rewatch_complete(int r) { RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.exclusive_lock != nullptr) { // update the lock cookie with the new watch handle - m_image_ctx.exclusive_lock->reacquire_lock(); + m_image_ctx.exclusive_lock->reacquire_lock(nullptr); } } diff --git a/src/librbd/ManagedLock.cc b/src/librbd/ManagedLock.cc index 86b63c062c364..38fd3aa9d79b8 100644 --- a/src/librbd/ManagedLock.cc +++ b/src/librbd/ManagedLock.cc @@ -132,7 +132,7 @@ void ManagedLock::shut_down(Context *on_shut_down) { if (m_state == STATE_WAITING_FOR_REGISTER) { // abort stalled acquire lock state - ldout(m_cct, 10) << "woke up waiting acquire" << dendl; + ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl; Action active_action = get_active_action(); assert(active_action == ACTION_TRY_LOCK || active_action == ACTION_ACQUIRE_LOCK); @@ -467,14 +467,20 @@ void ManagedLock::send_acquire_lock() { } ldout(m_cct, 10) << dendl; - m_state = STATE_ACQUIRING; uint64_t watch_handle = m_watcher->get_watch_handle(); if (watch_handle == 0) { lderr(m_cct) << "watcher not registered - delaying request" << dendl; m_state = STATE_WAITING_FOR_REGISTER; + + // shut down might race w/ release/re-acquire of the lock + if (is_state_shutdown()) { + complete_active_action(STATE_UNLOCKED, -ESHUTDOWN); + } return; } + + m_state = STATE_ACQUIRING; m_cookie = encode_lock_cookie(watch_handle); m_work_queue->queue(new FunctionContext([this](int r) { @@ -522,13 +528,6 @@ void ManagedLock::handle_acquire_lock(int r) { })); } -template -void ManagedLock::handle_no_op_reacquire_lock(int r) { - ldout(m_cct, 10) << "r=" << r << dendl; - assert(r >= 0); - complete_active_action(STATE_LOCKED, 0); -} - template void ManagedLock::handle_post_acquire_lock(int r) { ldout(m_cct, 10) << ": r=" << r << dendl; @@ -568,13 +567,20 @@ void ManagedLock::send_reacquire_lock() { return; } + ldout(m_cct, 10) << dendl; + m_state = STATE_REACQUIRING; + uint64_t watch_handle = m_watcher->get_watch_handle(); if (watch_handle == 0) { - // watch (re)failed while recovering - lderr(m_cct) << ": aborting reacquire due to invalid watch handle" - << dendl; - complete_active_action(STATE_LOCKED, 0); - return; + // watch (re)failed while recovering + lderr(m_cct) << ": aborting reacquire due to invalid watch handle" + << dendl; + + // treat double-watch failure as a lost lock and invoke the + // release/acquire handlers + release_acquire_lock(); + complete_active_action(STATE_LOCKED, 0); + return; } m_new_cookie = encode_lock_cookie(watch_handle); @@ -587,9 +593,6 @@ void ManagedLock::send_reacquire_lock() { return; } - ldout(m_cct, 10) << dendl; - m_state = STATE_REACQUIRING; - auto ctx = create_context_callback< ManagedLock, &ManagedLock::handle_reacquire_lock>(this); ctx = new FunctionContext([this, ctx](int r) { @@ -617,36 +620,44 @@ void ManagedLock::handle_reacquire_lock(int r) { << dendl; } - if (!is_state_shutdown()) { - // queue a release and re-acquire of the lock since cookie cannot - // be updated on older OSDs - execute_action(ACTION_RELEASE_LOCK, nullptr); - - assert(!m_actions_contexts.empty()); - ActionContexts &action_contexts(m_actions_contexts.front()); - - // reacquire completes when the request lock completes - Contexts contexts; - std::swap(contexts, action_contexts.second); - if (contexts.empty()) { - execute_action(ACTION_ACQUIRE_LOCK, nullptr); - } else { - for (auto ctx : contexts) { - ctx = new FunctionContext([ctx, r](int acquire_ret_val) { - if (acquire_ret_val >= 0) { - acquire_ret_val = r; - } - ctx->complete(acquire_ret_val); - }); - execute_action(ACTION_ACQUIRE_LOCK, ctx); - } - } - } + release_acquire_lock(); } else { m_cookie = m_new_cookie; } - complete_active_action(STATE_LOCKED, r); + complete_active_action(STATE_LOCKED, 0); +} + +template +void ManagedLock::handle_no_op_reacquire_lock(int r) { + ldout(m_cct, 10) << "r=" << r << dendl; + assert(r >= 0); + complete_active_action(STATE_LOCKED, 0); +} + +template +void ManagedLock::release_acquire_lock() { + assert(m_lock.is_locked()); + + if (!is_state_shutdown()) { + // queue a release and re-acquire of the lock since cookie cannot + // be updated on older OSDs + execute_action(ACTION_RELEASE_LOCK, nullptr); + + ceph_assert(!m_actions_contexts.empty()); + ActionContexts &action_contexts(m_actions_contexts.front()); + + // reacquire completes when the request lock completes + Contexts contexts; + std::swap(contexts, action_contexts.second); + if (contexts.empty()) { + execute_action(ACTION_ACQUIRE_LOCK, nullptr); + } else { + for (auto ctx : contexts) { + execute_action(ACTION_ACQUIRE_LOCK, ctx); + } + } + } } template diff --git a/src/librbd/ManagedLock.h b/src/librbd/ManagedLock.h index fdb6d43d78c41..a152adfc84d62 100644 --- a/src/librbd/ManagedLock.h +++ b/src/librbd/ManagedLock.h @@ -55,7 +55,7 @@ public: void acquire_lock(Context *on_acquired); void try_acquire_lock(Context *on_acquired); void release_lock(Context *on_released); - void reacquire_lock(Context *on_reacquired = nullptr); + void reacquire_lock(Context *on_reacquired); void get_locker(managed_lock::Locker *locker, Context *on_finish); void break_lock(const managed_lock::Locker &locker, bool force_break_lock, Context *on_finish); @@ -247,6 +247,7 @@ private: void send_reacquire_lock(); void handle_reacquire_lock(int r); + void release_acquire_lock(); void send_release_lock(); void handle_pre_release_lock(int r); diff --git a/src/test/librbd/mock/MockExclusiveLock.h b/src/test/librbd/mock/MockExclusiveLock.h index b73ab02c709ef..4c9d2995891b2 100644 --- a/src/test/librbd/mock/MockExclusiveLock.h +++ b/src/test/librbd/mock/MockExclusiveLock.h @@ -18,7 +18,7 @@ struct MockExclusiveLock { MOCK_METHOD2(init, void(uint64_t features, Context*)); MOCK_METHOD1(shut_down, void(Context*)); - MOCK_METHOD0(reacquire_lock, void()); + MOCK_METHOD1(reacquire_lock, void(Context*)); MOCK_METHOD1(try_acquire_lock, void(Context*)); MOCK_METHOD1(block_requests, void(int)); diff --git a/src/test/librbd/test_mock_ManagedLock.cc b/src/test/librbd/test_mock_ManagedLock.cc index 8668485acc273..a1e955a6a658e 100644 --- a/src/test/librbd/test_mock_ManagedLock.cc +++ b/src/test/librbd/test_mock_ManagedLock.cc @@ -195,7 +195,6 @@ public: ContextWQ *work_queue, MockReacquireRequest &mock_reacquire_request, int r) { - expect_get_watch_handle(watcher, 98765); EXPECT_CALL(mock_reacquire_request, send()) .WillOnce(QueueRequest(&mock_reacquire_request, r, work_queue)); } @@ -523,6 +522,7 @@ TEST_F(TestMockManagedLock, ReacquireLock) { MockReacquireRequest mock_reacquire_request; C_SaferCond reacquire_ctx; + expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765); expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, 0); managed_lock.reacquire_lock(&reacquire_ctx); ASSERT_EQ(0, reacquire_ctx.wait()); @@ -533,6 +533,72 @@ TEST_F(TestMockManagedLock, ReacquireLock) { ASSERT_FALSE(is_lock_owner(managed_lock)); } +TEST_F(TestMockManagedLock, AttemptReacquireBlacklistedLock) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockManagedLockImageCtx mock_image_ctx(*ictx); + MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue, + ictx->header_oid, mock_image_ctx.image_watcher, + librbd::managed_lock::EXCLUSIVE, true, 0); + InSequence seq; + + MockAcquireRequest request_lock_acquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_acquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + MockReleaseRequest request_release; + expect_release_lock(ictx->op_work_queue, request_release, 0); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + managed_lock.reacquire_lock(nullptr); + + ASSERT_EQ(0, when_shut_down(managed_lock)); + ASSERT_FALSE(is_lock_owner(managed_lock)); +} + +TEST_F(TestMockManagedLock, ReacquireBlacklistedLock) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockManagedLockImageCtx mock_image_ctx(*ictx); + MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue, + ictx->header_oid, mock_image_ctx.image_watcher, + librbd::managed_lock::EXCLUSIVE, true, 0); + InSequence seq; + + MockAcquireRequest request_lock_acquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_acquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + MockReleaseRequest request_release; + expect_release_lock(ictx->op_work_queue, request_release, 0); + + MockAcquireRequest request_lock_reacquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_reacquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + C_SaferCond reacquire_ctx; + managed_lock.reacquire_lock(&reacquire_ctx); + ASSERT_EQ(0, reacquire_ctx.wait()); + + MockReleaseRequest shutdown_release; + expect_release_lock(ictx->op_work_queue, shutdown_release, 0); + ASSERT_EQ(0, when_shut_down(managed_lock)); + ASSERT_FALSE(is_lock_owner(managed_lock)); +} + TEST_F(TestMockManagedLock, ReacquireLockError) { librbd::ImageCtx *ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); @@ -550,6 +616,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) { MockReacquireRequest mock_reacquire_request; C_SaferCond reacquire_ctx; + expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765); expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, -EOPNOTSUPP); MockReleaseRequest reacquire_lock_release; @@ -559,7 +626,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) { expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, reacquire_lock_acquire, 0); managed_lock.reacquire_lock(&reacquire_ctx); - ASSERT_EQ(-EOPNOTSUPP, reacquire_ctx.wait()); + ASSERT_EQ(0, reacquire_ctx.wait()); MockReleaseRequest shutdown_release; expect_release_lock(ictx->op_work_queue, shutdown_release, 0); diff --git a/src/test/rbd_mirror/test_mock_LeaderWatcher.cc b/src/test/rbd_mirror/test_mock_LeaderWatcher.cc index 2588db43c6d8e..a8ba98956a227 100644 --- a/src/test/rbd_mirror/test_mock_LeaderWatcher.cc +++ b/src/test/rbd_mirror/test_mock_LeaderWatcher.cc @@ -127,7 +127,7 @@ struct ManagedLock { m_work_queue->queue(pre_release_ctx, 0); } - void reacquire_lock() { + void reacquire_lock(Context* on_finish) { MockManagedLock::get_instance().reacquire_lock(); } diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc index 51351e0b8d04b..4aadeb15db41c 100644 --- a/src/tools/rbd_mirror/LeaderWatcher.cc +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -1093,7 +1093,7 @@ template void LeaderWatcher::handle_rewatch_complete(int r) { dout(5) << "r=" << r << dendl; - m_leader_lock->reacquire_lock(); + m_leader_lock->reacquire_lock(nullptr); } template -- 2.39.5