From: Jason Dillaman Date: Thu, 6 Sep 2018 17:38:17 +0000 (-0400) Subject: librbd: reacquire lock should properly handle failed watcher X-Git-Tag: v14.0.1~228^2~2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=2057d99f451e3007d4fd05a88faa968319d0ba90;p=ceph-ci.git librbd: reacquire lock should properly handle failed watcher If the watch has been lost, assume the lock has been lost but attempt to reacquire it if and when the watch is re-established. Signed-off-by: Jason Dillaman --- diff --git a/src/librbd/ImageWatcher.cc b/src/librbd/ImageWatcher.cc index cfa08173d65..fba8025fcc4 100644 --- a/src/librbd/ImageWatcher.cc +++ b/src/librbd/ImageWatcher.cc @@ -1023,7 +1023,7 @@ void ImageWatcher::handle_rewatch_complete(int r) { RWLock::RLocker owner_locker(m_image_ctx.owner_lock); if (m_image_ctx.exclusive_lock != nullptr) { // update the lock cookie with the new watch handle - m_image_ctx.exclusive_lock->reacquire_lock(); + m_image_ctx.exclusive_lock->reacquire_lock(nullptr); } } diff --git a/src/librbd/ManagedLock.cc b/src/librbd/ManagedLock.cc index 433c5964471..1e4ced6095c 100644 --- a/src/librbd/ManagedLock.cc +++ b/src/librbd/ManagedLock.cc @@ -132,7 +132,7 @@ void ManagedLock::shut_down(Context *on_shut_down) { if (m_state == STATE_WAITING_FOR_REGISTER) { // abort stalled acquire lock state - ldout(m_cct, 10) << "woke up waiting acquire" << dendl; + ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl; Action active_action = get_active_action(); ceph_assert(active_action == ACTION_TRY_LOCK || active_action == ACTION_ACQUIRE_LOCK); @@ -206,7 +206,7 @@ void ManagedLock::reacquire_lock(Context *on_reacquired) { if (m_state == STATE_WAITING_FOR_REGISTER) { // restart the acquire lock process now that watch is valid - ldout(m_cct, 10) << "woke up waiting acquire" << dendl; + ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl; Action active_action = get_active_action(); ceph_assert(active_action == ACTION_TRY_LOCK || active_action == ACTION_ACQUIRE_LOCK); @@ -467,14 +467,20 @@ void ManagedLock::send_acquire_lock() { } ldout(m_cct, 10) << dendl; - m_state = STATE_ACQUIRING; uint64_t watch_handle = m_watcher->get_watch_handle(); if (watch_handle == 0) { lderr(m_cct) << "watcher not registered - delaying request" << dendl; m_state = STATE_WAITING_FOR_REGISTER; + + // shut down might race w/ release/re-acquire of the lock + if (is_state_shutdown()) { + complete_active_action(STATE_UNLOCKED, -ESHUTDOWN); + } return; } + + m_state = STATE_ACQUIRING; m_cookie = encode_lock_cookie(watch_handle); m_work_queue->queue(new FunctionContext([this](int r) { @@ -522,13 +528,6 @@ void ManagedLock::handle_acquire_lock(int r) { })); } -template -void ManagedLock::handle_no_op_reacquire_lock(int r) { - ldout(m_cct, 10) << "r=" << r << dendl; - ceph_assert(r >= 0); - complete_active_action(STATE_LOCKED, 0); -} - template void ManagedLock::handle_post_acquire_lock(int r) { ldout(m_cct, 10) << "r=" << r << dendl; @@ -568,13 +567,20 @@ void ManagedLock::send_reacquire_lock() { return; } + ldout(m_cct, 10) << dendl; + m_state = STATE_REACQUIRING; + uint64_t watch_handle = m_watcher->get_watch_handle(); if (watch_handle == 0) { - // watch (re)failed while recovering - lderr(m_cct) << "aborting reacquire due to invalid watch handle" - << dendl; - complete_active_action(STATE_LOCKED, 0); - return; + // watch (re)failed while recovering + lderr(m_cct) << "aborting reacquire due to invalid watch handle" + << dendl; + + // treat double-watch failure as a lost lock and invoke the + // release/acquire handlers + release_acquire_lock(); + complete_active_action(STATE_LOCKED, 0); + return; } m_new_cookie = encode_lock_cookie(watch_handle); @@ -587,9 +593,6 @@ void ManagedLock::send_reacquire_lock() { return; } - ldout(m_cct, 10) << dendl; - m_state = STATE_REACQUIRING; - auto ctx = create_context_callback< ManagedLock, &ManagedLock::handle_reacquire_lock>(this); ctx = new FunctionContext([this, ctx](int r) { @@ -617,36 +620,45 @@ void ManagedLock::handle_reacquire_lock(int r) { << dendl; } - if (!is_state_shutdown()) { - // queue a release and re-acquire of the lock since cookie cannot - // be updated on older OSDs - execute_action(ACTION_RELEASE_LOCK, nullptr); - - ceph_assert(!m_actions_contexts.empty()); - ActionContexts &action_contexts(m_actions_contexts.front()); - - // reacquire completes when the request lock completes - Contexts contexts; - std::swap(contexts, action_contexts.second); - if (contexts.empty()) { - execute_action(ACTION_ACQUIRE_LOCK, nullptr); - } else { - for (auto ctx : contexts) { - ctx = new FunctionContext([ctx, r](int acquire_ret_val) { - if (acquire_ret_val >= 0) { - acquire_ret_val = r; - } - ctx->complete(acquire_ret_val); - }); - execute_action(ACTION_ACQUIRE_LOCK, ctx); - } - } - } + release_acquire_lock(); } else { m_cookie = m_new_cookie; } - complete_active_action(STATE_LOCKED, r); + complete_active_action(STATE_LOCKED, 0); +} + +template +void ManagedLock::handle_no_op_reacquire_lock(int r) { + ldout(m_cct, 10) << "r=" << r << dendl; + ceph_assert(m_state == STATE_REACQUIRING); + ceph_assert(r >= 0); + complete_active_action(STATE_LOCKED, 0); +} + +template +void ManagedLock::release_acquire_lock() { + assert(m_lock.is_locked()); + + if (!is_state_shutdown()) { + // queue a release and re-acquire of the lock since cookie cannot + // be updated on older OSDs + execute_action(ACTION_RELEASE_LOCK, nullptr); + + ceph_assert(!m_actions_contexts.empty()); + ActionContexts &action_contexts(m_actions_contexts.front()); + + // reacquire completes when the request lock completes + Contexts contexts; + std::swap(contexts, action_contexts.second); + if (contexts.empty()) { + execute_action(ACTION_ACQUIRE_LOCK, nullptr); + } else { + for (auto ctx : contexts) { + execute_action(ACTION_ACQUIRE_LOCK, ctx); + } + } + } } template diff --git a/src/librbd/ManagedLock.h b/src/librbd/ManagedLock.h index bbd932b744c..b27e549dab4 100644 --- a/src/librbd/ManagedLock.h +++ b/src/librbd/ManagedLock.h @@ -55,7 +55,7 @@ public: void acquire_lock(Context *on_acquired); void try_acquire_lock(Context *on_acquired); void release_lock(Context *on_released); - void reacquire_lock(Context *on_reacquired = nullptr); + void reacquire_lock(Context *on_reacquired); void get_locker(managed_lock::Locker *locker, Context *on_finish); void break_lock(const managed_lock::Locker &locker, bool force_break_lock, Context *on_finish); @@ -247,6 +247,7 @@ private: void send_reacquire_lock(); void handle_reacquire_lock(int r); + void release_acquire_lock(); void send_release_lock(); void handle_pre_release_lock(int r); diff --git a/src/test/librbd/mock/MockExclusiveLock.h b/src/test/librbd/mock/MockExclusiveLock.h index b73ab02c709..4c9d2995891 100644 --- a/src/test/librbd/mock/MockExclusiveLock.h +++ b/src/test/librbd/mock/MockExclusiveLock.h @@ -18,7 +18,7 @@ struct MockExclusiveLock { MOCK_METHOD2(init, void(uint64_t features, Context*)); MOCK_METHOD1(shut_down, void(Context*)); - MOCK_METHOD0(reacquire_lock, void()); + MOCK_METHOD1(reacquire_lock, void(Context*)); MOCK_METHOD1(try_acquire_lock, void(Context*)); MOCK_METHOD1(block_requests, void(int)); diff --git a/src/test/librbd/test_mock_ManagedLock.cc b/src/test/librbd/test_mock_ManagedLock.cc index 237025788c8..921d9f50442 100644 --- a/src/test/librbd/test_mock_ManagedLock.cc +++ b/src/test/librbd/test_mock_ManagedLock.cc @@ -199,7 +199,6 @@ public: ContextWQ *work_queue, MockReacquireRequest &mock_reacquire_request, int r) { - expect_get_watch_handle(watcher, 98765); EXPECT_CALL(mock_reacquire_request, send()) .WillOnce(QueueRequest(&mock_reacquire_request, r, work_queue)); } @@ -526,6 +525,7 @@ TEST_F(TestMockManagedLock, ReacquireLock) { MockReacquireRequest mock_reacquire_request; C_SaferCond reacquire_ctx; + expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765); expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, 0); managed_lock.reacquire_lock(&reacquire_ctx); ASSERT_EQ(0, reacquire_ctx.wait()); @@ -536,6 +536,72 @@ TEST_F(TestMockManagedLock, ReacquireLock) { ASSERT_FALSE(is_lock_owner(managed_lock)); } +TEST_F(TestMockManagedLock, AttemptReacquireBlacklistedLock) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockManagedLockImageCtx mock_image_ctx(*ictx); + MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue, + ictx->header_oid, mock_image_ctx.image_watcher, + librbd::managed_lock::EXCLUSIVE, true, 0); + InSequence seq; + + MockAcquireRequest request_lock_acquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_acquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + MockReleaseRequest request_release; + expect_release_lock(ictx->op_work_queue, request_release, 0); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + managed_lock.reacquire_lock(nullptr); + + ASSERT_EQ(0, when_shut_down(managed_lock)); + ASSERT_FALSE(is_lock_owner(managed_lock)); +} + +TEST_F(TestMockManagedLock, ReacquireBlacklistedLock) { + librbd::ImageCtx *ictx; + ASSERT_EQ(0, open_image(m_image_name, &ictx)); + + MockManagedLockImageCtx mock_image_ctx(*ictx); + MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue, + ictx->header_oid, mock_image_ctx.image_watcher, + librbd::managed_lock::EXCLUSIVE, true, 0); + InSequence seq; + + MockAcquireRequest request_lock_acquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_acquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + expect_get_watch_handle(*mock_image_ctx.image_watcher, 0); + + MockReleaseRequest request_release; + expect_release_lock(ictx->op_work_queue, request_release, 0); + + MockAcquireRequest request_lock_reacquire; + expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, + request_lock_reacquire, 0); + ASSERT_EQ(0, when_acquire_lock(managed_lock)); + ASSERT_TRUE(is_lock_owner(managed_lock)); + + C_SaferCond reacquire_ctx; + managed_lock.reacquire_lock(&reacquire_ctx); + ASSERT_EQ(0, reacquire_ctx.wait()); + + MockReleaseRequest shutdown_release; + expect_release_lock(ictx->op_work_queue, shutdown_release, 0); + ASSERT_EQ(0, when_shut_down(managed_lock)); + ASSERT_FALSE(is_lock_owner(managed_lock)); +} + TEST_F(TestMockManagedLock, ReacquireLockError) { librbd::ImageCtx *ictx; ASSERT_EQ(0, open_image(m_image_name, &ictx)); @@ -553,6 +619,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) { MockReacquireRequest mock_reacquire_request; C_SaferCond reacquire_ctx; + expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765); expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, -EOPNOTSUPP); MockReleaseRequest reacquire_lock_release; @@ -562,7 +629,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) { expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, reacquire_lock_acquire, 0); managed_lock.reacquire_lock(&reacquire_ctx); - ASSERT_EQ(-EOPNOTSUPP, reacquire_ctx.wait()); + ASSERT_EQ(0, reacquire_ctx.wait()); MockReleaseRequest shutdown_release; expect_release_lock(ictx->op_work_queue, shutdown_release, 0); diff --git a/src/test/rbd_mirror/test_mock_LeaderWatcher.cc b/src/test/rbd_mirror/test_mock_LeaderWatcher.cc index 113eeec7d7f..e8b4326d890 100644 --- a/src/test/rbd_mirror/test_mock_LeaderWatcher.cc +++ b/src/test/rbd_mirror/test_mock_LeaderWatcher.cc @@ -127,7 +127,7 @@ struct ManagedLock { m_work_queue->queue(pre_release_ctx, 0); } - void reacquire_lock() { + void reacquire_lock(Context* on_finish) { MockManagedLock::get_instance().reacquire_lock(); } diff --git a/src/tools/rbd_mirror/LeaderWatcher.cc b/src/tools/rbd_mirror/LeaderWatcher.cc index 0e7cd20405b..4be445ba3da 100644 --- a/src/tools/rbd_mirror/LeaderWatcher.cc +++ b/src/tools/rbd_mirror/LeaderWatcher.cc @@ -1090,7 +1090,7 @@ template void LeaderWatcher::handle_rewatch_complete(int r) { dout(5) << "r=" << r << dendl; - m_leader_lock->reacquire_lock(); + m_leader_lock->reacquire_lock(nullptr); } template