]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: reacquire lock should properly handle failed watcher
authorJason Dillaman <dillaman@redhat.com>
Thu, 6 Sep 2018 17:38:17 +0000 (13:38 -0400)
committerJason Dillaman <dillaman@redhat.com>
Wed, 19 Sep 2018 18:52:48 +0000 (14:52 -0400)
If the watch has been lost, assume the lock has been lost but attempt
to reacquire it if and when the watch is re-established.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
src/librbd/ImageWatcher.cc
src/librbd/ManagedLock.cc
src/librbd/ManagedLock.h
src/test/librbd/mock/MockExclusiveLock.h
src/test/librbd/test_mock_ManagedLock.cc
src/test/rbd_mirror/test_mock_LeaderWatcher.cc
src/tools/rbd_mirror/LeaderWatcher.cc

index cfa08173d656336aa7c03184ea13d9280903a7d8..fba8025fcc4d3525f7f26d754f5d97ad9ef5537f 100644 (file)
@@ -1023,7 +1023,7 @@ void ImageWatcher<I>::handle_rewatch_complete(int r) {
     RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
     if (m_image_ctx.exclusive_lock != nullptr) {
       // update the lock cookie with the new watch handle
-      m_image_ctx.exclusive_lock->reacquire_lock();
+      m_image_ctx.exclusive_lock->reacquire_lock(nullptr);
     }
   }
 
index 433c5964471cd247e36ac6719087bb8a53ec014b..1e4ced6095c1575b7667088f03c17d6c5d54327a 100644 (file)
@@ -132,7 +132,7 @@ void ManagedLock<I>::shut_down(Context *on_shut_down) {
 
   if (m_state == STATE_WAITING_FOR_REGISTER) {
     // abort stalled acquire lock state
-    ldout(m_cct, 10) << "woke up waiting acquire" << dendl;
+    ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl;
     Action active_action = get_active_action();
     ceph_assert(active_action == ACTION_TRY_LOCK ||
                 active_action == ACTION_ACQUIRE_LOCK);
@@ -206,7 +206,7 @@ void ManagedLock<I>::reacquire_lock(Context *on_reacquired) {
 
     if (m_state == STATE_WAITING_FOR_REGISTER) {
       // restart the acquire lock process now that watch is valid
-      ldout(m_cct, 10) << "woke up waiting acquire" << dendl;
+      ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl;
       Action active_action = get_active_action();
       ceph_assert(active_action == ACTION_TRY_LOCK ||
                   active_action == ACTION_ACQUIRE_LOCK);
@@ -467,14 +467,20 @@ void ManagedLock<I>::send_acquire_lock() {
   }
 
   ldout(m_cct, 10) << dendl;
-  m_state = STATE_ACQUIRING;
 
   uint64_t watch_handle = m_watcher->get_watch_handle();
   if (watch_handle == 0) {
     lderr(m_cct) << "watcher not registered - delaying request" << dendl;
     m_state = STATE_WAITING_FOR_REGISTER;
+
+    // shut down might race w/ release/re-acquire of the lock
+    if (is_state_shutdown()) {
+      complete_active_action(STATE_UNLOCKED, -ESHUTDOWN);
+    }
     return;
   }
+
+  m_state = STATE_ACQUIRING;
   m_cookie = encode_lock_cookie(watch_handle);
 
   m_work_queue->queue(new FunctionContext([this](int r) {
@@ -522,13 +528,6 @@ void ManagedLock<I>::handle_acquire_lock(int r) {
   }));
 }
 
-template <typename I>
-void ManagedLock<I>::handle_no_op_reacquire_lock(int r) {
-  ldout(m_cct, 10) << "r=" << r << dendl;
-  ceph_assert(r >= 0);
-  complete_active_action(STATE_LOCKED, 0);
-}
-
 template <typename I>
 void ManagedLock<I>::handle_post_acquire_lock(int r) {
   ldout(m_cct, 10) << "r=" << r << dendl;
@@ -568,13 +567,20 @@ void ManagedLock<I>::send_reacquire_lock() {
     return;
   }
 
+  ldout(m_cct, 10) << dendl;
+  m_state = STATE_REACQUIRING;
+
   uint64_t watch_handle = m_watcher->get_watch_handle();
   if (watch_handle == 0) {
-     // watch (re)failed while recovering
-     lderr(m_cct) << "aborting reacquire due to invalid watch handle"
-                  << dendl;
-     complete_active_action(STATE_LOCKED, 0);
-     return;
+    // watch (re)failed while recovering
+    lderr(m_cct) << "aborting reacquire due to invalid watch handle"
+                 << dendl;
+
+    // treat double-watch failure as a lost lock and invoke the
+    // release/acquire handlers
+    release_acquire_lock();
+    complete_active_action(STATE_LOCKED, 0);
+    return;
   }
 
   m_new_cookie = encode_lock_cookie(watch_handle);
@@ -587,9 +593,6 @@ void ManagedLock<I>::send_reacquire_lock() {
     return;
   }
 
-  ldout(m_cct, 10) << dendl;
-  m_state = STATE_REACQUIRING;
-
   auto ctx = create_context_callback<
     ManagedLock, &ManagedLock<I>::handle_reacquire_lock>(this);
   ctx = new FunctionContext([this, ctx](int r) {
@@ -617,36 +620,45 @@ void ManagedLock<I>::handle_reacquire_lock(int r) {
                    << dendl;
     }
 
-    if (!is_state_shutdown()) {
-      // queue a release and re-acquire of the lock since cookie cannot
-      // be updated on older OSDs
-      execute_action(ACTION_RELEASE_LOCK, nullptr);
-
-      ceph_assert(!m_actions_contexts.empty());
-      ActionContexts &action_contexts(m_actions_contexts.front());
-
-      // reacquire completes when the request lock completes
-      Contexts contexts;
-      std::swap(contexts, action_contexts.second);
-      if (contexts.empty()) {
-        execute_action(ACTION_ACQUIRE_LOCK, nullptr);
-      } else {
-        for (auto ctx : contexts) {
-          ctx = new FunctionContext([ctx, r](int acquire_ret_val) {
-              if (acquire_ret_val >= 0) {
-                acquire_ret_val = r;
-              }
-              ctx->complete(acquire_ret_val);
-            });
-          execute_action(ACTION_ACQUIRE_LOCK, ctx);
-        }
-      }
-    }
+    release_acquire_lock();
   } else {
     m_cookie = m_new_cookie;
   }
 
-  complete_active_action(STATE_LOCKED, r);
+  complete_active_action(STATE_LOCKED, 0);
+}
+
+template <typename I>
+void ManagedLock<I>::handle_no_op_reacquire_lock(int r) {
+  ldout(m_cct, 10) << "r=" << r << dendl;
+  ceph_assert(m_state == STATE_REACQUIRING);
+  ceph_assert(r >= 0);
+  complete_active_action(STATE_LOCKED, 0);
+}
+
+template <typename I>
+void ManagedLock<I>::release_acquire_lock() {
+  assert(m_lock.is_locked());
+
+  if (!is_state_shutdown()) {
+    // queue a release and re-acquire of the lock since cookie cannot
+    // be updated on older OSDs
+    execute_action(ACTION_RELEASE_LOCK, nullptr);
+
+    ceph_assert(!m_actions_contexts.empty());
+    ActionContexts &action_contexts(m_actions_contexts.front());
+
+    // reacquire completes when the request lock completes
+    Contexts contexts;
+    std::swap(contexts, action_contexts.second);
+    if (contexts.empty()) {
+      execute_action(ACTION_ACQUIRE_LOCK, nullptr);
+    } else {
+      for (auto ctx : contexts) {
+        execute_action(ACTION_ACQUIRE_LOCK, ctx);
+      }
+    }
+  }
 }
 
 template <typename I>
index bbd932b744c36ac34fbb86cd5eeabac49d19367f..b27e549dab470db0c7a09230306f7cd5bddd55a2 100644 (file)
@@ -55,7 +55,7 @@ public:
   void acquire_lock(Context *on_acquired);
   void try_acquire_lock(Context *on_acquired);
   void release_lock(Context *on_released);
-  void reacquire_lock(Context *on_reacquired = nullptr);
+  void reacquire_lock(Context *on_reacquired);
   void get_locker(managed_lock::Locker *locker, Context *on_finish);
   void break_lock(const managed_lock::Locker &locker, bool force_break_lock,
                   Context *on_finish);
@@ -247,6 +247,7 @@ private:
 
   void send_reacquire_lock();
   void handle_reacquire_lock(int r);
+  void release_acquire_lock();
 
   void send_release_lock();
   void handle_pre_release_lock(int r);
index b73ab02c709ef6c8300eeab0738d081e982126a9..4c9d2995891b24d84d2acd37e443f223eaf38ff9 100644 (file)
@@ -18,7 +18,7 @@ struct MockExclusiveLock {
   MOCK_METHOD2(init, void(uint64_t features, Context*));
   MOCK_METHOD1(shut_down, void(Context*));
 
-  MOCK_METHOD0(reacquire_lock, void());
+  MOCK_METHOD1(reacquire_lock, void(Context*));
   MOCK_METHOD1(try_acquire_lock, void(Context*));
 
   MOCK_METHOD1(block_requests, void(int));
index 237025788c8b61675a77b30ce93091470cd1e16a..921d9f50442bb0a14d06a2d85c7c8e156b691330 100644 (file)
@@ -199,7 +199,6 @@ public:
                              ContextWQ *work_queue,
                              MockReacquireRequest &mock_reacquire_request,
                              int r) {
-    expect_get_watch_handle(watcher, 98765);
     EXPECT_CALL(mock_reacquire_request, send())
                   .WillOnce(QueueRequest(&mock_reacquire_request, r, work_queue));
   }
@@ -526,6 +525,7 @@ TEST_F(TestMockManagedLock, ReacquireLock) {
 
   MockReacquireRequest mock_reacquire_request;
   C_SaferCond reacquire_ctx;
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765);
   expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, 0);
   managed_lock.reacquire_lock(&reacquire_ctx);
   ASSERT_EQ(0, reacquire_ctx.wait());
@@ -536,6 +536,72 @@ TEST_F(TestMockManagedLock, ReacquireLock) {
   ASSERT_FALSE(is_lock_owner(managed_lock));
 }
 
+TEST_F(TestMockManagedLock, AttemptReacquireBlacklistedLock) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockManagedLockImageCtx mock_image_ctx(*ictx);
+  MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue,
+                               ictx->header_oid, mock_image_ctx.image_watcher,
+                               librbd::managed_lock::EXCLUSIVE, true, 0);
+  InSequence seq;
+
+  MockAcquireRequest request_lock_acquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_acquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  MockReleaseRequest request_release;
+  expect_release_lock(ictx->op_work_queue, request_release, 0);
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  managed_lock.reacquire_lock(nullptr);
+
+  ASSERT_EQ(0, when_shut_down(managed_lock));
+  ASSERT_FALSE(is_lock_owner(managed_lock));
+}
+
+TEST_F(TestMockManagedLock, ReacquireBlacklistedLock) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockManagedLockImageCtx mock_image_ctx(*ictx);
+  MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue,
+                               ictx->header_oid, mock_image_ctx.image_watcher,
+                               librbd::managed_lock::EXCLUSIVE, true, 0);
+  InSequence seq;
+
+  MockAcquireRequest request_lock_acquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_acquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  MockReleaseRequest request_release;
+  expect_release_lock(ictx->op_work_queue, request_release, 0);
+
+  MockAcquireRequest request_lock_reacquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_reacquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  C_SaferCond reacquire_ctx;
+  managed_lock.reacquire_lock(&reacquire_ctx);
+  ASSERT_EQ(0, reacquire_ctx.wait());
+
+  MockReleaseRequest shutdown_release;
+  expect_release_lock(ictx->op_work_queue, shutdown_release, 0);
+  ASSERT_EQ(0, when_shut_down(managed_lock));
+  ASSERT_FALSE(is_lock_owner(managed_lock));
+}
+
 TEST_F(TestMockManagedLock, ReacquireLockError) {
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
@@ -553,6 +619,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) {
 
   MockReacquireRequest mock_reacquire_request;
   C_SaferCond reacquire_ctx;
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765);
   expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, -EOPNOTSUPP);
 
   MockReleaseRequest reacquire_lock_release;
@@ -562,7 +629,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) {
   expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, reacquire_lock_acquire, 0);
 
   managed_lock.reacquire_lock(&reacquire_ctx);
-  ASSERT_EQ(-EOPNOTSUPP, reacquire_ctx.wait());
+  ASSERT_EQ(0, reacquire_ctx.wait());
 
   MockReleaseRequest shutdown_release;
   expect_release_lock(ictx->op_work_queue, shutdown_release, 0);
index 113eeec7d7f9f72d9b2c2a7e9f82fd87211dbe91..e8b4326d890f077bf5d6f9f6bc618bbdd1ded7af 100644 (file)
@@ -127,7 +127,7 @@ struct ManagedLock<MockTestImageCtx> {
     m_work_queue->queue(pre_release_ctx, 0);
   }
 
-  void reacquire_lock() {
+  void reacquire_lock(Context* on_finish) {
     MockManagedLock::get_instance().reacquire_lock();
   }
 
index 0e7cd20405ba6bf8b1be0a45e2c548a88da6261d..4be445ba3da1a62134b6b1ef260ae1a3e58cbf6b 100644 (file)
@@ -1090,7 +1090,7 @@ template <typename I>
 void LeaderWatcher<I>::handle_rewatch_complete(int r) {
   dout(5) << "r=" << r << dendl;
 
-  m_leader_lock->reacquire_lock();
+  m_leader_lock->reacquire_lock(nullptr);
 }
 
 template <typename I>