]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
librbd: reacquire lock should properly handle failed watcher
authorJason Dillaman <dillaman@redhat.com>
Thu, 6 Sep 2018 17:38:17 +0000 (13:38 -0400)
committerJason Dillaman <dillaman@redhat.com>
Wed, 3 Oct 2018 15:25:05 +0000 (11:25 -0400)
If the watch has been lost, assume the lock has been lost but attempt
to reacquire it if and when the watch is re-established.

Signed-off-by: Jason Dillaman <dillaman@redhat.com>
(cherry picked from commit 2057d99f451e3007d4fd05a88faa968319d0ba90)

Conflicts:
src/librbd/ManagedLock.cc: trivial resolution

src/librbd/ImageWatcher.cc
src/librbd/ManagedLock.cc
src/librbd/ManagedLock.h
src/test/librbd/mock/MockExclusiveLock.h
src/test/librbd/test_mock_ManagedLock.cc
src/test/rbd_mirror/test_mock_LeaderWatcher.cc
src/tools/rbd_mirror/LeaderWatcher.cc

index daac18868f71d04f22126a1322f1dbf302b0cad0..d9ed8ee613c1c370b73c1a3a6708b74f4d01cb3e 100644 (file)
@@ -979,7 +979,7 @@ void ImageWatcher<I>::handle_rewatch_complete(int r) {
     RWLock::RLocker owner_locker(m_image_ctx.owner_lock);
     if (m_image_ctx.exclusive_lock != nullptr) {
       // update the lock cookie with the new watch handle
-      m_image_ctx.exclusive_lock->reacquire_lock();
+      m_image_ctx.exclusive_lock->reacquire_lock(nullptr);
     }
   }
 
index 86b63c062c364a8e1d23dc7c6ee1f1b5fdf3cb84..38fd3aa9d79b81d181db110a3067324b9455c814 100644 (file)
@@ -132,7 +132,7 @@ void ManagedLock<I>::shut_down(Context *on_shut_down) {
 
   if (m_state == STATE_WAITING_FOR_REGISTER) {
     // abort stalled acquire lock state
-    ldout(m_cct, 10) << "woke up waiting acquire" << dendl;
+    ldout(m_cct, 10) << "woke up waiting (re)acquire" << dendl;
     Action active_action = get_active_action();
     assert(active_action == ACTION_TRY_LOCK ||
            active_action == ACTION_ACQUIRE_LOCK);
@@ -467,14 +467,20 @@ void ManagedLock<I>::send_acquire_lock() {
   }
 
   ldout(m_cct, 10) << dendl;
-  m_state = STATE_ACQUIRING;
 
   uint64_t watch_handle = m_watcher->get_watch_handle();
   if (watch_handle == 0) {
     lderr(m_cct) << "watcher not registered - delaying request" << dendl;
     m_state = STATE_WAITING_FOR_REGISTER;
+
+    // shut down might race w/ release/re-acquire of the lock
+    if (is_state_shutdown()) {
+      complete_active_action(STATE_UNLOCKED, -ESHUTDOWN);
+    }
     return;
   }
+
+  m_state = STATE_ACQUIRING;
   m_cookie = encode_lock_cookie(watch_handle);
 
   m_work_queue->queue(new FunctionContext([this](int r) {
@@ -522,13 +528,6 @@ void ManagedLock<I>::handle_acquire_lock(int r) {
   }));
 }
 
-template <typename I>
-void ManagedLock<I>::handle_no_op_reacquire_lock(int r) {
-  ldout(m_cct, 10) << "r=" << r << dendl;
-  assert(r >= 0);
-  complete_active_action(STATE_LOCKED, 0);
-}
-
 template <typename I>
 void ManagedLock<I>::handle_post_acquire_lock(int r) {
   ldout(m_cct, 10) << ": r=" << r << dendl;
@@ -568,13 +567,20 @@ void ManagedLock<I>::send_reacquire_lock() {
     return;
   }
 
+  ldout(m_cct, 10) << dendl;
+  m_state = STATE_REACQUIRING;
+
   uint64_t watch_handle = m_watcher->get_watch_handle();
   if (watch_handle == 0) {
-     // watch (re)failed while recovering
-     lderr(m_cct) << ": aborting reacquire due to invalid watch handle"
-                  << dendl;
-     complete_active_action(STATE_LOCKED, 0);
-     return;
+    // watch (re)failed while recovering
+    lderr(m_cct) << ": aborting reacquire due to invalid watch handle"
+                 << dendl;
+
+    // treat double-watch failure as a lost lock and invoke the
+    // release/acquire handlers
+    release_acquire_lock();
+    complete_active_action(STATE_LOCKED, 0);
+    return;
   }
 
   m_new_cookie = encode_lock_cookie(watch_handle);
@@ -587,9 +593,6 @@ void ManagedLock<I>::send_reacquire_lock() {
     return;
   }
 
-  ldout(m_cct, 10) << dendl;
-  m_state = STATE_REACQUIRING;
-
   auto ctx = create_context_callback<
     ManagedLock, &ManagedLock<I>::handle_reacquire_lock>(this);
   ctx = new FunctionContext([this, ctx](int r) {
@@ -617,36 +620,44 @@ void ManagedLock<I>::handle_reacquire_lock(int r) {
                    << dendl;
     }
 
-    if (!is_state_shutdown()) {
-      // queue a release and re-acquire of the lock since cookie cannot
-      // be updated on older OSDs
-      execute_action(ACTION_RELEASE_LOCK, nullptr);
-
-      assert(!m_actions_contexts.empty());
-      ActionContexts &action_contexts(m_actions_contexts.front());
-
-      // reacquire completes when the request lock completes
-      Contexts contexts;
-      std::swap(contexts, action_contexts.second);
-      if (contexts.empty()) {
-        execute_action(ACTION_ACQUIRE_LOCK, nullptr);
-      } else {
-        for (auto ctx : contexts) {
-          ctx = new FunctionContext([ctx, r](int acquire_ret_val) {
-              if (acquire_ret_val >= 0) {
-                acquire_ret_val = r;
-              }
-              ctx->complete(acquire_ret_val);
-            });
-          execute_action(ACTION_ACQUIRE_LOCK, ctx);
-        }
-      }
-    }
+    release_acquire_lock();
   } else {
     m_cookie = m_new_cookie;
   }
 
-  complete_active_action(STATE_LOCKED, r);
+  complete_active_action(STATE_LOCKED, 0);
+}
+
+template <typename I>
+void ManagedLock<I>::handle_no_op_reacquire_lock(int r) {
+  ldout(m_cct, 10) << "r=" << r << dendl;
+  assert(r >= 0);
+  complete_active_action(STATE_LOCKED, 0);
+}
+
+template <typename I>
+void ManagedLock<I>::release_acquire_lock() {
+  assert(m_lock.is_locked());
+
+  if (!is_state_shutdown()) {
+    // queue a release and re-acquire of the lock since cookie cannot
+    // be updated on older OSDs
+    execute_action(ACTION_RELEASE_LOCK, nullptr);
+
+    ceph_assert(!m_actions_contexts.empty());
+    ActionContexts &action_contexts(m_actions_contexts.front());
+
+    // reacquire completes when the request lock completes
+    Contexts contexts;
+    std::swap(contexts, action_contexts.second);
+    if (contexts.empty()) {
+      execute_action(ACTION_ACQUIRE_LOCK, nullptr);
+    } else {
+      for (auto ctx : contexts) {
+        execute_action(ACTION_ACQUIRE_LOCK, ctx);
+      }
+    }
+  }
 }
 
 template <typename I>
index fdb6d43d78c417e32a583e928fa28ea8707f9504..a152adfc84d6207048353db9b0bc2081228f7c5f 100644 (file)
@@ -55,7 +55,7 @@ public:
   void acquire_lock(Context *on_acquired);
   void try_acquire_lock(Context *on_acquired);
   void release_lock(Context *on_released);
-  void reacquire_lock(Context *on_reacquired = nullptr);
+  void reacquire_lock(Context *on_reacquired);
   void get_locker(managed_lock::Locker *locker, Context *on_finish);
   void break_lock(const managed_lock::Locker &locker, bool force_break_lock,
                   Context *on_finish);
@@ -247,6 +247,7 @@ private:
 
   void send_reacquire_lock();
   void handle_reacquire_lock(int r);
+  void release_acquire_lock();
 
   void send_release_lock();
   void handle_pre_release_lock(int r);
index b73ab02c709ef6c8300eeab0738d081e982126a9..4c9d2995891b24d84d2acd37e443f223eaf38ff9 100644 (file)
@@ -18,7 +18,7 @@ struct MockExclusiveLock {
   MOCK_METHOD2(init, void(uint64_t features, Context*));
   MOCK_METHOD1(shut_down, void(Context*));
 
-  MOCK_METHOD0(reacquire_lock, void());
+  MOCK_METHOD1(reacquire_lock, void(Context*));
   MOCK_METHOD1(try_acquire_lock, void(Context*));
 
   MOCK_METHOD1(block_requests, void(int));
index 8668485acc273210cf5d54348ce387432d3f8c2d..a1e955a6a658ed7e4b9a6d8694fe9e6a53c4134e 100644 (file)
@@ -195,7 +195,6 @@ public:
                              ContextWQ *work_queue,
                              MockReacquireRequest &mock_reacquire_request,
                              int r) {
-    expect_get_watch_handle(watcher, 98765);
     EXPECT_CALL(mock_reacquire_request, send())
                   .WillOnce(QueueRequest(&mock_reacquire_request, r, work_queue));
   }
@@ -523,6 +522,7 @@ TEST_F(TestMockManagedLock, ReacquireLock) {
 
   MockReacquireRequest mock_reacquire_request;
   C_SaferCond reacquire_ctx;
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765);
   expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, 0);
   managed_lock.reacquire_lock(&reacquire_ctx);
   ASSERT_EQ(0, reacquire_ctx.wait());
@@ -533,6 +533,72 @@ TEST_F(TestMockManagedLock, ReacquireLock) {
   ASSERT_FALSE(is_lock_owner(managed_lock));
 }
 
+TEST_F(TestMockManagedLock, AttemptReacquireBlacklistedLock) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockManagedLockImageCtx mock_image_ctx(*ictx);
+  MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue,
+                               ictx->header_oid, mock_image_ctx.image_watcher,
+                               librbd::managed_lock::EXCLUSIVE, true, 0);
+  InSequence seq;
+
+  MockAcquireRequest request_lock_acquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_acquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  MockReleaseRequest request_release;
+  expect_release_lock(ictx->op_work_queue, request_release, 0);
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  managed_lock.reacquire_lock(nullptr);
+
+  ASSERT_EQ(0, when_shut_down(managed_lock));
+  ASSERT_FALSE(is_lock_owner(managed_lock));
+}
+
+TEST_F(TestMockManagedLock, ReacquireBlacklistedLock) {
+  librbd::ImageCtx *ictx;
+  ASSERT_EQ(0, open_image(m_image_name, &ictx));
+
+  MockManagedLockImageCtx mock_image_ctx(*ictx);
+  MockManagedLock managed_lock(ictx->md_ctx, ictx->op_work_queue,
+                               ictx->header_oid, mock_image_ctx.image_watcher,
+                               librbd::managed_lock::EXCLUSIVE, true, 0);
+  InSequence seq;
+
+  MockAcquireRequest request_lock_acquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_acquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 0);
+
+  MockReleaseRequest request_release;
+  expect_release_lock(ictx->op_work_queue, request_release, 0);
+
+  MockAcquireRequest request_lock_reacquire;
+  expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue,
+                      request_lock_reacquire, 0);
+  ASSERT_EQ(0, when_acquire_lock(managed_lock));
+  ASSERT_TRUE(is_lock_owner(managed_lock));
+
+  C_SaferCond reacquire_ctx;
+  managed_lock.reacquire_lock(&reacquire_ctx);
+  ASSERT_EQ(0, reacquire_ctx.wait());
+
+  MockReleaseRequest shutdown_release;
+  expect_release_lock(ictx->op_work_queue, shutdown_release, 0);
+  ASSERT_EQ(0, when_shut_down(managed_lock));
+  ASSERT_FALSE(is_lock_owner(managed_lock));
+}
+
 TEST_F(TestMockManagedLock, ReacquireLockError) {
   librbd::ImageCtx *ictx;
   ASSERT_EQ(0, open_image(m_image_name, &ictx));
@@ -550,6 +616,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) {
 
   MockReacquireRequest mock_reacquire_request;
   C_SaferCond reacquire_ctx;
+  expect_get_watch_handle(*mock_image_ctx.image_watcher, 98765);
   expect_reacquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, mock_reacquire_request, -EOPNOTSUPP);
 
   MockReleaseRequest reacquire_lock_release;
@@ -559,7 +626,7 @@ TEST_F(TestMockManagedLock, ReacquireLockError) {
   expect_acquire_lock(*mock_image_ctx.image_watcher, ictx->op_work_queue, reacquire_lock_acquire, 0);
 
   managed_lock.reacquire_lock(&reacquire_ctx);
-  ASSERT_EQ(-EOPNOTSUPP, reacquire_ctx.wait());
+  ASSERT_EQ(0, reacquire_ctx.wait());
 
   MockReleaseRequest shutdown_release;
   expect_release_lock(ictx->op_work_queue, shutdown_release, 0);
index 2588db43c6d8e387aacd7bef5a0b22b379c23801..a8ba98956a227a8c65b6956a0f8a5936fde13c1e 100644 (file)
@@ -127,7 +127,7 @@ struct ManagedLock<MockTestImageCtx> {
     m_work_queue->queue(pre_release_ctx, 0);
   }
 
-  void reacquire_lock() {
+  void reacquire_lock(Context* on_finish) {
     MockManagedLock::get_instance().reacquire_lock();
   }
 
index 51351e0b8d04bf1de6201b09df62cc5e287dbee1..4aadeb15db41c6c5fde0667cecb6ec099c548188 100644 (file)
@@ -1093,7 +1093,7 @@ template <typename I>
 void LeaderWatcher<I>::handle_rewatch_complete(int r) {
   dout(5) << "r=" << r << dendl;
 
-  m_leader_lock->reacquire_lock();
+  m_leader_lock->reacquire_lock(nullptr);
 }
 
 template <typename I>