git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
tools/cephfs_mirror: Fix lock order issue wip-khiremat-mulithread-mirror-66572-reviewed-1
author: Kotresh HR <khiremat@redhat.com>
Sun, 15 Feb 2026 18:41:51 +0000 (00:11 +0530)
committer: Kotresh HR <khiremat@redhat.com>
Sat, 21 Feb 2026 16:34:18 +0000 (22:04 +0530)
Lock order 1:
InstanceWatcher::m_lock ----> FSMirror::m_lock
Lock order 2:
FSMirror::m_lock -----> InstanceWatcher::m_lock

Lock order 1 is where the abort occurs, and it happens during
blocklisting. InstanceWatcher::handle_rewatch_complete()
acquires InstanceWatcher::m_lock and then calls
m_elistener.set_blocklisted_ts(), which tries to acquire
FSMirror::m_lock.

Lock order 2 exists in the mirror peer status command.
FSMirror::mirror_status(Formatter *f) takes FSMirror::m_lock
and calls is_blocklisted(), which takes InstanceWatcher::m_lock.

Fix:
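FSMirror::m_blocklisted_ts and FSMirror::m_failed_ts are converted
to std::atomic, so their accessors no longer take FSMirror::m_lock.
The scope of m_lock is also narrowed in
InstanceWatcher::handle_rewatch_complete() and
MirrorWatcher::handle_rewatch_complete(), so that
m_elistener.set_blocklisted_ts() is called after m_lock is released.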

See the tracker for the traceback and further details.

Fixes: https://tracker.ceph.com/issues/74953
Signed-off-by: Kotresh HR <khiremat@redhat.com>
src/tools/cephfs_mirror/FSMirror.h
src/tools/cephfs_mirror/InstanceWatcher.cc
src/tools/cephfs_mirror/MirrorWatcher.cc

index 17f0f82164b0f5d0d1909e18b5c499fa8d0d991f..594049baa6bceedfd48f46ae3f39bda155fde8d2 100644 (file)
@@ -58,13 +58,11 @@ public:
   }
 
   monotime get_failed_ts() {
-    std::scoped_lock locker(m_lock);
-    return m_failed_ts;
+    return m_failed_ts.load(std::memory_order_relaxed);
   }
 
   void set_failed_ts() {
-    std::scoped_lock locker(m_lock);
-    m_failed_ts = clock::now();
+    m_failed_ts.store(clock::now(), std::memory_order_relaxed);
   }
 
   bool is_blocklisted() {
@@ -73,13 +71,11 @@ public:
   }
 
   monotime get_blocklisted_ts() {
-    std::scoped_lock locker(m_lock);
-    return m_blocklisted_ts;
+    return m_blocklisted_ts.load(std::memory_order_relaxed);
   }
 
   void set_blocklisted_ts() {
-    std::scoped_lock locker(m_lock);
-    m_blocklisted_ts = clock::now();
+    m_blocklisted_ts.store(clock::now(), std::memory_order_relaxed);
   }
 
   Peers get_peers() {
@@ -140,8 +136,8 @@ private:
     }
   };
 
-  monotime m_blocklisted_ts;
-  monotime m_failed_ts;
+  std::atomic<monotime> m_blocklisted_ts;
+  std::atomic<monotime> m_failed_ts;
   CephContext *m_cct;
   Filesystem m_filesystem;
   uint64_t m_pool_id;
index 3ea3906404cc5bf8ab984af0946464d8f2e4b416..8cd7214b5531c15def6315fd41ba1834fa905ca3 100644 (file)
@@ -115,8 +115,10 @@ void InstanceWatcher::handle_rewatch_complete(int r) {
 
   if (r == -EBLOCKLISTED) {
     dout(0) << ": client blocklisted" <<dendl;
-    std::scoped_lock locker(m_lock);
-    m_blocklisted = true;
+    {
+      std::scoped_lock locker(m_lock);
+      m_blocklisted = true;
+    }
     m_elistener.set_blocklisted_ts();
   } else if (r == -ENOENT) {
     derr << ": mirroring object deleted" << dendl;
index 52c760e750170c76dce752ce0c283fbb3b4c53ea..e260666c3bf765a72a41524fa25300aca57545d7 100644 (file)
@@ -91,8 +91,10 @@ void MirrorWatcher::handle_rewatch_complete(int r) {
 
   if (r == -EBLOCKLISTED) {
     dout(0) << ": client blocklisted" <<dendl;
-    std::scoped_lock locker(m_lock);
-    m_blocklisted = true;
+    {
+      std::scoped_lock locker(m_lock);
+      m_blocklisted = true;
+    }
     m_elistener.set_blocklisted_ts();
   } else if (r == -ENOENT) {
     derr << ": mirroring object deleted" << dendl;