]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
tools/cephfs_mirror: Fix dequeue of syncm on error
authorKotresh HR <khiremat@redhat.com>
Wed, 14 Jan 2026 19:59:36 +0000 (01:29 +0530)
committerKotresh HR <khiremat@redhat.com>
Tue, 17 Feb 2026 20:10:51 +0000 (01:40 +0530)
On error encountered in crawler thread or datasync
thread while processing a syncm object, it's possible
that multiple datasync threads attempts the dequeue of
syncm object. Though it's safe, add a condition to avoid
it.

Fixes: https://tracker.ceph.com/issues/73452
Signed-off-by: Kotresh HR <khiremat@redhat.com>
src/tools/cephfs_mirror/PeerReplayer.cc
src/tools/cephfs_mirror/PeerReplayer.h

index 75c221cb023c10f246aeae5ff0a394a18230edeb..15c8545fad9a86fb88b935a7fe3a8491b62e38af 100644 (file)
@@ -2217,6 +2217,10 @@ void PeerReplayer::run(SnapshotReplayerThread *replayer) {
   }
 }
 
+bool PeerReplayer::is_syncm_active(const std::shared_ptr<PeerReplayer::SyncMechanism>& syncm_obj) {
+    return std::find(syncm_q.begin(), syncm_q.end(), syncm_obj) != syncm_q.end();
+}
+
 void PeerReplayer::remove_syncm(const std::shared_ptr<PeerReplayer::SyncMechanism>& syncm_obj)
 {
     // caller holds lock
@@ -2343,16 +2347,20 @@ void PeerReplayer::run_datasync(SnapshotDataSyncThread *data_replayer) {
         syncm->get_datasync_error_unlocked() ||
         syncm->get_crawl_error_unlocked();
       if (!syncm_q.empty() && no_in_flight_syncm_jobs && (crawl_finished || sync_error)) {
-        dout(20) << ": Dequeue syncm object=" << syncm << dendl;
-        syncm->set_snapshot_unlocked();
-        syncm->sdq_cv_notify_all_unlocked(); // To wake up crawler thread waiting to take snapshot
-        if (syncm_q.front() == syncm) {
-          syncm_q.pop_front();
-        } else { // if syncms in the middle finishes first
-          remove_syncm(syncm);
-        }
-        dout(20) << ": syncm_q after removal " << syncm_q << dendl;
-        smq_cv.notify_all();
+        if (sync_error && !is_syncm_active(syncm)){
+          dout(20) << ": syncm object=" << syncm << " already dequeued" << dendl;
+        } else {
+          dout(20) << ": Dequeue syncm object=" << syncm << dendl;
+          syncm->set_snapshot_unlocked();
+          syncm->sdq_cv_notify_all_unlocked(); // To wake up crawler thread waiting to take snapshot
+          if (syncm_q.front() == syncm) {
+            syncm_q.pop_front();
+          } else { // if syncms in the middle finishes first
+            remove_syncm(syncm);
+          }
+          dout(20) << ": syncm_q after removal " << syncm_q << dendl;
+          smq_cv.notify_all();
+       }
       }
     }
 
index 477134bddacdef42b4d7e702fb884a35d76c5ed1..98c94a4838e7ba941259bed11fdef6f0b05c3e6f 100644 (file)
@@ -488,6 +488,7 @@ private:
   void run(SnapshotReplayerThread *replayer);
   void run_datasync(SnapshotDataSyncThread *data_replayer);
   void remove_syncm(const std::shared_ptr<SyncMechanism>& syncm_obj);
+  bool is_syncm_active(const std::shared_ptr<SyncMechanism>& syncm_obj);
   std::shared_ptr<SyncMechanism> pick_next_syncm() const;
 
   boost::optional<std::string> pick_directory();