git-server-git.apps.pok.os.sepia.ceph.com Git - ceph-ci.git/commitdiff
tools/cephfs_mirror: Fix dequeue of syncm on error
author Kotresh HR <khiremat@redhat.com>
Sat, 21 Feb 2026 10:36:31 +0000 (16:06 +0530)
committer Kotresh HR <khiremat@redhat.com>
Sat, 21 Feb 2026 20:12:39 +0000 (01:42 +0530)
When an error is encountered in the crawler thread or a datasync
thread while processing a syncm object, it's possible that
multiple datasync threads attempt to dequeue the same syncm
object. Though the repeated dequeue is safe, add a condition to
avoid it.

Fixes: https://tracker.ceph.com/issues/73452
Signed-off-by: Kotresh HR <khiremat@redhat.com>
src/tools/cephfs_mirror/PeerReplayer.cc
src/tools/cephfs_mirror/PeerReplayer.h

index 22bb09e55c16e2187c77973d2ed5d427d943b839..04759b1372a27faa632840d56ced11d850745a3f 100644 (file)
@@ -2217,6 +2217,10 @@ void PeerReplayer::run(SnapshotReplayerThread *replayer) {
   }
 }
 
+bool PeerReplayer::is_syncm_active(const std::shared_ptr<PeerReplayer::SyncMechanism>& syncm_obj) {
+    return std::find(syncm_q.begin(), syncm_q.end(), syncm_obj) != syncm_q.end();
+}
+
 void PeerReplayer::remove_syncm(const std::shared_ptr<PeerReplayer::SyncMechanism>& syncm_obj)
 {
     // caller holds lock
@@ -2342,15 +2346,19 @@ void PeerReplayer::run_datasync(SnapshotDataSyncThread *data_replayer) {
         syncm->get_datasync_error_unlocked() ||
         syncm->get_crawl_error_unlocked();
       if (!syncm_q.empty() && no_in_flight_syncm_jobs && (crawl_finished || sync_error)) {
-        dout(20) << ": Dequeue syncm object=" << syncm << dendl;
-        syncm->set_sync_finished_and_notify_unlocked(); // To wake up crawler thread waiting to take snapshot
-        if (syncm_q.front() == syncm) {
-          syncm_q.pop_front();
-        } else { // if syncms in the middle finishes first
-          remove_syncm(syncm);
+        if (sync_error && !is_syncm_active(syncm)){
+          dout(20) << ": syncm object=" << syncm << " already dequeued" << dendl;
+        } else {
+          dout(20) << ": Dequeue syncm object=" << syncm << dendl;
+          syncm->set_sync_finished_and_notify_unlocked(); // To wake up crawler thread waiting to take snapshot
+          if (syncm_q.front() == syncm) {
+            syncm_q.pop_front();
+          } else { // if syncms in the middle finishes first
+            remove_syncm(syncm);
+          }
+          dout(20) << ": syncm_q after removal " << syncm_q << dendl;
+          smq_cv.notify_all();
         }
-        dout(20) << ": syncm_q after removal " << syncm_q << dendl;
-        smq_cv.notify_all();
       }
     }
 
index f60dc06d647ec5577ad1df6f8e5fd669a929e8a1..38282c2c261e01f5c4ebbd16893b51acebca06af 100644 (file)
@@ -489,6 +489,7 @@ private:
   void run(SnapshotReplayerThread *replayer);
   void run_datasync(SnapshotDataSyncThread *data_replayer);
   void remove_syncm(const std::shared_ptr<SyncMechanism>& syncm_obj);
+  bool is_syncm_active(const std::shared_ptr<SyncMechanism>& syncm_obj);
   std::shared_ptr<SyncMechanism> pick_next_syncm() const;
 
   boost::optional<std::string> pick_directory();