From f540d1d6205df00e4c9a880dc267c2824a81402d Mon Sep 17 00:00:00 2001 From: Kotresh HR Date: Sat, 21 Feb 2026 16:06:31 +0530 Subject: [PATCH] tools/cephfs_mirror: Fix dequeue of syncm on error When an error is encountered in the crawler thread or a datasync thread while processing a syncm object, it's possible that multiple datasync threads attempt to dequeue the same syncm object. Though it's safe, add a condition to avoid it. Fixes: https://tracker.ceph.com/issues/73452 Signed-off-by: Kotresh HR --- src/tools/cephfs_mirror/PeerReplayer.cc | 24 ++++++++++++++++-------- src/tools/cephfs_mirror/PeerReplayer.h | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/tools/cephfs_mirror/PeerReplayer.cc b/src/tools/cephfs_mirror/PeerReplayer.cc index 22bb09e55c1..04759b1372a 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.cc +++ b/src/tools/cephfs_mirror/PeerReplayer.cc @@ -2217,6 +2217,10 @@ void PeerReplayer::run(SnapshotReplayerThread *replayer) { } } +bool PeerReplayer::is_syncm_active(const std::shared_ptr& syncm_obj) { + return std::find(syncm_q.begin(), syncm_q.end(), syncm_obj) != syncm_q.end(); +} + void PeerReplayer::remove_syncm(const std::shared_ptr& syncm_obj) { // caller holds lock @@ -2342,15 +2346,19 @@ void PeerReplayer::run_datasync(SnapshotDataSyncThread *data_replayer) { syncm->get_datasync_error_unlocked() || syncm->get_crawl_error_unlocked(); if (!syncm_q.empty() && no_in_flight_syncm_jobs && (crawl_finished || sync_error)) { - dout(20) << ": Dequeue syncm object=" << syncm << dendl; - syncm->set_sync_finished_and_notify_unlocked(); // To wake up crawler thread waiting to take snapshot - if (syncm_q.front() == syncm) { - syncm_q.pop_front(); - } else { // if syncms in the middle finishes first - remove_syncm(syncm); + if (sync_error && !is_syncm_active(syncm)){ + dout(20) << ": syncm object=" << syncm << " already dequeued" << dendl; + } else { + dout(20) << ": Dequeue syncm object=" << syncm << dendl; + 
syncm->set_sync_finished_and_notify_unlocked(); // To wake up crawler thread waiting to take snapshot + if (syncm_q.front() == syncm) { + syncm_q.pop_front(); + } else { // if syncms in the middle finishes first + remove_syncm(syncm); + } + dout(20) << ": syncm_q after removal " << syncm_q << dendl; + smq_cv.notify_all(); } - dout(20) << ": syncm_q after removal " << syncm_q << dendl; - smq_cv.notify_all(); } } diff --git a/src/tools/cephfs_mirror/PeerReplayer.h b/src/tools/cephfs_mirror/PeerReplayer.h index f60dc06d647..38282c2c261 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.h +++ b/src/tools/cephfs_mirror/PeerReplayer.h @@ -489,6 +489,7 @@ private: void run(SnapshotReplayerThread *replayer); void run_datasync(SnapshotDataSyncThread *data_replayer); void remove_syncm(const std::shared_ptr& syncm_obj); + bool is_syncm_active(const std::shared_ptr& syncm_obj); std::shared_ptr pick_next_syncm() const; boost::optional pick_directory(); -- 2.47.3