From: Kotresh HR Date: Wed, 14 Jan 2026 19:59:36 +0000 (+0530) Subject: tools/cephfs_mirror: Fix dequeue of syncm on error X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=daff22f2738bb7fdb9136af528ade0ef26ad5cda;p=ceph-ci.git tools/cephfs_mirror: Fix dequeue of syncm on error When an error is encountered in the crawler thread or a datasync thread while processing a syncm object, it's possible that multiple datasync threads attempt to dequeue the same syncm object. Though that is safe, add a condition to avoid the redundant dequeue. Fixes: https://tracker.ceph.com/issues/73452 Signed-off-by: Kotresh HR --- diff --git a/src/tools/cephfs_mirror/PeerReplayer.cc b/src/tools/cephfs_mirror/PeerReplayer.cc index 75c221cb023..15c8545fad9 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.cc +++ b/src/tools/cephfs_mirror/PeerReplayer.cc @@ -2217,6 +2217,10 @@ void PeerReplayer::run(SnapshotReplayerThread *replayer) { } } +bool PeerReplayer::is_syncm_active(const std::shared_ptr& syncm_obj) { + return std::find(syncm_q.begin(), syncm_q.end(), syncm_obj) != syncm_q.end(); +} + void PeerReplayer::remove_syncm(const std::shared_ptr& syncm_obj) { // caller holds lock @@ -2343,16 +2347,20 @@ void PeerReplayer::run_datasync(SnapshotDataSyncThread *data_replayer) { syncm->get_datasync_error_unlocked() || syncm->get_crawl_error_unlocked(); if (!syncm_q.empty() && no_in_flight_syncm_jobs && (crawl_finished || sync_error)) { - dout(20) << ": Dequeue syncm object=" << syncm << dendl; - syncm->set_snapshot_unlocked(); - syncm->sdq_cv_notify_all_unlocked(); // To wake up crawler thread waiting to take snapshot - if (syncm_q.front() == syncm) { - syncm_q.pop_front(); - } else { // if syncms in the middle finishes first - remove_syncm(syncm); - } - dout(20) << ": syncm_q after removal " << syncm_q << dendl; - smq_cv.notify_all(); + if (sync_error && !is_syncm_active(syncm)){ + dout(20) << ": syncm object=" << syncm << " already dequeued" << dendl; + } else { + dout(20) << ": Dequeue syncm object=" 
<< syncm << dendl; + syncm->set_snapshot_unlocked(); + syncm->sdq_cv_notify_all_unlocked(); // To wake up crawler thread waiting to take snapshot + if (syncm_q.front() == syncm) { + syncm_q.pop_front(); + } else { // if syncms in the middle finishes first + remove_syncm(syncm); + } + dout(20) << ": syncm_q after removal " << syncm_q << dendl; + smq_cv.notify_all(); + } } } diff --git a/src/tools/cephfs_mirror/PeerReplayer.h b/src/tools/cephfs_mirror/PeerReplayer.h index 477134bddac..98c94a4838e 100644 --- a/src/tools/cephfs_mirror/PeerReplayer.h +++ b/src/tools/cephfs_mirror/PeerReplayer.h @@ -488,6 +488,7 @@ private: void run(SnapshotReplayerThread *replayer); void run_datasync(SnapshotDataSyncThread *data_replayer); void remove_syncm(const std::shared_ptr& syncm_obj); + bool is_syncm_active(const std::shared_ptr& syncm_obj); std::shared_ptr pick_next_syncm() const; boost::optional pick_directory();