From 66b1fd9ccee48dccc06bd016128c775e5acba640 Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Wed, 19 Nov 2025 17:13:36 +0530 Subject: [PATCH] librbd: fix incomplete group snapshot not being removed on creation failure Problem: GroupCreatePrimaryRequest doesn't remove the group snapshot when group snapshot creation encounters an error in notify_quiesce(). As a result, INCOMPLETE snapshots from previous failed attempts are never cleaned up. Log snippet: librbd::watcher::Notifier: 0x7fbdac0168b0 handle_notify: r=-110 librbd::mirror::snapshot::GroupCreatePrimaryRequest: handle_notify_quiesce: r=-110 librbd::mirror::snapshot::GroupCreatePrimaryRequest: notify_unquiesce: librbd::watcher::Notifier: 0x7fbda83c59a0 handle_notify: r=-110 librbd::mirror::snapshot::GroupCreatePrimaryRequest: handle_notify_unquiesce: r=-110 librbd::mirror::snapshot::GroupCreatePrimaryRequest: handle_notify_unquiesce: failed to notify the unquiesce requests: (110) Connection timed out librbd::mirror::snapshot::GroupCreatePrimaryRequest: close_images: librbd::mirror::snapshot::GroupCreatePrimaryRequest: handle_close_images: r=0 librbd::mirror::snapshot::GroupCreatePrimaryRequest: finish: r=-110 When snapshot creation fails, the remove snap path that cleans the snapshot is skipped, leaving behind INCOMPLETE snapshot entries. Solution: Ensure remove_snap_metadata() is executed when the quiesce notification fails, as in the scenario above, so that the INCOMPLETE snapshot is consistently cleaned up. Note: Another issue was identified and fixed around GroupUnlinkPeerRequest::remove_peer_uuid(), i.e., in the case of an INCOMPLETE snapshot, group_snap_set() is expected to return an EEXIST error, and that is now handled. 
Signed-off-by: Prasanna Kumar Kalever Resolves: rhbz#2415401 --- src/librbd/mirror/snapshot/GroupCreatePrimaryRequest.cc | 4 +++- src/librbd/mirror/snapshot/GroupUnlinkPeerRequest.cc | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/librbd/mirror/snapshot/GroupCreatePrimaryRequest.cc b/src/librbd/mirror/snapshot/GroupCreatePrimaryRequest.cc index 40a83dc1fe0..8f888ff5758 100644 --- a/src/librbd/mirror/snapshot/GroupCreatePrimaryRequest.cc +++ b/src/librbd/mirror/snapshot/GroupCreatePrimaryRequest.cc @@ -530,8 +530,10 @@ void GroupCreatePrimaryRequest::handle_notify_quiesce(int r) { if (r < 0 && (m_snap_create_flags & SNAP_CREATE_FLAG_IGNORE_NOTIFY_QUIESCE_ERROR) == 0) { + lderr(m_cct) << "failed to notify the quiesce requests: " + << cpp_strerror(r) << dendl; m_ret_code = r; - notify_unquiesce(); + remove_snap_metadata(); return; } diff --git a/src/librbd/mirror/snapshot/GroupUnlinkPeerRequest.cc b/src/librbd/mirror/snapshot/GroupUnlinkPeerRequest.cc index 68920f34960..47778ecefd0 100644 --- a/src/librbd/mirror/snapshot/GroupUnlinkPeerRequest.cc +++ b/src/librbd/mirror/snapshot/GroupUnlinkPeerRequest.cc @@ -157,9 +157,12 @@ void GroupUnlinkPeerRequest::process_snapshot(cls::rbd::GroupSnapshot group_s const auto& ns = std::get( group_snap.snapshot_namespace); - if (ns.mirror_peer_uuids.empty()) { + if (ns.mirror_peer_uuids.empty() || + group_snap.state == cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE) { remove_group_snapshot(group_snap); } else { + // Note: avoid calling remove_peer_uuid() for INCOMPLETE snapshots as + // group_snap_set() returns EEXIST error remove_peer_uuid(group_snap, mirror_peer_uuid); } } -- 2.47.3