From: Prasanna Kumar Kalever Date: Tue, 27 May 2025 07:52:37 +0000 (+0530) Subject: rbd-mirror: avoid altering local group snap vector elements X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=9cb56b6dca7cf3a3b0134fa6934405224ce9314a;p=ceph-ci.git rbd-mirror: avoid altering local group snap vector elements Now the m_local_group_snaps is dedicated to ListSnapshotsRequest() locally and is unaltered. Note, this patch fixes the crash, that can be seen with user snap created, then remove and then created with the same name. Signed-off-by: Prasanna Kumar Kalever --- diff --git a/qa/workunits/rbd/rbd_mirror_group_simple.sh b/qa/workunits/rbd/rbd_mirror_group_simple.sh index 57353079914..ac184eb2f43 100755 --- a/qa/workunits/rbd/rbd_mirror_group_simple.sh +++ b/qa/workunits/rbd/rbd_mirror_group_simple.sh @@ -277,6 +277,7 @@ test_group_rename() wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}" wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}" + wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group}" 'up+replaying' "${image_count}" group_rename "${primary_cluster}" "${pool}/${group}" "${pool}/${group}_renamed" @@ -299,6 +300,7 @@ test_group_rename() wait_for_group_present "${secondary_cluster}" "${pool}" "${group1}" "${image_count}" wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group1}" "${image_count}" + wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group1}" 'up+replaying' "${image_count}" group_rename "${primary_cluster}" "${pool}/${group1}" "${pool}/${group1}_renamed" diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.cc b/src/tools/rbd_mirror/group_replayer/Replayer.cc index d1c08a8a7b6..db0eec3282f 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.cc +++ b/src/tools/rbd_mirror/group_replayer/Replayer.cc @@ -349,23 +349,21 @@ void Replayer::load_local_group_snapshots() { //FIXME: This is not accessed under a lock if (!m_local_group_snaps.empty()) { for (auto &local_snap : m_local_group_snaps) { + // skip validation for already complete snapshots if (local_snap.state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { continue; } + + // skip validation for primary snapshots auto ns = std::get_if( &local_snap.snapshot_namespace); if (ns != nullptr && ns->state == cls::rbd::MIRROR_SNAPSHOT_STATE_PRIMARY) { continue; } - // do not forward until previous local mirror group snapshot is COMPLETE + + // validate incomplete non-primary mirror or regular snapshots validate_image_snaps_sync_complete(local_snap.id); - auto snap_type = cls::rbd::get_group_snap_namespace_type( - local_snap.snapshot_namespace); - if (snap_type == cls::rbd::GROUP_SNAPSHOT_NAMESPACE_TYPE_MIRROR) { - m_retry_validate_snap = true; - break; - } } } @@ -415,7 +413,8 @@ void Replayer::handle_load_local_group_snapshots(int r) { locker.unlock(); handle_replay_complete(0, "orphan (force promoting)"); return; - } else if (m_retry_validate_snap) { + } else if (m_retry_validate_snap || + it->state == cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE) { m_retry_validate_snap = false; locker.unlock(); schedule_load_group_snapshots(); @@ -967,11 +966,10 @@ void Replayer::create_mirror_snapshot( {}, cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE}; local_snap.name = prepare_non_primary_mirror_snap_name(m_global_group_id, group_snap_id); - m_local_group_snaps.push_back(local_snap); auto comp = create_rados_callback( - new LambdaContext([this, snap, on_finish](int r) { - handle_create_mirror_snapshot(r, snap, on_finish); + new LambdaContext([this, group_snap_id, on_finish](int r) { + handle_create_mirror_snapshot(r, group_snap_id, on_finish); })); librados::ObjectWriteOperation op; @@ -985,19 +983,8 @@ void Replayer::create_mirror_snapshot( template void Replayer::handle_create_mirror_snapshot( - int r, cls::rbd::GroupSnapshot *snap, Context *on_finish) { - dout(10) << snap->id << ", r=" << r << dendl; - - if (r < 0) { // clean the group snapshot here - dout(10) << "cleaning snapshot as failed group_snap_set previously with : " - << snap->id << ", this will be attempted to sync later" << dendl; - r = librbd::cls_client::group_snap_remove(&m_local_io_ctx, - librbd::util::group_header_name(m_local_group_id), snap->id); - if (r < 0) { - derr << "failed to remove group snapshot : " - << snap->id << " : " << cpp_strerror(r) << dendl; - } - } + int r, const std::string &group_snap_id, Context *on_finish) { + dout(10) << group_snap_id << ", r=" << r << dendl; on_finish->complete(r); } @@ -1253,7 +1240,6 @@ void Replayer::create_regular_snapshot( cls::rbd::GroupSnapshotNamespaceUser{}, snap->name, cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE}; - m_local_group_snaps.push_back(group_snap); librbd::cls_client::group_snap_set(&op, group_snap); auto comp = create_rados_callback( @@ -1272,25 +1258,6 @@ void Replayer::handle_create_regular_snapshot( int r, const std::string &group_snap_id, Context *on_finish) { dout(10) << group_snap_id << ", r=" << r << dendl; - if (r < 0) { // clean the group snapshot here - dout(10) << "cleaning snapshot as failed group_snap_set previously with : " - << group_snap_id << ", this will be attempted to sync later" - << dendl; - r = librbd::cls_client::group_snap_remove(&m_local_io_ctx, - librbd::util::group_header_name(m_local_group_id), group_snap_id); - if (r < 0) { - derr << "failed to remove group snapshot : " - << group_snap_id << " : " << cpp_strerror(r) << dendl; - } - for (auto local_snap = m_local_group_snaps.begin(); - local_snap != m_local_group_snaps.end(); ++local_snap) { - if (local_snap->id != group_snap_id) { - continue; - } - m_local_group_snaps.erase(local_snap); - } - } // Note IR's don't need setting set_image_replayer_limits() for regular snaps. - on_finish->complete(r); } @@ -1472,6 +1439,7 @@ template void Replayer::handle_regular_snapshot_complete( int r, const std::string &group_snap_id, Context *on_finish) { dout(10) << group_snap_id << ", r=" << r << dendl; + on_finish->complete(r); } diff --git a/src/tools/rbd_mirror/group_replayer/Replayer.h b/src/tools/rbd_mirror/group_replayer/Replayer.h index 521ec95112e..6a1179a80b5 100644 --- a/src/tools/rbd_mirror/group_replayer/Replayer.h +++ b/src/tools/rbd_mirror/group_replayer/Replayer.h @@ -161,7 +161,7 @@ private: std::unique_lock &locker, Context *on_finish); void handle_create_mirror_snapshot( - int r, cls::rbd::GroupSnapshot *snap, Context *on_finish); + int r, const std::string &group_snap_id, Context *on_finish); std::string prepare_non_primary_mirror_snap_name( const std::string &global_group_id, const std::string &snap_id);