From: VinayBhaskar-V Date: Wed, 15 Oct 2025 10:37:40 +0000 (+0530) Subject: rbd-mirror: allow incomplete group demote snapshot to sync after rbd-mirror daemon... X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=eb41cc13ca4dc8e08bddc9e86b3f76ec933b7e4a;p=ceph-ci.git rbd-mirror: allow incomplete group demote snapshot to sync after rbd-mirror daemon restart Currently, if the rbd-mirror daemon on the secondary is killed while the group demote snapshot is still in the incomplete state on the secondary, the promotion state is set to PROMOTION_STATE_ORPHAN upon restart. This state prevents the incomplete demote snapshot from syncing after the restart, because bootstrap on the secondary fails. This commit fixes the problem by setting the promotion state to PROMOTION_STATE_NON_PRIMARY for a group with an incomplete non-primary demote snapshot. The downside is that if the group is removed on the primary cluster, then after the rbd-mirror daemon is restarted on the secondary cluster, the corresponding group on the secondary is removed as well. This happens because deletion propagation is unconditionally enabled for PROMOTION_STATE_NON_PRIMARY. This is acceptable, since the user would have had to forcefully delete the demoted group on the primary. 
Signed-off-by: VinayBhaskar-V Resolves: rhbz#2416554 --- diff --git a/qa/workunits/rbd/rbd_mirror_group_simple.sh b/qa/workunits/rbd/rbd_mirror_group_simple.sh index 79e8d5c7505..9b42f95332f 100755 --- a/qa/workunits/rbd/rbd_mirror_group_simple.sh +++ b/qa/workunits/rbd/rbd_mirror_group_simple.sh @@ -3677,6 +3677,61 @@ test_demote_snap_sync() check_daemon_running "${secondary_cluster}" } +declare -a test_demote_snap_sync_after_restart_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'wait_after_restart' 2) + +test_demote_snap_sync_after_restart_scenarios=1 + +test_demote_snap_sync_after_restart() +{ + local primary_cluster=$1 ; shift + local secondary_cluster=$1 ; shift + local pool=$1 ; shift + local image_prefix=$1 ; shift + local scenario=$1 ; shift + local image_count=$(($1*"${image_multiplier}")) ; shift + + start_mirrors "${primary_cluster}" + + group_create "${primary_cluster}" "${pool}/${group0}" + images_create "${primary_cluster}" "${pool}/${image_prefix}" "${image_count}" 1G + group_images_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}" "${image_count}" + mirror_group_enable "${primary_cluster}" "${pool}/${group0}" + wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}" + wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}" + wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}" + wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' + wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}" + + write_image "${primary_cluster}" "${pool}" "${image_prefix}1" 256 4194304 + mirror_group_demote "${primary_cluster}" "${pool}/${group0}" + local group_snap_id + get_newest_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" group_snap_id + wait_for_test_group_snap_present "${secondary_cluster}" 
"${pool}/${group0}" "${group_snap_id}" 1 + + stop_mirrors "${secondary_cluster}" '-9' + # check that demote snap is incomplete + test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}" + start_mirrors "${secondary_cluster}" + + wait_for_group_snap_sync_complete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}" + wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}/${group0}" 'up+unknown' + wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}/${group0}" 'up+unknown' + compare_images "${primary_cluster}" "${secondary_cluster}" "${pool}" "${pool}" "${image_prefix}1" + mirror_group_promote "${secondary_cluster}" "${pool}/${group0}" + wait_for_group_replay_started "${primary_cluster}" "${pool}"/"${group0}" "${image_count}" + wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}" + wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+stopped' + group_remove "${secondary_cluster}" "${pool}/${group0}" + wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}" + wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}" + + images_remove "${secondary_cluster}" "${pool}/${image_prefix}" "${image_count}" + wait_for_no_keys "${primary_cluster}" + wait_for_no_keys "${secondary_cluster}" + stop_mirrors "${primary_cluster}" + check_daemon_running "${secondary_cluster}" +} + check_for_no_keys() { local primary_cluster=$1 @@ -3861,6 +3916,7 @@ run_all_tests() run_test_all_scenarios test_image_snapshots_with_group run_test_all_scenarios test_group_rename run_test_all_scenarios test_demote_snap_sync + run_test_all_scenarios test_demote_snap_sync_after_restart # TODO this test is disabled - policing is missing for actions against groups on the secondary - not MVP #run_test_all_scenarios test_invalid_actions run_test_all_scenarios test_remote_namespace diff --git a/src/librbd/mirror/GroupGetInfoRequest.cc 
b/src/librbd/mirror/GroupGetInfoRequest.cc index 96936bb0e63..32fb2f602e1 100644 --- a/src/librbd/mirror/GroupGetInfoRequest.cc +++ b/src/librbd/mirror/GroupGetInfoRequest.cc @@ -184,10 +184,15 @@ void GroupGetInfoRequest::handle_get_last_mirror_snapshot_state(int r) { if (it->state == cls::rbd::GROUP_SNAPSHOT_STATE_INCOMPLETE) { continue; } - [[fallthrough]]; - case cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED: *m_promotion_state = PROMOTION_STATE_ORPHAN; break; + case cls::rbd::MIRROR_SNAPSHOT_STATE_NON_PRIMARY_DEMOTED: + if (it->state == cls::rbd::GROUP_SNAPSHOT_STATE_COMPLETE) { + *m_promotion_state = PROMOTION_STATE_ORPHAN; + } else { + *m_promotion_state = PROMOTION_STATE_NON_PRIMARY; + } + break; } break; }