From 122f78aa68187c00e52ed1f4d8e0911f76bdf72d Mon Sep 17 00:00:00 2001 From: Prasanna Kumar Kalever Date: Thu, 11 Dec 2025 10:53:50 +0530 Subject: [PATCH] rbd-mirror: allow resync while a group snapshot is still syncing Currently a resync operation is not allowed while a group snapshot is still in progress; it can only be processed once the snapshot sync is fully done. This means that if snapshot synchronization becomes stuck for any reason, a resync cannot be triggered, resulting in an undesirable operational limitation. This change enables resync requests to be processed even when a group snapshot is still syncing, allowing a resync in the middle of syncing a group snapshot. Signed-off-by: Prasanna Kumar Kalever --- qa/workunits/rbd/rbd_mirror_group_simple.sh | 91 +++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/qa/workunits/rbd/rbd_mirror_group_simple.sh b/qa/workunits/rbd/rbd_mirror_group_simple.sh index 31389a58505..cdc1d366524 100755 --- a/qa/workunits/rbd/rbd_mirror_group_simple.sh +++ b/qa/workunits/rbd/rbd_mirror_group_simple.sh @@ -3161,6 +3161,96 @@ test_interrupted_sync() image_remove "${primary_cluster}" "${pool}/${big_image}" } +# Scenario 1: The snapshot on the secondary is in the creating phase when the daemon is stopped; resync is then flagged and the daemon is started again. +declare -a test_interrupted_sync_and_resync_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'resync_when_snap_creating' 2) +# Scenario 2: The snapshot on the secondary is in the created phase when the daemon is stopped; resync is then flagged and the daemon is started again. 
+declare -a test_interrupted_sync_and_resync_2=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${image_prefix}" 'resync_when_snap_created' 2)
+
+test_interrupted_sync_and_resync_scenarios=2
+# Stop the secondary daemon mid group-snapshot sync, flag a resync, restart it, then check the group id changed and the group is fully synced. Args: primary_cluster secondary_cluster pool image_prefix scenario image_count
+test_interrupted_sync_and_resync()
+{
+  local primary_cluster=$1 ; shift
+  local secondary_cluster=$1 ; shift
+  local pool=$1 ; shift
+  local image_prefix=$1 ; shift
+  local scenario=$1 ; shift
+  local image_count=$(($1*"${image_multiplier}")) ; shift
+  local group0=test-group0
+  local snap0='snap_0'
+
+  start_mirrors "${primary_cluster}"
+  start_mirrors "${secondary_cluster}"
+
+  group_create "${primary_cluster}" "${pool}/${group0}"
+  image_create "${primary_cluster}" "${pool}/${image_prefix}1" 1G
+  write_image "${primary_cluster}" "${pool}" "${image_prefix}1" 10 4096
+
+  local big_image=test-image-big
+  image_create "${primary_cluster}" "${pool}/${big_image}" 4G
+  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}1"
+  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${big_image}"
+
+  create_snapshot "${primary_cluster}" "${pool}" "${image_prefix}1" "${snap0}"
+  compare_image_with_snapshot "${primary_cluster}" "${pool}/${image_prefix}1" "${primary_cluster}" "${pool}/${image_prefix}1@${snap0}"
+
+  mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
+  wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' "${image_count}"
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
+  # NOTE(review): the large write presumably keeps the next group snapshot syncing long enough to be interrupted - confirm
+  write_image "${primary_cluster}" "${pool}" "${big_image}" 1024 4194304
+
+  local group_snap_id
+  mirror_group_snapshot "${primary_cluster}" "${pool}/${group0}" group_snap_id
+
+  local image_snap_id
+  wait_for_image_snapshot_with_group_snap_info "${secondary_cluster}" "${pool}" "${image_prefix}1" "${group_snap_id}" image_snap_id
+  if [ "${scenario}" = 'resync_when_snap_creating' ]; then
+    stop_mirror_while_group_snapshot_incomplete "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "creating"
+    test_group_snap_state "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "creating"
+  elif [ "${scenario}" = 'resync_when_snap_created' ]; then
+    stop_mirror_while_group_snapshot_incomplete "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "created"
+    test_group_snap_state "${secondary_cluster}" "${pool}" "${group0}" "${group_snap_id}" "created"
+    test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+  fi
+
+  local group_id_before
+  get_id_from_group_info "${secondary_cluster}" "${pool}/${group0}" group_id_before
+
+  # Flag the resync before the secondary mirror daemon is started again
+  mirror_group_resync "${secondary_cluster}" "${pool}"/"${group0}"
+
+  # Start the mirror daemon again; the interrupted group snapshot must still be incomplete
+  start_mirrors "${secondary_cluster}"
+  test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+
+  # Notice that the group will be resynced immediately, without having to wait
+  # for the snapshot to reach the CREATED state
+  wait_for_group_id_changed "${secondary_cluster}" "${pool}/${group0}" "${group_id_before}"
+
+  # confirm that data on secondary again matches initial snapshot on primary
+  wait_for_group_synced "${primary_cluster}" "${pool}"/"${group0}" "${secondary_cluster}" "${pool}"/"${group0}"
+  test_group_snap_sync_complete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+
+  compare_image_with_snapshot "${secondary_cluster}" "${pool}/${image_prefix}1" "${primary_cluster}" "${pool}/${image_prefix}1@${snap0}"
+
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'up+stopped' "${image_count}"
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+
+  # tidy up
+  mirror_group_disable "${primary_cluster}" "${pool}/${group0}"
+  group_remove "${primary_cluster}" "${pool}/${group0}"
+
+  wait_for_group_not_present "${primary_cluster}" "${pool}" "${group0}"
+  wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"
+
+  image_remove "${primary_cluster}" "${pool}/${image_prefix}1"
+  image_remove "${primary_cluster}" "${pool}/${big_image}"
+}
+
 # test force unlink time
 declare -a test_multiple_mirror_group_snapshot_unlink_time_1=("${CLUSTER2}" "${CLUSTER1}" "${pool0}")
 
@@ -3977,6 +4067,7 @@ run_all_tests()
   run_test_all_scenarios test_group_with_clone_image
   run_test_all_scenarios test_interrupted_sync_restarted_daemon
   run_test_all_scenarios test_interrupted_sync
+  run_test_all_scenarios test_interrupted_sync_and_resync
   run_test_all_scenarios test_resync_after_relocate_and_force_promote
   run_test_all_scenarios test_multiple_mirror_group_snapshot_unlink_time
   run_test_all_scenarios test_force_promote_delete_group
-- 
2.47.3