From 9dd8a77c63aebfd703522c897ad215154d1ad6cf Mon Sep 17 00:00:00 2001
From: John Agombar
Date: Tue, 22 Apr 2025 14:54:44 +0100
Subject: [PATCH] qa/workunits/rbd: update mirror group snapshot tests
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Updated tests:
- test_force_promote_before_initial_sync - updated to automatically
  calculate the image size based on the cluster resources available
  (faster cluster => faster sync => larger image needed)

Enabled tests:
- test_enable_mirroring_when_duplicate_group_exists scenarios 1, 2 and 4
- test_demote_snap_sync

New tests:
- test_invalid_actions - disabled, as policing is missing from the
  initial release

Signed-off-by: John Agombar
---
 qa/workunits/rbd/rbd_mirror_group_simple.sh | 104 ++++++++++++--------
 qa/workunits/rbd/rbd_mirror_helpers.sh      |   8 +-
 2 files changed, 67 insertions(+), 45 deletions(-)

diff --git a/qa/workunits/rbd/rbd_mirror_group_simple.sh b/qa/workunits/rbd/rbd_mirror_group_simple.sh
index fc05ab1128659..f2c969388b1d7 100755
--- a/qa/workunits/rbd/rbd_mirror_group_simple.sh
+++ b/qa/workunits/rbd/rbd_mirror_group_simple.sh
@@ -227,15 +227,14 @@ test_invalid_actions()
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" "${image_count}"
   wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group}" "${image_count}"
   wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+replaying' "${image_count}"
-  wait_for_group_synced "${primary_cluster}" "${pool}/${group}3" "${secondary_cluster}" "${pool}"/"${group}"
+  wait_for_group_synced "${primary_cluster}" "${pool}/${group}" "${secondary_cluster}" "${pool}"/"${group}"

   if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
     wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'down+unknown' 0
   fi

-  expect_failure "image belongs to a group" rbd --cluster=${primary_cluster} rm "${pool}/${image_prefix}0"
-
-  #image_remove "${primary_cluster}" "${pool}/${image_prefix}0" "try_cmd"
+  # TODO: the next command should fail but currently does not. Fix is not MVP
+  expect_failure "group is readonly" rbd --cluster="${secondary_cluster}" group rename "${pool}/${group}" "${pool}/${group}_renamed"

   group_remove "${primary_cluster}" "${pool}/${group}"
   check_daemon_running "${secondary_cluster}"
@@ -371,7 +370,8 @@ declare -a test_enable_mirroring_when_duplicate_group_exists_2=("${CLUSTER2}" "$
 declare -a test_enable_mirroring_when_duplicate_group_exists_3=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" 'rename_primary')
 declare -a test_enable_mirroring_when_duplicate_group_exists_4=("${CLUSTER2}" "${CLUSTER1}" "${pool0}" "${group0}" 'disable_then_rename_primary')

-test_enable_mirroring_when_duplicate_group_exists_scenarios=4
+# scenario 3 fails - see the TODO below
+test_enable_mirroring_when_duplicate_group_exists_scenarios='1 2 4'

 # This test does the following
 # 1. create a group on primary site
@@ -400,14 +400,12 @@ test_enable_mirroring_when_duplicate_group_exists()
   # group will be present on secondary, but won't be mirrored
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" 0

-  # TODO - fails on next line with blank description
-  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 'local group is primary'
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 0 'local group is primary'
   test_fields_in_group_info "${primary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'true'

   # Look at the "state" and "description" fields for the peer site in the group status output.
   # Can't look at the state directly on the secondary because mirroring should have failed to be enabled

-  # TODO - fails with incorrect description in peer status
   if [ "${scenario}" = 'remove' ]; then
     wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
     # remove the non-mirrored group on the secondary
@@ -437,6 +435,9 @@ test_enable_mirroring_when_duplicate_group_exists()
     test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'
   elif [ "${scenario}" = 'rename_primary' ]; then
     # Group should still not be mirrored in this case - need to disable, rename and renable to fix
+    # TODO: sometimes fails on the next line because the group is mirrored -
+    # groups do not currently behave like images - see this thread:
+    # https://ibm-systems-storage.slack.com/archives/C07J9Q2E268/p1745320514846339?thread_ts=1745293182.701399&cid=C07J9Q2E268
     wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'split-brain detected'
   elif [ "${scenario}" = 'disable_then_rename_primary' ]; then
     wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+replaying'
@@ -451,6 +452,8 @@ test_enable_mirroring_when_duplicate_group_exists()
     group_remove "${secondary_cluster}" "${pool}/${group}_renamed"
   elif [ "${scenario}" = 'rename_primary' ]; then
     group_remove "${secondary_cluster}" "${pool}/${group_orig}"
+  elif [ "${scenario}" = 'disable_then_rename_primary' ]; then
+    group_remove "${secondary_cluster}" "${pool}/${group_orig}"
   fi

   wait_for_no_keys "${primary_cluster}"
@@ -495,12 +498,13 @@ test_enable_mirroring_when_duplicate_image_exists()
   # group will be present on secondary, but image won't be mirrored
   wait_for_group_present "${secondary_cluster}" "${pool}" "${group}" 0

-  # TODO fails on next line with description 'bootstrap failed'
-  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 'local group is primary'
+  wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+stopped' 1 'local group is primary'
   test_fields_in_group_info "${primary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'true'
+  wait_for_peer_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group}" 'up+error' 'failed to start image replayers'

   # group should be mirrored, but image can't be
-  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 'failed to start image replayers'
+  # TODO: fails on the next line with "rbd: mirroring not enabled on the group" rc=22
+  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group}" 'up+error' 0 'failed to start image replayers'
   test_fields_in_group_info "${secondary_cluster}" "${pool}/${group}" 'snapshot' 'enabled' 'false'

   if [ "${scenario}" = 'remove' ]; then
@@ -568,7 +572,7 @@ test_group_enable_times()
   for image_count in {0,10,20,30}; do
     times=()
     test_create_group_with_images_then_mirror "${primary_cluster}" "${secondary_cluster}" "${pool}" "${group}" "${image_prefix}" 'true' "${image_count}" times
-    results+=("image count:$image_count enable time:"${times[0]}" sync_time:"${times[1]})
+    results+=("image count:$image_count enable time:${times[0]} sync_time:${times[1]}")
   done

   for result in "${results[@]}"; do
@@ -2877,30 +2881,46 @@ test_force_promote_before_initial_sync()
   group_images_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${image_prefix}" $(("${image_count}"-1))

   big_image=test-image-big
-  image_create "${primary_cluster}" "${pool}/${big_image}" 4G
-  # make some changes to the big image so that the sync will take a long time
-  write_image "${primary_cluster}" "${pool}" "${big_image}" 1024 4194304
-  group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${big_image}"
-
-  mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
-  wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+  local sync_incomplete=false

-  wait_for_group_replay_started "${secondary_cluster}" "${pool}"/"${group0}" "${image_count}"
-  wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+  multiplier=1
+  while [ "${sync_incomplete}" = false ]; do
+    image_size=$((multiplier*1024))
+    io_count=$((multiplier*256))

-  if [ -z "${RBD_MIRROR_USE_RBD_MIRROR}" ]; then
-    wait_for_group_status_in_pool_dir "${primary_cluster}" "${pool}"/"${group0}" 'down+unknown' 0
-  fi
+    image_create "${primary_cluster}" "${pool}/${big_image}" "${image_size}M"
+    # make some changes to the big image so that the sync will take a long time
+    # write_image args are count and size; io-total = count*size (io_count writes of 4M)
+    write_image "${primary_cluster}" "${pool}" "${big_image}" "${io_count}" 4194304
+    group_image_add "${primary_cluster}" "${pool}/${group0}" "${pool}/${big_image}"

-  local group_snap_id
-  get_newest_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" group_snap_id
-  wait_for_test_group_snap_present "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}" 1
-
-  # stop the daemon to prevent further syncing of snapshots
-  stop_mirrors "${secondary_cluster}" '-9'
+    mirror_group_enable "${primary_cluster}" "${pool}/${group0}"
+    wait_for_group_present "${secondary_cluster}" "${pool}" "${group0}" "${image_count}"
+    local group_snap_id
+    get_newest_group_snapshot_id "${primary_cluster}" "${pool}/${group0}" group_snap_id
+    wait_for_test_group_snap_present "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}" 1
+
+    # stop the daemon to prevent further syncing of snapshots
+    stop_mirrors "${secondary_cluster}" '-9'
+
+    # see if the latest snap is incomplete
+    test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}" && sync_incomplete=true
+
+    # If the sync for the last snapshot is already complete, then we need to repeat with a larger image and write more data.
+    # Disable mirroring, delete the image and go round the loop again
+    if [ "${sync_incomplete}" = false ]; then
+      start_mirrors "${secondary_cluster}"
+      # wait for the daemon to restart
+      wait_for_group_status_in_pool_dir "${secondary_cluster}" "${pool}"/"${group0}" 'up+replaying' "${image_count}"
+      mirror_group_disable "${primary_cluster}" "${pool}/${group0}"
+      group_image_remove "${primary_cluster}" "${pool}/${group0}" "${pool}/${big_image}"
+      image_remove "${primary_cluster}" "${pool}/${big_image}"
+      wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"
+
+      multiplier=$((multiplier*2))
+    fi

-  # check that latest snap is incomplete
-  test_group_snap_sync_incomplete "${secondary_cluster}" "${pool}/${group0}" "${group_snap_id}"
+  done

   # force promote the group on the secondary - this should fail with a sensible error message
   expect_failure "no initial group snapshot available" rbd --cluster=${secondary_cluster} mirror group promote ${pool}/${group0} --force
@@ -2964,7 +2984,7 @@ test_multiple_mirror_group_snapshot_unlink_time()
   done

   for i in $(seq 0 "${#results[@]}"); do
-    echo -e "${RED}image count:"${image_counts[$i]}" snapshot time:"${results[$i]}"${NO_COLOUR}"
+    echo -e "${RED}image count:${image_counts[$i]} snapshot time:${results[$i]}${NO_COLOUR}"
   done

   if [ ${results[1]} -gt $((${results[0]}+3)) ]; then
@@ -3508,7 +3528,7 @@ test_demote_snap_sync()

   local secondary_snap_id
   get_newest_group_snapshot_id "${secondary_cluster}" "${pool}/${group0}" secondary_snap_id
-  # TODO this test currently fails on the next line. Waiting for fix to issue 39
   test "${primary_demote_snap_id}" = "${secondary_snap_id}" || { fail "demote snapshot ${primary_demote_snap_id} not synced"; return 1; }

   mirror_group_promote "${secondary_cluster}" "${pool}/${group0}"
@@ -3520,7 +3539,7 @@ test_demote_snap_sync()
   wait_for_group_not_present "${secondary_cluster}" "${pool}" "${group0}"

   images_remove "${secondary_cluster}" "${pool}/${image_prefix}" "${image_count}"
-  wait_for_no_keys "${secondary_cluster}"
+  wait_for_no_keys "${primary_cluster}"
   stop_mirrors "${primary_cluster}"
   check_daemon_running "${secondary_cluster}"
 }
@@ -3600,9 +3619,6 @@ run_test()
   # need to call this before checking the current state
   setup_tempdir

-  # look at every pool on both clusters and check that there are no entries leftover in rbd_image_leader
-  check_for_no_keys "${primary_cluster}" "${secondary_cluster}"
-
   if [ -n "${RBD_MIRROR_SAVE_CLI_OUTPUT}" ]; then
     # Record the test name and scenario and clear any old output in the file
     echo "Test:${test_name} Scenario:${test_scenario}" > "${TEMPDIR}/${RBD_MIRROR_SAVE_CLI_OUTPUT}"
@@ -3630,6 +3646,11 @@ run_test()
   testlog "TEST:$test_name scenario:$test_scenario parameters:" "${test_parameters[@]}"

   "$test_name" "${test_parameters[@]}"
+
+  sleep 5
+
+  # look at every pool on both clusters and check that there are no entries leftover in rbd_image_leader
+  check_for_no_keys "${primary_cluster}" "${secondary_cluster}"
 }

 # exercise all scenarios that are defined for the specified test
@@ -3696,16 +3717,15 @@ run_all_tests()
   run_test_all_scenarios test_force_promote_delete_group
   run_test_all_scenarios test_create_group_stop_daemon_then_recreate
   # TODO these next 2 tests are disabled as they fails with incorrect state/description in mirror group status - issue 50
-  #run_test_all_scenarios test_enable_mirroring_when_duplicate_group_exists
+  run_test_all_scenarios test_enable_mirroring_when_duplicate_group_exists
   #run_test_all_scenarios test_enable_mirroring_when_duplicate_image_exists
   run_test_all_scenarios test_odf_failover_failback
   run_test_all_scenarios test_resync_marker
   run_test_all_scenarios test_force_promote_before_initial_sync
   run_test_all_scenarios test_image_snapshots_with_group
   run_test_all_scenarios test_group_rename
-  # TODO this test is disabled until Nithya delivers her bootstrap changes
-  #run_test_all_scenarios test_demote_snap_sync
-  # TODO this test is disabled - not yet complete
+  run_test_all_scenarios test_demote_snap_sync
+  # TODO this test is disabled - policing is missing for actions against groups on the secondary - not MVP
   #run_test_all_scenarios test_invalid_actions
   run_test_all_scenarios test_remote_namespace
   run_test_all_scenarios test_create_multiple_groups_do_io
diff --git a/qa/workunits/rbd/rbd_mirror_helpers.sh b/qa/workunits/rbd/rbd_mirror_helpers.sh
index bcf891f6791d9..b611dd2eeb73c 100755
--- a/qa/workunits/rbd/rbd_mirror_helpers.sh
+++ b/qa/workunits/rbd/rbd_mirror_helpers.sh
@@ -298,6 +298,7 @@ expect_failure()

     if "$@" > ${out} 2>&1 ; then
         cat ${out} >&2
+        echo "Command did not fail"
         return 1
     fi

@@ -307,6 +308,7 @@ expect_failure()

     if ! grep -q "${expected}" ${out} ; then
         cat ${out} >&2
+        echo "Command did not fail with expected message"
        return 1
     fi

@@ -2712,7 +2714,7 @@ wait_for_test_group_snap_sync_complete()
        sleep ${s}
        test_group_snap_sync_complete "${cluster}" "${group_spec}" "${group_snap_id}" && return 0

-       if [ "$s" -gt 32 ]; then
+       if (( $(bc <<<"$s > 32") )); then
            # query the snap progress for each image in the group - debug info to check that sync is progressing
            list_image_snaps_for_group "${cluster}" "${group_spec}"
        fi
@@ -2972,7 +2974,7 @@ wait_for_group_status_in_pool_dir()
            "${state_pattern}" "${image_count}" "${description_pattern}" && return 0
    done

-   fail 1 "failed to reach expected status"
+   fail "failed to reach expected status"
    return 1
 }

@@ -3004,7 +3006,7 @@ wait_for_peer_group_status_in_pool_dir()
        sleep ${s}
        test_peer_group_status_in_pool_dir "${cluster}" "${group_spec}" "${state_pattern}" "${description_pattern}" && return 0
    done
-   fail 1 "failed to reach expected peer status"
+   fail "failed to reach expected peer status"
    return 1
 }

-- 
2.39.5
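
Note on the reworked test_force_promote_before_initial_sync: instead of a fixed 4G image, the test now doubles the image size and the number of 4M writes on each pass until the latest group snapshot is still mid-sync at the moment the secondary daemon is killed, so the force promote is guaranteed to happen before the initial sync completes. Below is a minimal, self-contained sketch of that pattern; create_and_write_big_image and sync_is_incomplete are illustrative stand-ins (not functions from the suite) for the patch's image_create/write_image and test_group_snap_sync_incomplete helpers.

#!/usr/bin/env bash
# Sketch: grow the workload until the sync is caught mid-transfer.

# Stand-in helper: pretend to create an image and write io_count x 4M of data.
create_and_write_big_image() {
  local size_mb=$1 io_count=$2
  echo "creating ${size_mb}M image and writing $((io_count * 4))M of data"
}

# Stand-in helper: pretend the sync only stays incomplete once the image reaches 4G.
sync_is_incomplete() {
  [ "$1" -ge 4096 ]
}

multiplier=1
while true; do
  image_size_mb=$((multiplier * 1024))   # 1G, 2G, 4G, ...
  io_count=$((multiplier * 256))         # 256, 512, 1024, ... writes of 4M

  create_and_write_big_image "${image_size_mb}" "${io_count}"

  if sync_is_incomplete "${image_size_mb}"; then
    echo "sync caught mid-transfer with a ${image_size_mb}M image"
    break
  fi

  # The cluster synced too quickly: clean up and retry with double the workload.
  multiplier=$((multiplier * 2))
done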
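
The helper change from [ "$s" -gt 32 ] to (( $(bc <<<"$s > 32") )) in wait_for_test_group_snap_sync_complete is presumably needed because the retry intervals in that loop can be fractional (the helpers commonly sleep for values such as 0.1), and the shell's integer test errors out on non-integer values. A small standalone illustration of the difference (not taken from the patch):

#!/usr/bin/env bash
s=0.1
# Integer comparison: fails with "integer expression expected" for fractional values.
if [ "$s" -gt 32 ] 2>/dev/null; then echo "integer test: > 32"; else echo "integer test: error or <= 32"; fi
# bc evaluates the comparison arithmetically and prints 1 (true) or 0 (false).
if (( $(bc <<<"$s > 32") )); then echo "bc: > 32"; else echo "bc: <= 32"; fi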