From 50c2f0709f09b8a07b0a1206e6392dcadf253ddd Mon Sep 17 00:00:00 2001 From: Ronen Friedman Date: Mon, 17 Feb 2025 08:58:41 -0600 Subject: [PATCH] qa/standalone/scrub: fix osd-scrub-repair() following changes to 'scrub while recovering' logic, the 'what type of scrubs are allowed to run when recovering' tests were broken, and are now fixed. Signed-off-by: Ronen Friedman --- qa/standalone/scrub/osd-scrub-repair.sh | 86 +++++++++++++++++++++---- 1 file changed, 73 insertions(+), 13 deletions(-) diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 6dd5b10ae8f..901a64da8cc 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -107,6 +107,30 @@ function TEST_corrupt_and_repair_replicated() { corrupt_and_repair_one $dir $poolname $(get_primary $poolname SOMETHING) || return 1 } +# +# Allow operator-initiated scrubs to be scheduled even when some recovering is still +# undergoing on the same OSD +# +function TEST_allow_oper_initiated_scrub_during_recovery() { + local dir=$1 + local -A cluster_conf=( + ['osds_num']="2" + ['pgs_in_pool']="4" + ['pool_name']="nopool" + ['pool_default_size']="2" + ['extras']="--osd_scrub_during_recovery=false \ + --osd_debug_pretend_recovery_active=true" + ) + + standard_scrub_cluster $dir cluster_conf + local poolname=rbd + create_rbd_pool || return 1 + wait_for_clean || return 1 + + add_something $dir $poolname || return 1 + oper_scrub_and_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 +} + # # Allow repair to be scheduled when some recovering is still undergoing on the same OSD # @@ -117,10 +141,8 @@ function TEST_allow_repair_during_recovery() { run_mon $dir a --osd_pool_default_size=2 || return 1 run_mgr $dir x || return 1 run_osd $dir 0 --osd_scrub_during_recovery=false \ - --osd_repair_during_recovery=true \ --osd_debug_pretend_recovery_active=true || return 1 run_osd $dir 1 --osd_scrub_during_recovery=false \ - --osd_repair_during_recovery=true \ --osd_debug_pretend_recovery_active=true || return 1 create_rbd_pool || return 1 wait_for_clean || return 1 @@ -132,18 +154,20 @@ function TEST_allow_repair_during_recovery() { # # Skip non-repair scrub correctly during recovery # +# Note: forgoing the automatic creation of a pool in standard_scrub_cluster as +# the test requires a specific RBD pool. function TEST_skip_non_repair_during_recovery() { local dir=$1 - local poolname=rbd + local -A cluster_conf=( + ['osds_num']="2" + ['pgs_in_pool']="4" + ['pool_name']="nopool" + ['pool_default_size']="2" + ['extras']="--osd_scrub_during_recovery=false --osd_debug_pretend_recovery_active=true" + ) - run_mon $dir a --osd_pool_default_size=2 || return 1 - run_mgr $dir x || return 1 - run_osd $dir 0 --osd_scrub_during_recovery=false \ - --osd_repair_during_recovery=true \ - --osd_debug_pretend_recovery_active=true || return 1 - run_osd $dir 1 --osd_scrub_during_recovery=false \ - --osd_repair_during_recovery=true \ - --osd_debug_pretend_recovery_active=true || return 1 + standard_scrub_cluster $dir cluster_conf + local poolname=rbd create_rbd_pool || return 1 wait_for_clean || return 1 @@ -151,6 +175,41 @@ function TEST_skip_non_repair_during_recovery() { scrub_and_not_schedule $dir $poolname $(get_not_primary $poolname SOMETHING) || return 1 } + +function oper_scrub_and_schedule() { + local dir=$1 + local poolname=$2 + local osd=$3 + + # + # 1) start an operator-initiated scrub + # + local pg=$(get_pg $poolname SOMETHING) + local last_scrub=$(get_last_scrub_stamp $pg) + ceph tell $pg scrub + + # + # 2) Assure the scrub was executed + # + sleep 3 + for ((i=0; i < 3; i++)); do + if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then + break + fi + if test "$(get_last_scrub_stamp $pg)" '==' "$last_scrub" ; then + return 1 + fi + sleep 1 + done + + # + # 3) Access to the file must OK + # + objectstore_tool $dir $osd SOMETHING list-attrs || return 1 + rados --pool $poolname get SOMETHING $dir/COPY || return 1 + diff $dir/ORIGINAL $dir/COPY || return 1 +} + function scrub_and_not_schedule() { local dir=$1 local poolname=$2 @@ -166,6 +225,7 @@ function scrub_and_not_schedule() { # # 2) Assure the scrub is not scheduled # + sleep 3 for ((i=0; i < 3; i++)); do if test "$(get_last_scrub_stamp $pg)" '>' "$last_scrub" ; then return 1 @@ -218,9 +278,9 @@ function corrupt_and_repair_two() { # # 1) add an object -# 2) remove the corresponding file from a designated OSD +# 2) remove the corresponding object from a designated OSD # 3) repair the PG -# 4) check that the file has been restored in the designated OSD +# 4) check that the object has been restored in the designated OSD # function corrupt_and_repair_one() { local dir=$1 -- 2.39.5