From 83607c06100597b1741d7221ddae8895dc144a8f Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Sun, 4 Jun 2023 06:43:19 -0500
Subject: [PATCH] qa/standalone: osd-recovery-scrub: fix slow updates and
 recovery concurrency

1. Setting frequent scrub status updates, to compensate for the removal
   of some 'send updates' in PR#50283.
2. Switching back to using the wpq scheduler, as otherwise the number
   of concurrent recovery operations is below what the test expects.

Fixes: https://tracker.ceph.com/issues/61386
Signed-off-by: Ronen Friedman
---
 qa/standalone/scrub/osd-recovery-scrub.sh | 27 ++++++++++++++++++-----
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/qa/standalone/scrub/osd-recovery-scrub.sh b/qa/standalone/scrub/osd-recovery-scrub.sh
index 9541852c7d223..14c98c5e8f62b 100755
--- a/qa/standalone/scrub/osd-recovery-scrub.sh
+++ b/qa/standalone/scrub/osd-recovery-scrub.sh
@@ -24,6 +24,7 @@ function run() {
     export CEPH_ARGS
     CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none "
     CEPH_ARGS+="--mon-host=$CEPH_MON "
+    CEPH_ARGS+="--osd-op-queue=wpq "
     export -n CEPH_CLI_TEST_DUP_COMMAND

     local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
@@ -45,9 +46,12 @@ function TEST_recovery_scrub_1() {
     ERRORS=0

     setup $dir || return 1
-    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
-            --osd_scrub_interval_randomize_ratio=0.0 || return 1
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
     run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 "
+    ceph_osd_args+="--osd_scrub_backoff_ratio=0 "
+    ceph_osd_args+="--osd_stats_update_period_not_scrubbing=3 "
+    ceph_osd_args+="--osd_stats_update_period_scrubbing=2"
     for osd in $(seq 0 $(expr $OSDS - 1))
     do
       run_osd $dir $osd --osd_scrub_during_recovery=false || return 1
@@ -236,12 +240,16 @@ function TEST_recovery_scrub_2() {
     OBJECTS=40

     setup $dir || return 1
-    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true \
-            --osd_scrub_interval_randomize_ratio=0.0 || return 1
+    run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
     run_mgr $dir x || return 1
+    local ceph_osd_args="--osd-scrub-interval-randomize-ratio=0 "
+    ceph_osd_args+="--osd_scrub_backoff_ratio=0 "
+    ceph_osd_args+="--osd_stats_update_period_not_scrubbing=3 "
+    ceph_osd_args+="--osd_stats_update_period_scrubbing=2"
     for osd in $(seq 0 $(expr $OSDS - 1))
     do
-      run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 || return 1
+      run_osd $dir $osd --osd_scrub_during_recovery=true --osd_recovery_sleep=10 \
+          $ceph_osd_args || return 1
     done

     # Create a pool with $PGS pgs
@@ -260,6 +268,15 @@ function TEST_recovery_scrub_2() {

     ceph pg dump pgs

+    # note that the following will be needed if the mclock scheduler is specified
+    #ceph tell osd.* config get osd_mclock_override_recovery_settings
+
+    # the '_max_active' is expected to be 0
+    ceph tell osd.1 config get osd_recovery_max_active
+    # both next parameters are expected to be >=3
+    ceph tell osd.1 config get osd_recovery_max_active_hdd
+    ceph tell osd.1 config get osd_recovery_max_active_ssd
+
     # Wait for recovery to start
     count=0
     while(true)
-- 
2.47.3
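
Below is a minimal sketch, not part of the patch, of how the recovery-concurrency expectations the patch only dumps with 'ceph tell' could be asserted in the test. It assumes a running standalone cluster with the ceph CLI and jq available, and that 'ceph tell osd.N config get <option>' returns JSON keyed by the option name; the helper name check_recovery_concurrency is hypothetical.

function check_recovery_concurrency() {
    local osd=$1
    # With the wpq scheduler, osd_recovery_max_active=0 means "use the
    # hdd/ssd specific limits"; the test expects those limits to allow
    # at least 3 concurrent recovery operations.
    local active=$(ceph tell osd.$osd config get osd_recovery_max_active | jq -r '.osd_recovery_max_active')
    local active_hdd=$(ceph tell osd.$osd config get osd_recovery_max_active_hdd | jq -r '.osd_recovery_max_active_hdd')
    local active_ssd=$(ceph tell osd.$osd config get osd_recovery_max_active_ssd | jq -r '.osd_recovery_max_active_ssd')
    [ "$active" = "0" ] || return 1
    [ "$active_hdd" -ge 3 ] || return 1
    [ "$active_ssd" -ge 3 ] || return 1
}

# Example use inside TEST_recovery_scrub_2 (hypothetical):
#   check_recovery_concurrency 1 || return 1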