From 33d2a2c93b533838f8efe1329181fb8990b4a511 Mon Sep 17 00:00:00 2001 From: Sridhar Seshasayee Date: Mon, 21 Jun 2021 18:17:32 +0530 Subject: [PATCH] qa/standalone/scrub: Force a subset of scrub tests to use "wpq" scheduler The following tests in the test files mentioned below use the "osd_scrub_sleep" option to introduce delays during scrubbing to help determine scrubbing states, validate reservations during scrubbing, etc. This works when using the "wpq" scheduler. But when the "mclock_scheduler" is enabled, the "osd_scrub_sleep" option is disabled and overridden to 0. This is done to delegate the scheduling of the background scrubs to the "mclock_scheduler" based on the set QoS parameters. Due to this, the checks to verify the scrub states, reservations, etc. fail since the window to check them is very short due to scrubs completing very quickly. This affects the small subset of scrub tests listed below: 1. osd-scrub-dump.sh -> TEST_recover_unexpected() 2. osd-scrub-repair.sh -> TEST_auto_repair_bluestore_tag() 3. osd-scrub-test.sh -> TEST_scrub_abort(), TEST_deep_scrub_abort() Only for the above tests, until there's a reliable way to query scrub states with "--osd-scrub-sleep" set to 0, the "osd_op_queue" config option is set to "wpq". 
Signed-off-by: Sridhar Seshasayee --- qa/standalone/scrub/osd-scrub-dump.sh | 13 +++++++------ qa/standalone/scrub/osd-scrub-repair.sh | 6 +++++- qa/standalone/scrub/osd-scrub-test.sh | 13 +++++++++---- 3 files changed, 21 insertions(+), 11 deletions(-) diff --git a/qa/standalone/scrub/osd-scrub-dump.sh b/qa/standalone/scrub/osd-scrub-dump.sh index 6d18d6a60f3e2..092e4ef6eadc4 100755 --- a/qa/standalone/scrub/osd-scrub-dump.sh +++ b/qa/standalone/scrub/osd-scrub-dump.sh @@ -24,7 +24,6 @@ POOL_SIZE=3 function run() { local dir=$1 shift - local SLEEP=0 local CHUNK_MAX=5 export CEPH_MON="127.0.0.1:7184" # git grep '\<7184\>' : there must be only one @@ -32,10 +31,13 @@ function run() { CEPH_ARGS+="--fsid=$(uuidgen) --auth-supported=none " CEPH_ARGS+="--mon-host=$CEPH_MON " CEPH_ARGS+="--osd_max_scrubs=$MAX_SCRUBS " - CEPH_ARGS+="--osd_scrub_sleep=$SLEEP " CEPH_ARGS+="--osd_scrub_chunk_max=$CHUNK_MAX " CEPH_ARGS+="--osd_scrub_sleep=$SCRUB_SLEEP " CEPH_ARGS+="--osd_pool_default_size=$POOL_SIZE " + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
+ CEPH_ARGS+="--osd_op_queue=wpq " export -n CEPH_CLI_TEST_DUP_COMMAND local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')} @@ -91,10 +93,9 @@ function TEST_recover_unexpected() { ceph pg dump pgs max=$(CEPH_ARGS='' ceph daemon $(get_asok_path osd.0) dump_scrub_reservations | jq '.osd_max_scrubs') - if [ $max != $MAX_SCRUBS]; - then - echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations" - return 1 + if [ $max != $MAX_SCRUBS ]; then + echo "ERROR: Incorrect osd_max_scrubs from dump_scrub_reservations" + return 1 fi ceph osd unset noscrub diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh index 3e56a2f9b1471..625d976a7cf7a 100755 --- a/qa/standalone/scrub/osd-scrub-repair.sh +++ b/qa/standalone/scrub/osd-scrub-repair.sh @@ -390,9 +390,13 @@ function TEST_auto_repair_bluestore_tag() { # Launch a cluster with 3 seconds scrub interval run_mon $dir a || return 1 run_mgr $dir x || return 1 + # Set scheduler to "wpq" until there's a reliable way to query scrub states + # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the + # scrub sleep to 0 and as a result the checks in the test fail. 
local ceph_osd_args="--osd-scrub-auto-repair=true \ --osd_deep_scrub_randomize_ratio=0 \ - --osd-scrub-interval-randomize-ratio=0" + --osd-scrub-interval-randomize-ratio=0 \ + --osd-op-queue=wpq" for id in $(seq 0 2) ; do run_osd $dir $id $ceph_osd_args || return 1 done diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh index 5dd029c356fe1..6cea0b8fcf832 100755 --- a/qa/standalone/scrub/osd-scrub-test.sh +++ b/qa/standalone/scrub/osd-scrub-test.sh @@ -301,10 +301,15 @@ function _scrub_abort() { run_mgr $dir x || return 1 for osd in $(seq 0 $(expr $OSDS - 1)) do - run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ - --osd_deep_scrub_randomize_ratio=0.0 \ - --osd_scrub_sleep=5.0 \ - --osd_scrub_interval_randomize_ratio=0 || return 1 + # Set scheduler to "wpq" until there's a reliable way to query scrub + # states with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" + # overrides the scrub sleep to 0 and as a result the checks in the + # test fail. + run_osd $dir $osd --osd_pool_default_pg_autoscale_mode=off \ + --osd_deep_scrub_randomize_ratio=0.0 \ + --osd_scrub_sleep=5.0 \ + --osd_scrub_interval_randomize_ratio=0 \ + --osd_op_queue=wpq || return 1 done # Create a pool with a single pg -- 2.39.5