From 1c57168f85ef9062fbf713601267feb37a382bfe Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Sun, 11 Jul 2021 10:43:03 +0300
Subject: [PATCH] qa/standalone: fixing the timings when waiting for deep-scrub to start

initiate_and_fetch_state() initiates a scrub, then polls the published
PG state looking for 'scrubbing'. Calling flush_pg_stats() as part of the
polling process might cause the scrub and the following recovery to be
missed altogether.

Note: this polling mechanism is definitely not robust. Will be redesigned
in the future.

Fixes: https://tracker.ceph.com/issues/51581
Signed-off-by: Ronen Friedman
(cherry picked from commit ed45acee34435611f8dea7f77fde54a6586cf6d9)
---
 qa/standalone/scrub/osd-scrub-repair.sh | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/qa/standalone/scrub/osd-scrub-repair.sh b/qa/standalone/scrub/osd-scrub-repair.sh
index 5899ac9123b93..e000134a8c528 100755
--- a/qa/standalone/scrub/osd-scrub-repair.sh
+++ b/qa/standalone/scrub/osd-scrub-repair.sh
@@ -337,7 +337,7 @@ function initiate_and_fetch_state() {
     env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
 
     # wait for 'scrubbing' to appear
-    for ((i=0; i < 40; i++)); do
+    for ((i=0; i < 80; i++)); do
 
         st=`ceph pg $pgid query --format json | jq '.state' `
         echo $i ") state now: " $st
@@ -346,15 +346,14 @@ function initiate_and_fetch_state() {
             *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
             *scrubbing* ) echo "found scrub"; return 0;;
             *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
+            *recovery* ) echo "Got here too late. Scrub has already finished."; return 1;;
             * ) echo $st;;
         esac
 
-        if [ $((i % 5)) == 4 ] ; then
+        if [ $((i % 10)) == 4 ]; then
             echo "loop --------> " $i
-            flush_pg_stats
-        else
-            sleep 0.3
         fi
+        sleep 0.3
     done
 
     echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
@@ -371,7 +370,7 @@ function wait_end_of_scrub() { # osd# pg
         [[ $st =~ (.*scrubbing.*) ]] || break
         if [ $((i % 5)) == 4 ] ; then
             flush_pg_stats
-	fi
+        fi
         sleep 0.3
     done
 
@@ -493,7 +492,8 @@ function TEST_auto_repair_bluestore_scrub() {
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd-scrub-auto-repair=true \
             --osd_deep_scrub_randomize_ratio=0 \
-            --osd-scrub-interval-randomize-ratio=0"
+            --osd-scrub-interval-randomize-ratio=0 \
+            --osd-scrub-backoff-ratio=0"
     for id in $(seq 0 2) ; do
         run_osd $dir $id $ceph_osd_args || return 1
     done
-- 
2.39.5