git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
qa/standalone: fixing the timings when waiting for deep-scrub to start 42398/head
author Ronen Friedman <rfriedma@redhat.com>
Sun, 11 Jul 2021 07:43:03 +0000 (10:43 +0300)
committer Satoru Takeuchi <satoru.takeuchi@gmail.com>
Thu, 22 Jul 2021 16:31:22 +0000 (16:31 +0000)
initiate_and_fetch_state() initiates a scrub, then polls the published
PG state looking for 'scrubbing'. Calling flush_pg_stats() as part of
the polling process might cause the scrub and the following recovery to
be missed altogether.

Note: this polling mechanism is definitely not robust. It will be
redesigned in the future.

Fixes: https://tracker.ceph.com/issues/51581
Signed-off-by: Ronen Friedman <rfriedma@redhat.com>
(cherry picked from commit ed45acee34435611f8dea7f77fde54a6586cf6d9)

qa/standalone/scrub/osd-scrub-repair.sh

index 5899ac9123b93941f6ccf09b96831a0895ac6340..e000134a8c5287ca3ba2d3645e9c9615f53b4a0b 100755 (executable)
@@ -337,7 +337,7 @@ function initiate_and_fetch_state() {
     env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
 
     # wait for 'scrubbing' to appear
-    for ((i=0; i < 40; i++)); do
+    for ((i=0; i < 80; i++)); do
 
         st=`ceph pg $pgid query --format json | jq '.state' `
         echo $i ") state now: " $st
@@ -346,15 +346,14 @@ function initiate_and_fetch_state() {
             *scrubbing*repair* ) echo "found scrub+repair"; return 1;; # PR #41258 should have prevented this
             *scrubbing* ) echo "found scrub"; return 0;;
             *inconsistent* ) echo "Got here too late. Scrub has already finished"; return 1;;
+            *recovery* ) echo "Got here too late. Scrub has already finished."; return 1;;
             * ) echo $st;;
         esac
 
-        if [ $((i % 5)) == 4 ] ; then
+        if [ $((i % 10)) == 4 ]; then
             echo "loop --------> " $i
-            flush_pg_stats
-       else
-            sleep 0.3
         fi
+    sleep 0.3
     done
 
     echo "Timeout waiting for deep-scrub of " $pgid " on " $the_osd " to start"
@@ -371,7 +370,7 @@ function wait_end_of_scrub() { # osd# pg
         [[ $st =~ (.*scrubbing.*) ]] || break
         if [ $((i % 5)) == 4 ] ; then
             flush_pg_stats
-       fi
+        fi
         sleep 0.3
     done
 
@@ -493,7 +492,8 @@ function TEST_auto_repair_bluestore_scrub() {
     run_mgr $dir x || return 1
     local ceph_osd_args="--osd-scrub-auto-repair=true \
             --osd_deep_scrub_randomize_ratio=0 \
-            --osd-scrub-interval-randomize-ratio=0"
+            --osd-scrub-interval-randomize-ratio=0 \
+            --osd-scrub-backoff-ratio=0"
     for id in $(seq 0 2) ; do
         run_osd $dir $id $ceph_osd_args || return 1
     done