From 10909c3cba62016c9800d44a382e67aaf9a0abd0 Mon Sep 17 00:00:00 2001
From: Ronen Friedman
Date: Mon, 25 Oct 2021 16:03:05 +0000
Subject: [PATCH] osd/scrub: update the stand-alone tests to check 'scrub scheduling' entries

Analyzing and verifying the relevant entries in 'pg query' and 'pg dump' output.

Signed-off-by: Ronen Friedman
---
 qa/standalone/scrub/osd-scrub-test.sh | 114 ++++++++++++---
 qa/standalone/scrub/scrub-helpers.sh  | 196 ++++++++++++++++++++++++++
 2 files changed, 287 insertions(+), 23 deletions(-)
 create mode 100644 qa/standalone/scrub/scrub-helpers.sh

diff --git a/qa/standalone/scrub/osd-scrub-test.sh b/qa/standalone/scrub/osd-scrub-test.sh
index 9105474e30f21..6a1132ef22555 100755
--- a/qa/standalone/scrub/osd-scrub-test.sh
+++ b/qa/standalone/scrub/osd-scrub-test.sh
@@ -15,6 +15,7 @@
 # GNU Library Public License for more details.
 #
 source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
 
 function run() {
     local dir=$1
@@ -28,7 +29,9 @@ function run() {
     export -n CEPH_CLI_TEST_DUP_COMMAND
     local funcs=${@:-$(set | sed -n -e 's/^\(TEST_[0-9a-z_]*\) .*/\1/p')}
     for func in $funcs ; do
+        setup $dir || return 1
         $func $dir || return 1
+        teardown $dir || return 1
     done
 }
 
@@ -40,7 +43,6 @@ function TEST_scrub_test() {
 
     TESTDATA="testdata.$$"
 
-    setup $dir || return 1
     run_mon $dir a --osd_pool_default_size=3 || return 1
     run_mgr $dir x || return 1
     for osd in $(seq 0 $(expr $OSDS - 1))
@@ -103,8 +105,6 @@ function TEST_scrub_test() {
     test "$(ceph pg $pgid query | jq '.peer_info[0].stats.stat_sum.num_scrub_errors')" = "0" || return 1
     test "$(ceph pg $pgid query | jq '.peer_info[1].stats.stat_sum.num_scrub_errors')" = "0" || return 1
     ceph pg dump pgs | grep ^${pgid} | grep -vq -- +inconsistent || return 1
-
-    teardown $dir || return 1
 }
 
 # Grab year-month-day
@@ -138,7 +138,6 @@ function TEST_interval_changes() {
 
     TESTDATA="testdata.$$"
 
-    setup $dir || return 1
     # This min scrub interval results in 30 seconds backoff time
     run_mon $dir a --osd_pool_default_size=$OSDS || return 1
     run_mgr $dir x || return 1
@@ -183,8 +182,6 @@ function TEST_interval_changes() {
     ceph osd pool set $poolname scrub_max_interval $(expr $week \* 3)
     sleep $WAIT_FOR_UPDATE
     check_dump_scrubs $primary "3 days" "3 week" || return 1
-
-    teardown $dir || return 1
 }
 
 function TEST_scrub_extended_sleep() {
@@ -205,7 +202,6 @@ function TEST_scrub_extended_sleep() {
     DAY_START=$(expr $DAY + 2)
     DAY_END=$(expr $DAY + 3)
 
-    setup $dir || return 1
     run_mon $dir a --osd_pool_default_size=3 || return 1
     run_mgr $dir x || return 1
     for osd in $(seq 0 $(expr $OSDS - 1))
@@ -274,8 +270,6 @@ function TEST_scrub_extended_sleep() {
     then
         return 1
     fi
-
-    teardown $dir || return 1
 }
 
 function _scrub_abort() {
@@ -295,8 +289,6 @@ function _scrub_abort() {
         check="nodeep_scrub"
     fi
 
-
-    setup $dir || return 1
     run_mon $dir a --osd_pool_default_size=3 || return 1
     run_mgr $dir x || return 1
     for osd in $(seq 0 $(expr $OSDS - 1))
@@ -393,8 +385,6 @@ function _scrub_abort() {
     fi
     TIMEOUT=$(($objects / 2))
     wait_for_scrub $pgid "$last_scrub" || return 1
-
-    teardown $dir || return 1
 }
 
 function TEST_scrub_abort() {
@@ -415,7 +405,6 @@ function TEST_scrub_permit_time() {
 
     TESTDATA="testdata.$$"
 
-    setup $dir || return 1
     run_mon $dir a --osd_pool_default_size=3 || return 1
     run_mgr $dir x || return 1
     local scrub_begin_hour=$(date -d '2 hour ago' +"%H" | sed 's/^0//')
@@ -450,11 +439,10 @@ function TEST_scrub_permit_time() {
         fi
         sleep 1
     done
-
-    teardown $dir || return 1
 }
 
-function TEST_pg_dump_scrub_duration() {
+
+function TEST_dump_scrub_schedule() {
     local dir=$1
     local poolname=test
     local OSDS=3
@@ -462,12 +450,21 @@
 
     TESTDATA="testdata.$$"
 
-    setup $dir || return 1
     run_mon $dir a --osd_pool_default_size=$OSDS || return 1
     run_mgr $dir x || return 1
+
+    # Set scheduler to "wpq" until there's a reliable way to query scrub states
+    # with "--osd-scrub-sleep" set to 0. The "mclock_scheduler" overrides the
+    # scrub sleep to 0 and as a result the checks in the test fail.
+    local ceph_osd_args="--osd_deep_scrub_randomize_ratio=0 \
+            --osd_scrub_interval_randomize_ratio=0 \
+            --osd_scrub_backoff_ratio=0.0 \
+            --osd_op_queue=wpq \
+            --osd_scrub_sleep=0.2"
+
     for osd in $(seq 0 $(expr $OSDS - 1))
     do
-        run_osd $dir $osd || return 1
+        run_osd $dir $osd $ceph_osd_args|| return 1
     done
 
     # Create a pool with a single pg
@@ -483,12 +480,83 @@ function TEST_pg_dump_scrub_duration() {
     rm -f $TESTDATA
 
     local pgid="${poolid}.0"
-    pg_scrub $pgid || return 1
+    local now_is=`date -I"ns"`
+
+    # before the scrubbing starts
+
+    # last scrub duration should be 0. The scheduling data should show
+    # a time in the future:
+    # e.g. 'periodic scrub scheduled @ 2021-10-12T20:32:43.645168+0000'
 
-    ceph pg $pgid query | jq '.info.stats.scrub_duration'
-    test "$(ceph pg $pgid query | jq '.info.stats.scrub_duration')" '>' "0" || return 1
+    declare -A expct_starting=( ['query_active']="false" ['query_is_future']="true" ['query_schedule']="scrub scheduled" )
+    declare -A sched_data
+    extract_published_sch $pgid $now_is "2019-10-12T20:32:43.645168+0000" sched_data
+    schedule_against_expected sched_data expct_starting "initial"
+    (( ${sched_data['dmp_last_duration']} == 0)) || return 1
+    echo "last-scrub --- " ${sched_data['query_last_scrub']}
 
-    teardown $dir || return 1
+    #
+    # step 1: scrub once (mainly to ensure there is no urgency to scrub)
+    #
+
+    saved_last_stamp=${sched_data['query_last_stamp']}
+    ceph tell osd.* config set osd_scrub_sleep "0"
+    ceph pg deep-scrub $pgid
+    ceph pg scrub $pgid
+
+    # wait for the 'last duration' entries to change. Note that the 'dump' one will need
+    # up to 5 seconds to sync
+
+    sleep 5
+    sched_data=()
+    declare -A expct_qry_duration=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
+    wait_any_cond $pgid 10 $saved_last_stamp expct_qry_duration "WaitingAfterScrub " sched_data || return 1
+    # verify that 'pg dump' also shows the change in last_scrub_duration
+    sched_data=()
+    declare -A expct_dmp_duration=( ['dmp_last_duration']="0" ['dmp_last_duration_neg']="not0" )
+    wait_any_cond $pgid 10 $saved_last_stamp expct_dmp_duration "WaitingAfterScrub_dmp " sched_data || return 1
+
+    sleep 2
+
+    #
+    # step 2: set noscrub and request a "periodic scrub". Watch for the change in the 'is the scrub
+    # scheduled for the future' value
+    #
+
+    ceph tell osd.* config set osd_scrub_chunk_max "3" || return 1
+    ceph tell osd.* config set osd_scrub_sleep "1.0" || return 1
+    ceph osd set noscrub || return 1
+    sleep 2
+    saved_last_stamp=${sched_data['query_last_stamp']}
+
+    ceph pg $pgid scrub
+    sleep 1
+    sched_data=()
+    declare -A expct_scrub_peri_sched=( ['query_is_future']="false" )
+    wait_any_cond $pgid 10 $saved_last_stamp expct_scrub_peri_sched "waitingBeingScheduled" sched_data || return 1
+
+    # note: the induced change in 'last_scrub_stamp' that we've caused above, is by itself not a publish-stats
+    # trigger. Thus it might happen that the information in 'pg dump' will not get updated here. Do not expect
+    # 'dmp_is_future' to follow 'query_is_future' without a good reason
+    ## declare -A expct_scrub_peri_sched_dmp=( ['dmp_is_future']="false" )
+    ## wait_any_cond $pgid 15 $saved_last_stamp expct_scrub_peri_sched_dmp "waitingBeingScheduled" sched_data || echo "must be fixed"
+
+    #
+    # step 3: allow scrubs. Watch for the conditions during the scrubbing
+    #
+
+    saved_last_stamp=${sched_data['query_last_stamp']}
+    ceph osd unset noscrub
+
+    declare -A cond_active=( ['query_active']="true" )
+    sched_data=()
+    wait_any_cond $pgid 10 $saved_last_stamp cond_active "WaitingActive " sched_data || return 1
+
+    # check for pg-dump to show being active. But if we see 'query_active' being reset - we've just
+    # missed it.
+    declare -A cond_active_dmp=( ['dmp_state_has_scrubbing']="true" ['query_active']="false" )
+    sched_data=()
+    wait_any_cond $pgid 10 $saved_last_stamp cond_active_dmp "WaitingActive " sched_data || return 1
 }
 
 main osd-scrub-test "$@"
diff --git a/qa/standalone/scrub/scrub-helpers.sh b/qa/standalone/scrub/scrub-helpers.sh
new file mode 100644
index 0000000000000..cf13732c53854
--- /dev/null
+++ b/qa/standalone/scrub/scrub-helpers.sh
@@ -0,0 +1,196 @@
+#!/usr/bin/env bash
+# @file scrub-helpers.sh
+# @brief a collection of bash functions useful for scrub standalone tests
+#
+
+# extract_published_sch()
+#
+# Use the output from both 'ceph pg dump pgs' and 'ceph pg x.x query' commands to determine
+# the published scrub scheduling status of a given PG.
+#
+# $1: pg id
+# $2: 'current' time to compare to
+# $3: an additional time-point to compare to
+# $4: [out] dictionary
+#
+function extract_published_sch() {
+  local pgn="$1"
+  local -n dict=$4 # a ref to the in/out dictionary
+  local current_time=$2
+  local extra_time=$3
+  local extr_dbg=1 # note: 3 and above leave some temp files around
+
+  #turn off '-x' (but remember previous state)
+  local saved_echo_flag=${-//[^x]/}
+  set +x
+
+  (( extr_dbg >= 3 )) && ceph pg dump pgs -f json-pretty >> /tmp/a_dmp$$
+  (( extr_dbg >= 3 )) && ceph pg $1 query -f json-pretty >> /tmp/a_qry$$
+
+  from_dmp=`ceph pg dump pgs -f json-pretty | jq -r --arg pgn "$pgn" --arg extra_dt "$extra_time" --arg current_dt "$current_time" '[
+    [[.pg_stats[]] | group_by(.pg_stats)][0][0] |
+    [.[] |
+    select(has("pgid") and .pgid == $pgn) |
+
+    (.dmp_stat_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|first) else . end)) |
+    (.dmp_when_part=(.scrub_schedule | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
+
+   [ {
+      dmp_pg_state: .state,
+      dmp_state_has_scrubbing: (.state | test(".*scrub.*";"i")),
+      dmp_last_duration:.last_scrub_duration,
+      dmp_schedule: .dmp_stat_part,
+      dmp_schedule_at: .dmp_when_part,
+      dmp_is_future: ( .dmp_when_part > $current_dt ),
+      dmp_vs_date: ( .dmp_when_part > $extra_dt ),
+      dmp_reported_epoch: .reported_epoch,
+      dmp_seq: .reported_seq
+   }] ]][][][]'`
+
+  (( extr_dbg >= 2 )) && echo "from pg dump pg: $from_dmp"
+  (( extr_dbg >= 2 )) && echo "query output:"
+  (( extr_dbg >= 2 )) && ceph pg $1 query -f json-pretty | awk -e '/scrubber/,/agent_state/ {print;}'
+
+  from_qry=`ceph pg $1 query -f json-pretty | jq -r --arg extra_dt "$extra_time" --arg current_dt "$current_time" --arg spt "'" '
+    . |
+        (.q_stat_part=((.scrubber.schedule// "-") | if test(".*@.*") then (split(" @ ")|first) else . end)) |
+        (.q_when_part=((.scrubber.schedule// "0") | if test(".*@.*") then (split(" @ ")|last) else "0" end)) |
+        (.q_when_is_future=(.q_when_part > $current_dt)) |
+        (.q_vs_date=(.q_when_part > $extra_dt)) |
+        {
+          query_epoch: .epoch,
+          query_seq: .info.stats.reported_seq,
+          query_active: (.scrubber | if has("active") then .active else "bug" end),
+          query_schedule: .q_stat_part,
+          query_schedule_at: .q_when_part,
+          query_last_duration: .info.stats.last_scrub_duration,
+          query_last_stamp: .info.history.last_scrub_stamp,
+          query_last_scrub: (.info.history.last_scrub| sub($spt;"x") ),
+          query_is_future: .q_when_is_future,
+          query_vs_date: .q_vs_date,
+
+        }
+   '`
+  (( extr_dbg >= 1 )) && echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'
+
+  # note that using a ref to an associative array directly is tricky. Instead - we are copying:
+  local -A dict_src=`echo $from_qry " " $from_dmp | jq -s -r 'add | "(",(to_entries | .[] | "["+(.key)+"]="+(.value|@sh)),")"'`
+  dict=()
+  for k in "${!dict_src[@]}"; do dict[$k]=${dict_src[$k]}; done
+
+  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+}
+
+# query the PG, until any of the conditions in the 'expected' array are met
+#
+# A condition may be negated by an additional entry in the 'expected' array. Its
+# form should be:
+#  key: the original key, with a "_neg" suffix;
+#  Value: not checked
+#
+# $1: pg id
+# $2: max retries
+# $3: a date to use in comparisons
+# $4: set of K/V conditions
+# $5: debug message
+# $6: [out] the results array
+function wait_any_cond() {
+  local pgid="$1"
+  local retries=$2
+  local cmp_date=$3
+  local -n ep=$4
+  local -n out_array=$6
+  local -A sc_data
+  local extr_dbg=2
+
+  #turn off '-x' (but remember previous state)
+  local saved_echo_flag=${-//[^x]/}
+  set +x
+
+  local now_is=`date -I"ns"`
+  (( extr_dbg >= 2 )) && echo "waiting for any condition ($5): pg:$pgid dt:$cmp_date ($retries retries)"
+
+  for i in $(seq 1 $retries)
+  do
+    sleep 0.5
+    extract_published_sch $pgid $now_is $cmp_date sc_data
+    (( extr_dbg >= 4 )) && echo "${sc_data['dmp_last_duration']}"
+    (( extr_dbg >= 4 )) && echo "----> loop: $i ~ ${sc_data['dmp_last_duration']} / " ${sc_data['query_vs_date']} " / ${sc_data['dmp_is_future']}"
+    (( extr_dbg >= 2 )) && echo "--> loop: $i ~ ${sc_data['query_active']} / ${sc_data['query_seq']} / ${sc_data['dmp_seq']} " \
+               "/ ${sc_data['query_is_future']} / ${sc_data['query_last_stamp']} / ${sc_data['query_schedule']} %%% ${!ep[@]}"
+
+    # perform schedule_against_expected(), but with slightly different out-messages behaviour
+    for k_ref in "${!ep[@]}"
+    do
+      (( extr_dbg >= 3 )) && echo "key is $k_ref"
+      # is this a real key, or just a negation flag for another key??
+      [[ $k_ref =~ "_neg" ]] && continue
+
+      local act_val=${sc_data[$k_ref]}
+      local exp_val=${ep[$k_ref]}
+
+      # possible negation? look for a matching key
+      local neg_key="${k_ref}_neg"
+      (( extr_dbg >= 3 )) && echo "neg-key is $neg_key"
+      if [ -v 'ep[$neg_key]' ]; then
+        is_neg=1
+      else
+        is_neg=0
+      fi
+
+      (( extr_dbg >= 1 )) && echo "key is $k_ref: negation:$is_neg # expected: $exp_val # in actual: $act_val"
+      is_eq=0
+      [[ $exp_val == $act_val ]] && is_eq=1
+      if (($is_eq ^ $is_neg))
+      then
+        echo "$5 - '$k_ref' actual value ($act_val) matches expected ($exp_val) (negation: $is_neg)"
+        for k in "${!sc_data[@]}"; do out_array[$k]=${sc_data[$k]}; done
+        if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+        return 0
+      fi
+    done
+  done
+
+  echo "$5: wait_any_cond(): failure. Note: query-active=${sc_data['query_active']}"
+  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+  return 1
+}
+
+
+# schedule_against_expected()
+#
+# Compare the scrub scheduling state collected by extract_published_sch() to a set of expected values.
+# All values are expected to match.
+#
+# $1: the published scheduling state
+# $2: a set of conditions to verify
+# $3: text to be echoed for a failed match
+#
+function schedule_against_expected() {
+  local -n dict=$1 # a ref to the published state
+  local -n ep=$2 # the expected results
+  local extr_dbg=1
+
+  #turn off '-x' (but remember previous state)
+  local saved_echo_flag=${-//[^x]/}
+  set +x
+
+  (( extr_dbg >= 1 )) && echo "-- - comparing:"
+  for k_ref in "${!ep[@]}"
+  do
+    local act_val=${dict[$k_ref]}
+    local exp_val=${ep[$k_ref]}
+    (( extr_dbg >= 1 )) && echo "key is " $k_ref " expected: " $exp_val " in actual: " $act_val
+    if [[ $exp_val != $act_val ]]
+    then
+      echo "$3 - '$k_ref' actual value ($act_val) differs from expected ($exp_val)"
+      echo '####################################################^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^'
+
+      if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+      return 1
+    fi
+  done
+
+  if [[ -n "$saved_echo_flag" ]]; then set -x; fi
+  return 0
+}
-- 
2.39.5
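
Usage note (an illustration only, not part of the commit): the intended call chain is the one TEST_dump_scrub_schedule() follows above: extract_published_sch() snapshots the state published via 'pg query' and 'pg dump', schedule_against_expected() does a one-shot comparison, and wait_any_cond() polls (0.5s per retry) until one of the expected, or negated, conditions holds. A minimal bash sketch, assuming a running standalone cluster with $CEPH_ROOT set and a single-PG test pool whose PG id is "1.0" (the pg id, retry counts and expected values below are illustrative, not taken from the patch):

    source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
    source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh

    pgid="1.0"              # hypothetical PG of a 1-pg test pool
    now_is=`date -I"ns"`

    # snapshot the scheduling state published via 'pg query' and 'pg dump'
    declare -A sched_data
    extract_published_sch $pgid $now_is $now_is sched_data

    # one-shot comparison: every listed key must match, or the call returns 1
    declare -A expected=( ['query_active']="false" )
    schedule_against_expected sched_data expected "example" || echo "mismatch"

    # poll until the scrubber reports itself active (or the retries run out)
    declare -A cond=( ['query_active']="true" )
    declare -A out_data
    wait_any_cond $pgid 10 ${sched_data['query_last_stamp']} cond "wait-active" out_data || echo "timed out"

    # a '<key>_neg' entry inverts the check for <key>: here we wait until
    # query_last_duration is anything other than "0"
    declare -A not_zero=( ['query_last_duration']="0" ['query_last_duration_neg']="not0" )
    wait_any_cond $pgid 10 ${out_data['query_last_stamp']} not_zero "wait-duration" out_data || echo "timed out"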