# **get_last_scrub_stamp** function reports a timestamp different from
# the one stored before starting the scrub.
#
+# The scrub is initiated using the "operator initiated" method, and
+# the triggered scrub is therefore not subject to the no-scrub flags etc.
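+# (e.g. "ceph pg scrub 1.0" should proceed even when 'noscrub' is set)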
+#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
#
#######################################################################
+##
+# Trigger a "scheduled" scrub on **pgid** (by mnaually modifying the relevant
+# last-scrub stamp) and wait until it completes. The pg_scrub
+# function will fail if scrubbing does not complete within $TIMEOUT
+# seconds. The pg_scrub is complete whenever the
+# **get_last_scrub_stamp** function reports a timestamp different from
+# the one stored before starting the scrub.
+#
+# @param pgid the id of the PG
+# @return 0 on success, 1 on error
+#
+function pg_schedule_scrub() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid)
+ ceph tell $pgid schedule-scrub
+ wait_for_scrub $pgid "$last_scrub"
+}
+
+function pg_schedule_deep_scrub() {
+ local pgid=$1
+ local last_scrub=$(get_last_scrub_stamp $pgid last_deep_scrub_stamp)
+ ceph tell $pgid schedule-deep-scrub
+ wait_for_scrub $pgid "$last_scrub" last_deep_scrub_stamp
+}
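+
+# A minimal usage sketch (assuming a clean, scrubbable PG such as 1.0):
+#
+#   pg_schedule_scrub 1.0 || return 1       # fake a periodic shallow scrub
+#   pg_schedule_deep_scrub 1.0 || return 1  # same, tracking last_deep_scrub_stamp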
+
+function test_pg_schedule_scrub() {
+ local dir=$1
+
+ setup $dir || return 1
+ run_mon $dir a --osd_pool_default_size=1 --mon_allow_pool_size_one=true || return 1
+ run_mgr $dir x || return 1
+ run_osd $dir 0 || return 1
+ create_rbd_pool || return 1
+ wait_for_clean || return 1
+ pg_schedule_scrub 1.0 || return 1
+ kill_daemons $dir KILL osd || return 1
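+ # with all OSDs down the scrub can never start; the helper is expected to
+ # time out (TIMEOUT=1), and the leading '!' turns that failure into a pass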
+ ! TIMEOUT=1 pg_schedule_scrub 1.0 || return 1
+ teardown $dir || return 1
+}
+
+#######################################################################
+
##
# Run the *command* and expect it to fail (i.e. return a non zero status).
# The output (stderr and stdout) is stored in a temporary file in *dir*
#
set -x
source $CEPH_ROOT/qa/standalone/ceph-helpers.sh
+source $CEPH_ROOT/qa/standalone/scrub/scrub-helpers.sh
if [ `uname` = FreeBSD ]; then
# erasure coding overwrites are only tested on Bluestore
#
local pg=$(get_pg $poolname SOMETHING)
local last_scrub=$(get_last_scrub_stamp $pg)
- ceph pg scrub $pg
+ ceph tell $pg schedule-scrub
#
# 2) Assure the scrub is not scheduled
date --rfc-3339=ns
# note: must initiate a "regular" (periodic) deep scrub - not an operator-initiated one
- env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) deep_scrub "$pgid"
- env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) scrub "$pgid"
+ env CEPH_ARGS= ceph --format json daemon $(get_asok_path $the_osd) schedule-deep-scrub "$pgid"
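+ # (the command goes through the OSD's admin socket; CEPH_ARGS is cleared,
+ # presumably so the standalone-test client options do not interfere)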
# wait for 'scrubbing' to appear
for ((i=0; i < 80; i++)); do
function TEST_auto_repair_bluestore_basic() {
local dir=$1
- local poolname=testpool
-
- # Launch a cluster with 5 seconds scrub interval
- run_mon $dir a || return 1
- run_mgr $dir x || return 1
- local ceph_osd_args="--osd-scrub-auto-repair=true \
- --osd_deep_scrub_randomize_ratio=0 \
- --osd-scrub-interval-randomize-ratio=0"
- for id in $(seq 0 2) ; do
- run_osd $dir $id $ceph_osd_args || return 1
- done
+ local -A cluster_conf=(
+ ['osds_num']="3"
+ ['pgs_in_pool']="1"
+ ['pool_name']="testpool"
+ ['extras']=" --osd_scrub_auto_repair=true"
+ )
+ local extr_dbg=3
+ standard_scrub_cluster $dir cluster_conf || return 1
+ local poolid=${cluster_conf['pool_id']}
+ local poolname=${cluster_conf['pool_name']}
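+ # (standard_scrub_cluster, from scrub-helpers.sh, is expected to bring up
+ # the mon/mgr/OSDs and create the pool, writing the resulting 'pool_id'
+ # back into cluster_conf - hence the lookups above)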
- create_pool $poolname 1 1 || return 1
ceph osd pool set $poolname size 2
wait_for_clean || return 1
# Remove the object from one shard physically
- # Restarted osd get $ceph_osd_args passed
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
+ ceph tell osd.* config set osd_scrub_auto_repair true
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
- ceph tell $pgid deep_scrub
- ceph tell $pgid scrub
+ # note: the scrub initiated must be a "regular" (periodic) deep scrub - not an
+ # operator-initiated one (as there's no 'auto-repair' for the latter)
+ ceph tell $pgid schedule-deep-scrub
# Wait for auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
- ceph tell $pgid scrub
+ ceph tell $pgid schedule-scrub
# Wait for scrub -> auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
ceph pg dump pgs
# Actually this causes 2 scrubs, so we better wait a little longer
+ sleep 2
+ ceph pg dump pgs
+ sleep 2
+ ceph pg dump pgs
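+ # (the interleaved dumps above are debugging aids for watching the two
+ # back-to-back scrubs progress)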
sleep 5
wait_for_clean || return 1
ceph pg dump pgs
local pgid=$(get_pg $poolname obj1)
local primary=$(get_primary $poolname obj1)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
- ceph tell $pgid deep_scrub
- ceph tell $pgid scrub
+ ceph tell $pgid schedule-deep-scrub
# Wait for auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
# obj2 can't be repaired
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) obj2 remove || return 1
objectstore_tool $dir $(get_primary $poolname SOMETHING) obj2 rm-attr _ || return 1
+ ceph tell osd.* config set osd_scrub_auto_repair true
local pgid=$(get_pg $poolname obj1)
local primary=$(get_primary $poolname obj1)
local last_scrub_stamp="$(get_last_scrub_stamp $pgid)"
- ceph tell $pgid deep_scrub
- ceph tell $pgid scrub
+ ceph tell $pgid schedule-deep-scrub
# Wait for auto repair
wait_for_scrub $pgid "$last_scrub_stamp" || return 1
flush_pg_stats
local last_scrub=$(get_last_scrub_stamp $pg)
# Fake a scheduled scrub
- ceph tell $pg scrub || return 1
+ ceph tell $pg schedule-scrub || return 1
# Wait for the scheduled regular scrub
wait_for_scrub $pg "$last_scrub"
sleep 5
# Fake a scheduled scrub
- ceph tell $pg scrub || return 1
+ ceph tell $pg schedule-scrub || return 1
# Wait for the OSD to notice the
# scheduled regular scrub and skip it
local found=false
flush_pg_stats
# Request a regular scrub and it will be done
- pg_scrub $pg
+ pg_schedule_scrub $pg
grep -q "Regular scrub request, deep-scrub details will be lost" $dir/osd.${primary}.log || return 1
# deep-scrub error is no longer present
else
overdue_seconds=$conf_overdue_seconds
fi
- ceph tell ${i}.0 scrub $(expr ${overdue_seconds} + ${i}00) || return 1
+ ceph tell ${i}.0 schedule-scrub $(expr ${overdue_seconds} + ${i}00) || return 1
done
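+ # (each pg's stamp is pushed back by overdue_seconds plus a per-pg extra
+ # of ${i}00 seconds, giving every pg its own clearly-overdue offset)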
# Fake schedule deep scrubs
for i in $(seq $(expr $scrubs + 1) $(expr $scrubs + $deep_scrubs))
else
overdue_seconds=$conf_overdue_seconds
fi
- ceph tell ${i}.0 deep_scrub $(expr ${overdue_seconds} + ${i}00) || return 1
+ ceph tell ${i}.0 schedule-deep-scrub $(expr ${overdue_seconds} + ${i}00) || return 1
done
flush_pg_stats
ceph health | grep -q " pgs not deep-scrubbed in time" || return 1
ceph health | grep -q " pgs not scrubbed in time" || return 1
- # note that the 'ceph tell pg deep_scrub' command now also sets the regular scrub
+ # note that the 'ceph tell pg deep-scrub' command now also sets the regular scrub
# time-stamp. I.e. - all 'late for deep scrubbing' pgs are also late for
# regular scrubbing. For now, we'll allow both responses.
COUNT=$(ceph health detail | grep "not scrubbed since" | wc -l)
otherpgs="${otherpgs}${opg} "
local other_last_scrub=$(get_last_scrub_stamp $pg)
# Fake a scheduled scrub
- ceph tell $opg scrub $opg || return 1
+ ceph tell $opg schedule-scrub $opg || return 1
done
sleep 15
flush_pg_stats
- # Request a regular scrub and it will be done
+ # Force a shallow scrub and it will be done
local last_scrub=$(get_last_scrub_stamp $pg)
- ceph pg scrub $pg
+ ceph tell $pg scrub || return 1
ceph osd unset noscrub || return 1
ceph osd unset nodeep-scrub || return 1
create_pool $poolname 1 1
wait_for_clean || return 1
- # Trigger a scrub on a PG
+ # Trigger a periodic scrub on a PG (no 'extended sleep' for high-priority scrubs)
local pgid=$(get_pg $poolname SOMETHING)
local primary=$(get_primary $poolname SOMETHING)
local last_scrub=$(get_last_scrub_stamp $pgid)
- ceph tell $pgid scrub || return 1
+ ceph tell $pgid schedule-scrub || return 1
# Allow scrub to start extended sleep
PASSED="false"
local primary=$(get_primary $poolname obj1)
local pgid="${poolid}.0"
- ceph tell $pgid $type || return 1
- # deep-scrub won't start without scrub noticing
- if [ "$type" = "deep_scrub" ];
- then
- ceph tell $pgid scrub || return 1
- fi
+ ceph tell $pgid schedule-$type || return 1
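+ # ($type is either "scrub" or "deep-scrub", so this issues schedule-scrub
+ # or schedule-deep-scrub; the extra shallow-scrub nudge previously needed
+ # to get a deep scrub going is apparently no longer required)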
# Wait for scrubbing to start
set -o pipefail
fi
ceph osd set $stopscrub
- if [ "$type" = "deep_scrub" ];
+ if [ "$type" = "deep-scrub" ];
then
ceph osd set noscrub
fi
ceph config set osd "osd_scrub_sleep" "0.1"
ceph osd unset $stopscrub
- if [ "$type" = "deep_scrub" ];
+ if [ "$type" = "deep-scrub" ];
then
ceph osd unset noscrub
fi
function TEST_deep_scrub_abort() {
local dir=$1
- _scrub_abort $dir deep_scrub
+ _scrub_abort $dir deep-scrub
}
function TEST_scrub_permit_time() {
# current time to set last_scrub_stamp, it sets the deadline
# back by osd_max_interval which would cause the time permit checking
# to be skipped. Set back 1 day, the default scrub_min_interval.
- ceph tell $pgid scrub $(( 24 * 60 * 60 )) || return 1
+ ceph tell $pgid schedule-scrub $(( 24 * 60 * 60 )) || return 1
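+ # (the numeric argument backdates the last-scrub stamp by that many
+ # seconds - here one day, the default scrub_min_interval, per the comment above)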
# Scrub should not run
for ((i=0; i < 30; i++)); do
local dbg_counter_at_start=${sched_data['query_scrub_seq']}
echo "test counter @ start: $dbg_counter_at_start"
- ceph pg $pgid deep_scrub
+ ceph tell $pgid schedule-deep-scrub
sleep 5 # 5s is the 'pg dump' interval
declare -A sc_data_2
saved_last_stamp=${sched_data['query_last_stamp']}
ceph tell osd.* config set osd_scrub_sleep "0"
- ceph pg deep-scrub $pgid
- ceph pg scrub $pgid
+ ceph tell $pgid deep-scrub
# wait for the 'last duration' entries to change. Note that the 'dump' one will need
# up to 5 seconds to sync
sleep 2
saved_last_stamp=${sched_data['query_last_stamp']}
- ceph pg $pgid scrub
+ ceph tell $pgid schedule-scrub
sleep 1
sched_data=()
declare -A expct_scrub_peri_sched=( ['query_is_future']="false" )