##
# Run repair on **pgid** and wait until it completes. The repair
# function will fail if repair does not complete within $TIMEOUT
-# seconds. The repair is complete whenever the
-# **get_last_scrub_stamp** function reports a timestamp different from
-# the one stored before starting the repair.
+# seconds.
#
# @param pgid the id of the PG
# @return 0 on success, 1 on error
function repair() {
local pgid=$1
+ # Record the stamp before asking for the repair: wait_for_scrub
+ # returns as soon as the reported stamp differs from this one.
local last_scrub=$(get_last_scrub_stamp $pgid)
-
ceph pg repair $pgid
- for ((i=0; i < $TIMEOUT; i++)); do
- if test "$last_scrub" != "$(get_last_scrub_stamp $pgid)" ; then
- return 0
- fi
- sleep 1
- done
- return 1
+ # The exit status of wait_for_scrub is the exit status of repair.
+ wait_for_scrub $pgid "$last_scrub"
}
function test_repair() {
#######################################################################
+##
+# Given the *last_scrub*, wait for scrub to happen on **pgid**. It
+# will fail if scrub does not complete within $TIMEOUT seconds. The
+# scrub is complete whenever the **get_last_scrub_stamp** function
+# reports a timestamp different from the one given as argument.
+#
+# @param pgid the id of the PG
+# @param last_scrub timestamp of the last scrub for *pgid*
+# @return 0 on success, 1 on error
+#
+function wait_for_scrub() {
+    local pgid=$1
+    local last_scrub="$2"
+    # Keep the loop counter local so that a caller looping on its own
+    # "i" does not get it overwritten.
+    local i
+
+    # Poll once per second, at most $TIMEOUT times, and succeed as soon
+    # as the reported stamp differs from the one given by the caller.
+    for ((i=0; i < $TIMEOUT; i++)); do
+        if test "$last_scrub" != "$(get_last_scrub_stamp "$pgid")" ; then
+            return 0
+        fi
+        sleep 1
+    done
+    # The stamp never changed within the allotted time.
+    return 1
+}
+
+# Check that wait_for_scrub succeeds after a repair was requested and
+# that it fails when the stamp does not change within $TIMEOUT.
+function test_wait_for_scrub() {
+    local dir=$1
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=1 || return 1
+    run_osd $dir 0 || return 1
+    wait_for_clean || return 1
+    local pgid=1.0
+    # Read the reference stamp before requesting the repair, as
+    # repair() does: read afterwards, it could already be the updated
+    # stamp and wait_for_scrub would time out although the scrub ran.
+    local last_scrub=$(get_last_scrub_stamp $pgid)
+    ceph pg repair $pgid || return 1
+    wait_for_scrub $pgid "$last_scrub" || return 1
+    # Once the OSD is killed the stamp is expected to stay the same, so
+    # the wait must fail after the one second TIMEOUT.
+    kill_daemons $dir KILL osd || return 1
+    last_scrub=$(get_last_scrub_stamp $pgid)
+    ! TIMEOUT=1 wait_for_scrub $pgid "$last_scrub" || return 1
+    teardown $dir || return 1
+}
+
+#######################################################################
+
##
# Return 0 if the erasure code *plugin* is available, 1 otherwise.
#
--osd-scrub-min-interval=5 \
--osd-scrub-interval-randomize-ratio=0
done
- wait_for_clean || return 1
# Create an EC pool
ceph osd erasure-code-profile set myprofile \
k=2 m=1 ruleset-failure-domain=osd || return 1
ceph osd pool create $poolname 8 8 erasure myprofile || return 1
- wait_for_clean || return 1
# Put an object
local payload=ABCDEF
echo $payload > $dir/ORIGINAL
rados --pool $poolname put SOMETHING $dir/ORIGINAL || return 1
+ wait_for_clean || return 1
# Remove the object from one shard physically
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING remove || return 1
-
- # Give some time for auto repair
- sleep 20
-
+ # Wait for auto repair
+ local pgid=$(get_pg $poolname SOMETHING)
+ wait_for_scrub $pgid "$(get_last_scrub_stamp $pgid)"
+ wait_for_clean || return 1
# Verify - the file should be back
objectstore_tool $dir $(get_not_primary $poolname SOMETHING) SOMETHING list-attrs || return 1
rados --pool $poolname get SOMETHING $dir/COPY || return 1