#######################################################################
+##
+# Return a list of numbers that are increasingly larger and whose
+# total is **timeout** seconds. It can be used to have short sleep
+# delay while waiting for an event on a fast machine. But if running
+# very slowly the larger delays avoid stressing the machine even
+# further or spamming the logs.
+#
+# @param timeout sum of all delays, in seconds
+# @return a list of sleep delays
+#
+function get_timeout_delays() {
+ local -i timeout=$1
+ local -i first_step=1
+
+ local -i i
+ local -i total=0
+ for (( i = $first_step ; $total + $i <= $timeout ; i *= 2 )) ; do
+ echo -n "$i "
+ total+=$i
+ done
+ if (( $total < $timeout )) ; then
+ echo -n $(( $timeout - $total ))
+ fi
+}
+
+function test_get_timeout_delays() {
+ test "$(get_timeout_delays 1)" = "1 " || return 1
+ test "$(get_timeout_delays 5)" = "1 2 2" || return 1
+ test "$(get_timeout_delays 6)" = "1 2 3" || return 1
+ test "$(get_timeout_delays 7)" = "1 2 4 " || return 1
+ test "$(get_timeout_delays 8)" = "1 2 4 1" || return 1
+}
+
+#######################################################################
+
##
# Wait until the cluster becomes clean or if it does not make progress
# for $TIMEOUT seconds.
# @return 0 if the cluster is clean, 1 otherwise
#
function wait_for_clean() {
- local status=1
local num_active_clean=-1
local cur_active_clean
+ local -a delays=($(get_timeout_delays $TIMEOUT))
local -i timer=0
- local -i loops=0
- local -i phase=0
local num_pgs=$(get_num_pgs)
test $num_pgs != 0 || return 1
- local TENTH_TIMEOUT=($TIMEOUT * 10)
- # The first phase 10 times (1 second) second phase an additional 15 times (15 seconds)
- local backoff_phases=( 10 15 -1 )
- local sleep_backoff=( .1 1 10)
while true ; do
# Comparing get_num_active_clean & get_num_pgs is used to determine
cur_active_clean=$(get_num_active_clean)
test $cur_active_clean = $num_pgs && break
if test $cur_active_clean != $num_active_clean ; then
- timer=0 ; loops=0 ; phase=0
+ timer=0
num_active_clean=$cur_active_clean
elif get_is_making_recovery_progress ; then
- timer=0 ; loops=0 ; phase=0
- elif (( timer >= $TENTH_TIMEOUT)) ; then
+ timer=0
+ elif (( $timer >= ${#delays[*]} )) ; then
ceph report
return 1
fi
- sleep ${sleep_backoff[$phase]}
- timer="$(echo $timer + ${sleep_backoff[$phase]} \* 10 | bc | cut -d. -f1)"
- loops=$(expr $loops + 1)
- if (( $loops == ${backoff_phases[$phase]} )); then
- phase=$(expr $phase + 1)
- loop=0
- fi
+ sleep ${delays[$timer]}
+ timer+=1
done
return 0
}