From e07f9ccb13c045f6ff77cb9b529aef4975203877 Mon Sep 17 00:00:00 2001
From: Willem Jan Withagen
Date: Wed, 19 Apr 2017 11:35:38 +0200
Subject: [PATCH] qa/workunits/ceph-helpers.sh: introduce (and use) wait_for_health

Signed-off-by: Willem Jan Withagen
Signed-off-by: David Zafman
---
 qa/workunits/ceph-helpers.sh  | 40 +++++++++++++++++++++++++++++++++++
 qa/workunits/cephtool/test.sh | 16 +++++---------
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/qa/workunits/ceph-helpers.sh b/qa/workunits/ceph-helpers.sh
index c806508e204..b8b21335653 100755
--- a/qa/workunits/ceph-helpers.sh
+++ b/qa/workunits/ceph-helpers.sh
@@ -1294,6 +1294,46 @@ function test_wait_for_clean() {
     teardown $dir || return 1
 }
 
+#######################################################################
+
+##
+# Wait until "ceph health detail" matches the given grep expression,
+# or give up if no match is seen within $TIMEOUT seconds.
+#
+# @return 0 if the expected health state is reached, 1 otherwise
+#
+function wait_for_health() {
+    local grepstr=$1
+    local -a delays=($(get_timeout_delays $TIMEOUT .1))
+    local -i loop=0
+
+    while ! ceph health detail | grep "$grepstr" ; do
+        if (( $loop >= ${#delays[*]} )) ; then
+            ceph health detail
+            return 1
+        fi
+        sleep ${delays[$loop]}
+        loop+=1
+    done
+}
+
+function wait_for_health_ok() {
+    wait_for_health "HEALTH_OK" || return 1
+}
+
+function test_wait_for_health_ok() {
+    local dir=$1
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=1 --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
+    run_mgr $dir x || return 1
+    ! TIMEOUT=1 wait_for_health_ok || return 1
+    run_osd $dir 0 || return 1
+    wait_for_health_ok || return 1
+    teardown $dir || return 1
+}
+
+
 #######################################################################
 
 ##
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index d56503c72b3..7228256a8e9 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -1459,28 +1459,22 @@ function test_mon_pg()
   ceph osd set-backfillfull-ratio .912
 
   # Check injected full results
-  WAITFORFULL=10
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull nearfull
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_WARN.*1 nearfull osd(s)"
+  wait_for_health "HEALTH_WARN.*1 nearfull osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull backfillfull
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_WARN.*1 backfillfull osd(s)"
+  wait_for_health "HEALTH_WARN.*1 backfillfull osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull failsafe
-  sleep $WAITFORFULL
   # failsafe and full are the same as far as the monitor is concerned
-  ceph health | grep "HEALTH_ERR.*1 full osd(s)"
+  wait_for_health "HEALTH_ERR.*1 full osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull full
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_ERR.*2 full osd(s)"
+  wait_for_health "HEALTH_ERR.*2 full osd(s)"
   ceph health detail | grep "osd.0 is full at.*%"
   ceph health detail | grep "osd.2 is full at.*%"
   ceph health detail | grep "osd.1 is backfill full at.*%"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull none
   $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull none
   $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull none
-  sleep $WAITFORFULL
-  ceph health | grep HEALTH_OK
+  wait_for_health_ok
   ceph pg stat | grep 'pgs:'
   ceph pg 0.0 query
 
-- 
2.39.5
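
Illustrative usage sketch, not part of the patch above: the test name
check_nearfull_warning and the source path are hypothetical, while the
helpers (setup, run_mon, run_mgr, run_osd, get_admin_socket, teardown),
the injectfull command, and the health grep strings all come from the
patch itself. It assumes the harness exports TIMEOUT, which bounds the
polling done by wait_for_health.

#!/usr/bin/env bash
# Hypothetical workunit fragment; the path to ceph-helpers.sh is assumed.
source qa/workunits/ceph-helpers.sh

function check_nearfull_warning() {
    local dir=$1

    # Bring up a minimal cluster, same flags as test_wait_for_health_ok
    # in the patch, and wait until it reports HEALTH_OK.
    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    wait_for_health_ok || return 1

    # Inject a nearfull state, then poll "ceph health detail" with the
    # delays produced by get_timeout_delays until the warning appears
    # or $TIMEOUT is exhausted.
    ceph --admin-daemon $(get_admin_socket osd.0) injectfull nearfull
    wait_for_health "HEALTH_WARN.*1 nearfull osd(s)" || return 1

    # Clear the injected state and wait for the cluster to recover.
    ceph --admin-daemon $(get_admin_socket osd.0) injectfull none
    wait_for_health_ok || return 1

    teardown $dir || return 1
}

The point of the change is visible here: instead of sleeping a fixed
WAITFORFULL interval and grepping the health output once, each check
retries with increasing delays until $TIMEOUT expires, which is faster
on healthy clusters and less flaky on slow ones.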