From e07f9ccb13c045f6ff77cb9b529aef4975203877 Mon Sep 17 00:00:00 2001
From: Willem Jan Withagen
Date: Wed, 19 Apr 2017 11:35:38 +0200
Subject: [PATCH] qa/workunits/ceph-helpers.sh: introduce (and use) wait_for_health

Signed-off-by: Willem Jan Withagen
Signed-off-by: David Zafman
---
 qa/workunits/ceph-helpers.sh  | 40 +++++++++++++++++++++++++++++++++++
 qa/workunits/cephtool/test.sh | 16 +++++---------
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/qa/workunits/ceph-helpers.sh b/qa/workunits/ceph-helpers.sh
index c806508e204..b8b21335653 100755
--- a/qa/workunits/ceph-helpers.sh
+++ b/qa/workunits/ceph-helpers.sh
@@ -1294,6 +1294,46 @@ function test_wait_for_clean() {
     teardown $dir || return 1
 }
 
+#######################################################################
+
+##
+# Wait until "ceph health detail" matches the given grep expression,
+# or give up if no match is seen within $TIMEOUT seconds.
+#
+# @return 0 if the expected health state is reached, 1 otherwise
+#
+function wait_for_health() {
+    local grepstr=$1
+    local -a delays=($(get_timeout_delays $TIMEOUT .1))
+    local -i loop=0
+
+    while ! ceph health detail | grep "$grepstr" ; do
+        if (( $loop >= ${#delays[*]} )) ; then
+            ceph health detail
+            return 1
+        fi
+        sleep ${delays[$loop]}
+        loop+=1
+    done
+}
+
+function wait_for_health_ok() {
+    wait_for_health "HEALTH_OK" || return 1
+}
+
+function test_wait_for_health_ok() {
+    local dir=$1
+
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=1 --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
+    run_mgr $dir x || return 1
+    ! TIMEOUT=1 wait_for_health_ok || return 1
+    run_osd $dir 0 || return 1
+    wait_for_health_ok || return 1
+    teardown $dir || return 1
+}
+
+
 #######################################################################
 
 ##
diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh
index d56503c72b3..7228256a8e9 100755
--- a/qa/workunits/cephtool/test.sh
+++ b/qa/workunits/cephtool/test.sh
@@ -1459,28 +1459,22 @@ function test_mon_pg()
   ceph osd set-backfillfull-ratio .912
 
   # Check injected full results
-  WAITFORFULL=10
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull nearfull
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_WARN.*1 nearfull osd(s)"
+  wait_for_health "HEALTH_WARN.*1 nearfull osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull backfillfull
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_WARN.*1 backfillfull osd(s)"
+  wait_for_health "HEALTH_WARN.*1 backfillfull osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull failsafe
-  sleep $WAITFORFULL
   # failsafe and full are the same as far as the monitor is concerned
-  ceph health | grep "HEALTH_ERR.*1 full osd(s)"
+  wait_for_health "HEALTH_ERR.*1 full osd(s)"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull full
-  sleep $WAITFORFULL
-  ceph health | grep "HEALTH_ERR.*2 full osd(s)"
+  wait_for_health "HEALTH_ERR.*2 full osd(s)"
   ceph health detail | grep "osd.0 is full at.*%"
   ceph health detail | grep "osd.2 is full at.*%"
   ceph health detail | grep "osd.1 is backfill full at.*%"
   $SUDO ceph --admin-daemon $(get_admin_socket osd.0) injectfull none
   $SUDO ceph --admin-daemon $(get_admin_socket osd.1) injectfull none
   $SUDO ceph --admin-daemon $(get_admin_socket osd.2) injectfull none
-  sleep $WAITFORFULL
-  ceph health | grep HEALTH_OK
+  wait_for_health_ok
   ceph pg stat | grep 'pgs:'
   ceph pg 0.0 query
 
-- 
2.39.5
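
Illustrative usage sketch, not part of the patch above: the test name
check_nearfull_warning and the source path are hypothetical, while the
helpers (setup, run_mon, run_mgr, run_osd, get_admin_socket, teardown),
the injectfull command, and the health grep strings all come from the
patch itself. It assumes the harness exports TIMEOUT, which bounds the
polling done by wait_for_health.

#!/usr/bin/env bash
# Hypothetical workunit fragment; the path to ceph-helpers.sh is assumed.
source qa/workunits/ceph-helpers.sh

function check_nearfull_warning() {
    local dir=$1

    # Bring up a minimal cluster, same flags as test_wait_for_health_ok
    # in the patch, and wait until it reports HEALTH_OK.
    setup $dir || return 1
    run_mon $dir a --osd_pool_default_size=1 --osd_failsafe_full_ratio=.99 --mon_pg_warn_min_per_osd=0 || return 1
    run_mgr $dir x || return 1
    run_osd $dir 0 || return 1
    wait_for_health_ok || return 1

    # Inject a nearfull state, then poll "ceph health detail" with the
    # delays produced by get_timeout_delays until the warning appears
    # or $TIMEOUT is exhausted.
    ceph --admin-daemon $(get_admin_socket osd.0) injectfull nearfull
    wait_for_health "HEALTH_WARN.*1 nearfull osd(s)" || return 1

    # Clear the injected state and wait for the cluster to recover.
    ceph --admin-daemon $(get_admin_socket osd.0) injectfull none
    wait_for_health_ok || return 1

    teardown $dir || return 1
}

The point of the change is visible here: instead of sleeping a fixed
WAITFORFULL interval and grepping the health output once, each check
retries with increasing delays until $TIMEOUT expires, which is faster
on healthy clusters and less flaky on slow ones.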