From: Loic Dachary Date: Sat, 6 Sep 2014 17:52:21 +0000 (+0200) Subject: tests: check osd health in cephtool/test.sh X-Git-Tag: v0.88~158^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F2414%2Fhead;p=ceph.git tests: check osd health in cephtool/test.sh Add a trivial osd health test at the beginning of each group of tests. When facing an intermittent failure, it is difficult to diagnose if the cluster appears to be missing an OSD but there is no indication as to when the OSDs were last up. The tests are now only run after all OSDs are up. These checks can be disabled with --no-sanity-check to allow running some tests that have fewer requirements than running all the tests. Signed-off-by: Loic Dachary --- diff --git a/qa/workunits/cephtool/test.sh b/qa/workunits/cephtool/test.sh index 1445ceb072c5..3e7a0eb8b71a 100755 --- a/qa/workunits/cephtool/test.sh +++ b/qa/workunits/cephtool/test.sh @@ -5,6 +5,24 @@ set -o functrace PS4=' ${FUNCNAME[0]}: $LINENO: ' SUDO=${SUDO:-sudo} +function check_no_osd_down() +{ + ! ceph osd dump | grep ' down ' +} + +function wait_no_osd_down() +{ + for i in $(seq 1 300) ; do + if ! check_no_osd_down ; then + echo "waiting for osd(s) to come back up" + sleep 1 + else + break + fi + done + check_no_osd_down +} + function get_pg() { local pool obj map_output pg @@ -583,17 +601,8 @@ function test_mon_osd() ceph osd thrash 10 ceph osd down `seq 0 31` # force everything down so that we can trust up - # make sure everything gets back up+in. - for ((i=0; i < 100; i++)); do - if ceph osd dump | grep ' down '; then - echo "waiting for osd(s) to come back up" - sleep 10 - else - break - fi - done - ! 
ceph osd dump | grep ' down ' || exit 1 - + wait_no_osd_down + # if you have more osds than this you are on your own for f in `seq 0 31`; do ceph osd in $f || true @@ -1078,6 +1087,8 @@ function usage() tests_to_run=() +sanity_check=true + while [[ $# -gt 0 ]]; do opt=$1 @@ -1088,6 +1099,9 @@ while [[ $# -gt 0 ]]; do "--asok-does-not-need-root" ) SUDO="" ;; + "--no-sanity-check" ) + sanity_check=false + ;; "-t" ) shift if [[ -z "$1" ]]; then @@ -1114,11 +1128,20 @@ if [[ ${#tests_to_run[@]} -eq 0 ]]; then tests_to_run=("${TESTS[@]}") fi +if $sanity_check ; then + wait_no_osd_down +fi for i in ${tests_to_run[@]}; do + if $sanity_check ; then + check_no_osd_down + fi set -x test_${i} set +x done +if $sanity_check ; then + check_no_osd_down +fi set -x