tests: Optimizing kill_daemons() sleep time

author Erwan Velu <erwan@redhat.com>

Mon, 21 Mar 2016 11:12:13 +0000 (12:12 +0100)

committer Erwan Velu <erwan@redhat.com>

Tue, 5 Apr 2016 07:36:25 +0000 (09:36 +0200)
author Erwan Velu <erwan@redhat.com>
Mon, 21 Mar 2016 11:12:13 +0000 (12:12 +0100)
committer Erwan Velu <erwan@redhat.com>
Tue, 5 Apr 2016 07:36:25 +0000 (09:36 +0200)
diff --git a/qa/workunits/ceph-helpers.sh b/qa/workunits/ceph-helpers.sh

index 2ec151fcbe05066f0c07cb028568c576e3a0ce44..0f9309147ab5fd13fafce5d95f77992432608c7f 100755 (executable)
--- a/qa/workunits/ceph-helpers.sh
+++ b/qa/workunits/ceph-helpers.sh
@@ -158,10 +158,10 @@ function test_teardown() {
  # sleep intervals can be specified as **delays** and defaults
  # to:
  #
-#  0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120
+#  0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120
  #
-# This sequence is designed to not require a sleep time (0) if the
-# machine is fast enough and the daemon terminates in a fraction of a
+# This sequence is designed to start with a very short sleep time (0.1)
+# in case the machine is fast enough and the daemon terminates in a fraction of a
  # second. The increasing sleep numbers should give plenty of time for
  # the daemon to die even on the slowest running machine. If a daemon
  # takes more than a few minutes to stop (the sum of all sleep times),
@@ -175,10 +175,9 @@ function test_teardown() {
  function kill_daemon() {
      local pid=$(cat $1)
      local send_signal=$2
-    local delays=${3:-0 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
+    local delays=${3:-0.1 0.2 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
      local exit_code=1
      for try in $delays ; do
-         sleep $try
           if kill -$send_signal $pid 2> /dev/null ; then
              exit_code=1
           else
@@ -186,10 +185,45 @@ function kill_daemon() {
              break
           fi
           send_signal=0
+         sleep $try
      done;
      return $exit_code
  }
  
+function test_kill_daemon() {
+    local dir=$1
+    setup $dir || return 1
+    run_mon $dir a --osd_pool_default_size=1 || return 1
+    run_osd $dir 0 || return 1
+
+    name_prefix=osd
+    for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
+        #
+        # sending signal 0 won't kill the daemon
+        # waiting just for one second instead of the default schedule
+        # allows us to quickly verify what happens when kill fails
+        # to stop the daemon (i.e. it must return false)
+        #
+        ! kill_daemon $pidfile 0 1 || return 1
+        #
+        # kill just the osd and verify the mon is still responsive
+        #
+        kill_daemon $pidfile TERM || return 1
+    done
+
+    ceph osd dump | grep "osd.0 down" || return 1
+
+    for pidfile in $(find $dir -name "*.pid" 2>/dev/null) ; do
+        #
+        # kill the mon and verify it cannot be reached
+        #
+        kill_daemon $pidfile TERM || return 1
+        ! ceph --connect-timeout 60 status || return 1
+    done
+
+    teardown $dir || return 1
+}
+
  ##
  # Kill all daemons for which a .pid file exists in **dir**.  Each
  # daemon is sent a **signal** and kill_daemons waits for it to exit
author	Erwan Velu <erwan@redhat.com>
	Mon, 21 Mar 2016 11:12:13 +0000 (12:12 +0100)
committer	Erwan Velu <erwan@redhat.com>
	Tue, 5 Apr 2016 07:36:25 +0000 (09:36 +0200)