#
function teardown() {
local dir=$1
- kill_daemons $dir
+ kill_daemons $dir KILL
if [ $(stat -f -c '%T' .) == "btrfs" ]; then
__teardown_btrfs $dir
fi
# Kill all daemons for which a .pid file exists in **dir**. Each
# daemon is sent a **signal** and kill_daemons waits for it to exit
# during a few minutes. By default all daemons are killed. If a
-# **name_prefix** is provided, only the daemons matching it are
-# killed.
+# **name_prefix** is provided, only the daemons for which a pid
+# file is found matching the prefix are killed. See run_osd and
+# run_mon for more information about the name conventions for
+# the pid files.
#
-# Send KILL to all daemons : kill_daemons $dir
-# Send TERM to all daemons : kill_daemons $dir TERM
-# Send TERM to all osds : kill_daemons $dir TERM osd
+# Send TERM to all daemons : kill_daemons $dir
+# Send KILL to all daemons : kill_daemons $dir KILL
+# Send KILL to all osds : kill_daemons $dir KILL osd
+# Send KILL to osd 1 : kill_daemons $dir KILL osd.1
#
# If a daemon is sent the TERM signal and does not terminate
# within a few minutes, it will still be running even after
# sleep intervals can be specified as **delays** and defaults
# to:
#
-# 0 1 1 1 2 3 5 5 5 10 10 20 60
+# 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120
#
# This sequence is designed to not require a sleep time (0) if the
# machine is fast enough and the daemon terminates in a fraction of a
# second. The increasing sleep numbers should give plenty of time for
# the daemon to die even on the slowest running machine. If a daemon
-# takes more than two minutes to stop (the sum of all sleep times),
+# takes more than a few minutes to stop (the sum of all sleep times),
# there probably is no point in waiting more and a number of things
# are likely to go wrong anyway: better give up and return on error.
#
# @param dir path name of the environment
-# @param signal name of the first signal (defaults to KILL)
+# @param signal name of the first signal (defaults to TERM)
# @param name_prefix only kill match daemons (defaults to all)
# @params delays sequence of sleep times before failure
# @return 0 on success, 1 on error
local trace=$(shopt -q -o xtrace && echo true || echo false)
$trace && shopt -u -o xtrace
local dir=$1
- local signal=${2:-KILL}
+ local signal=${2:-TERM}
local name_prefix=$3 # optional, osd, mon, osd.1
- local delays=${4:-0 0 1 1 1 2 3 5 5 5 10 10 20 60}
+ local delays=${4:-0 0 1 1 1 2 3 5 5 5 10 10 20 60 60 60 120}
local status=0
for pidfile in $(find $dir 2>/dev/null | grep $name_prefix'[^/]*\.pid') ; do
setup $dir || return 1
run_mon $dir a --osd_pool_default_size=1 || return 1
run_osd $dir 0 || return 1
+ #
# sending signal 0 won't kill the daemon
# waiting just for one second instead of the default schedule
# allows us to quickly verify what happens when kill fails
# to stop the daemon (i.e. it must return false)
+ #
! kill_daemons $dir 0 osd 1 || return 1
+ #
+ # killing just the osd and verify the mon still is responsive
+ #
kill_daemons $dir TERM osd || return 1
ceph osd dump | grep "osd.0 down" || return 1
+ #
+ # kill the mon and verify it cannot be reached
+ #
kill_daemons $dir TERM || return 1
- ! ceph --connect-timeout 1 status || return 1
+ ! ceph --connect-timeout 60 status || return 1
teardown $dir || return 1
}