``osd_scrub_end_week_day`` are 0 - 6. The use of 7 is now illegal.
Specifying ``0`` for both values causes every day of the week to be allowed.
+* A new health check will warn if different versions of Ceph are running
+ on daemons. It will be a health error if multiple versions are detected.
+ This condition must exist for over 1 week by default in order for the
+ health condition to be triggered. This allows most upgrades to proceed
+ without falsely seeing the warning. If an upgrade is paused for an extended
+ time period, health mute can be used like this
+ "ceph health mute DAEMON_OLD_VERSION --sticky". In this case, after
+ upgrading has finished, use "ceph health unmute DAEMON_OLD_VERSION".
>=15.0.0
--------
* ``ceph pg #.# list_unfound`` output has been enhanced to provide
might_have_unfound information which indicates which OSDs may
contain the unfound objects.
-
-* A new health check will warn if different versions of Ceph are running
- on deamons. This will be an expected warning while in the middle of
- an upgrade. A manual upgrade should health mute like this
- "ceph health mute DAEMON_OLD_VERSION --sticky". After upgrading has
- finish use ceph health unmute DAEMON_OLD_VERSION".
__________________
Warn if old version(s) of Ceph are running on any daemons.
-This will be an expected warning while in the middle of
-an upgrade. A manual upgrade should health mute like this
-"ceph health mute DAEMON_OLD_VERSION --sticky". After upgrading has
-finish use ceph health unmute DAEMON_OLD_VERSION".
+It will be a health error if multiple versions are detected.
+This condition must exist for over 1 week by default in order for the
+health condition to be triggered. This allows most upgrades to proceed
+without falsely seeing the warning. If an upgrade is paused for an extended
+time period, health mute can be used like this
+"ceph health mute DAEMON_OLD_VERSION --sticky". In this case, after
+upgrading has finished, use "ceph health unmute DAEMON_OLD_VERSION".
MON_DOWN
________
setup $dir || return 1
# create a cluster with one monitor and three osds
- run_mon $dir a --public-addr=$CEPH_MON_A || return 1
- run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0.0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0.0 || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
setup $dir || return 1
# create a cluster with one monitor and three osds
- run_mon $dir a --public-addr=$CEPH_MON_A || return 1
- run_mon $dir b --public-addr=$CEPH_MON_B || return 1
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=0.0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=0.0 || return 1
run_osd $dir 0 || return 1
run_osd $dir 1 || return 1
run_osd $dir 2 || return 1
ceph health detail | grep DAEMON_OLD_VERSION && return 1
kill_daemons $dir KILL mon.b
- EXTRA_OPTS=" --debug_version_for_testing=01.00.00-gversion-test" run_mon $dir b
+ EXTRA_OPTS=" --debug_version_for_testing=01.00.00-gversion-test" run_mon $dir b --mon_warn_older_version_delay=0.0
# XXX: Manager doesn't seem to use the test specific config for version
#kill_daemons $dir KILL mgr.x
#EXTRA_OPTS=" --debug_version_for_testing=02.00.00-gversion-test" run_mgr $dir x
ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
}
+# Verify delay handling: with mon_warn_older_version_delay=20.0 on both mons, DAEMON_OLD_VERSION must not show up right after an osd restarts with an older test version, and must show up once the delay has passed. $1 is the test directory.
+function TEST_check_version_health_3() {
+ local dir=$1
+
+ # Assume MON_A is leader?
+ CEPH_ARGS="$ORIG_CEPH_ARGS --mon-host=$CEPH_MON_A "
+ # setup the test directory
+ setup $dir || return 1
+
+ # create a cluster with two monitors (20s old-version warning delay), three osds, two mgrs and two mds
+ run_mon $dir a --public-addr=$CEPH_MON_A --mon_warn_older_version_delay=20.0 || return 1
+ run_mon $dir b --public-addr=$CEPH_MON_B --mon_warn_older_version_delay=20.0 || return 1
+ run_osd $dir 0 || return 1
+ run_osd $dir 1 || return 1
+ run_osd $dir 2 || return 1
+ run_mgr $dir x || return 1
+ run_mgr $dir y || return 1
+ run_mds $dir m || return 1
+ run_mds $dir n || return 1
+
+ sleep 5
+ ceph health detail # printed only; output is not checked here
+ # should not see this yet: no daemon has been restarted with a different version
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ kill_daemons $dir KILL osd.1
+ EXTRA_OPTS=" --debug_version_for_testing=01.00.00-gversion-test" activate_osd $dir 1 # bring osd.1 back with the test version string
+ sleep 5 # give kill time
+
+ sleep 10
+ # should not see this yet: only 15 seconds of sleep since osd.1 was restarted, delay is 20
+ ceph health detail | grep DAEMON_OLD_VERSION && return 1
+
+ # Now make sure that at least 20 seconds have passed (5 + 10 + 10 seconds of sleep since the restart)
+ sleep 10
+
+ ceph health detail # printed only; the checks follow
+ # Should notice that osd.1 is a different version: expect HEALTH_WARN in summary and detail output
+ ceph health | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "HEALTH_WARN .*There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "^[[]WRN[]] DAEMON_OLD_VERSION: There is a daemon running an older version of ceph" || return 1
+ ceph health detail | grep -q "osd.1 is running an older version of ceph: 01.00.00-gversion-test" || return 1
+
+ kill_daemons $dir KILL osd.2
+ EXTRA_OPTS=" --debug_version_for_testing=01.00.00-gversion-test" activate_osd $dir 2 # osd.2 gets the same test version as osd.1
+ kill_daemons $dir KILL osd.0
+ EXTRA_OPTS=" --debug_version_for_testing=02.00.00-gversion-test" activate_osd $dir 0 # osd.0 gets a second, different test version
+ sleep 5
+
+ ceph health detail # printed only; the checks below expect HEALTH_ERR with both test versions listed
+ ceph health | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "HEALTH_ERR .*There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "^[[]ERR[]] DAEMON_OLD_VERSION: There are daemons running multiple old versions of ceph" || return 1
+ ceph health detail | grep -q "osd.1 osd.2 are running an older version of ceph: 01.00.00-gversion-test" || return 1
+ ceph health detail | grep -q "osd.0 is running an older version of ceph: 02.00.00-gversion-test" || return 1
+}
+
main ver-health "$@"
health_check_map_t next;
+ static utime_t old_version_first_time;
+
// DAEMON_OLD_VERSION
if (g_conf().get_val<bool>("mon_warn_on_older_version")) {
+ utime_t now = ceph_clock_now();
+ if (old_version_first_time == utime_t())
+ old_version_first_time = now;
+ if ((now - old_version_first_time) > g_conf().get_val<double>("mon_warn_older_version_delay")) {
std::map<string, std::list<string> > all_versions;
mon->get_all_versions(all_versions);
if (all_versions.size() > 1) {
<< " running an older version of ceph: " << g.first;
d.detail.push_back(ds.str());
}
+ } else {
+ old_version_first_time = utime_t();
+ }
}
}