.set_description("Issue a health warning if there are fewer OSDs than osd_pool_default_size"),
Option("mon_warn_on_slow_ping_time", Option::TYPE_UINT, Option::LEVEL_BASIC)
- .set_default(1000000)
+ .set_default(0)
+ .add_service("mgr")
+ .set_description("Override mon_warn_on_slow_ping_ratio with specified threshold in microseconds")
+ .add_see_also("mon_warn_on_slow_ping_ratio"),
+
+ Option("mon_warn_on_slow_ping_ratio", Option::TYPE_FLOAT, Option::LEVEL_BASIC)
+ .set_default(.05)
.add_service("mgr")
- .set_description("Issue a health warning if heartbeat ping longer than specified microseconds"),
+ .set_description("Issue a health warning if heartbeat ping longer than percentage of osd_heartbeat_grace")
+ .add_see_also("osd_heartbeat_grace")
+ .add_see_also("mon_warn_on_slow_ping_time"),
Option("mon_max_snap_prune_per_epoch", Option::TYPE_UINT, Option::LEVEL_ADVANCED)
.set_default(100)
// Default to health warning level if nothing specified
if (!(cmd_getval(g_ceph_context, cmdmap, "value", value))) {
value = static_cast<int64_t>(g_ceph_context->_conf.get_val<uint64_t>("mon_warn_on_slow_ping_time"));
+ if (value == 0) {
+ double ratio = g_conf().get_val<double>("mon_warn_on_slow_ping_ratio");
+ value = g_conf().get_val<int64_t>("osd_heartbeat_grace");
+ value *= 1000000 * ratio; // osd_heartbeat_grace is in seconds; scale to microseconds and apply the ratio
+ }
}
if (value < 0)
value = 0;
}
// Network ping times (1min 5min 15min)
- f->open_array_section("network_ping_times");
+ f->open_object_section("network_ping_times");
+ f->dump_int("threshold", value);
+ f->open_array_section("entries");
for (auto &sitem : boost::adaptors::reverse(sorted)) {
ceph_assert(!value || sitem.pingtime >= value);
f->dump_unsigned("15min", sitem.times[2]);
f->close_section(); // entry
}
+ f->close_section(); // entries
f->close_section(); // network_ping_times
} else {
ceph_abort_msg("broken asok registration");
// SLOW_PING_TIME
auto warn_slow_ping_time = cct->_conf.get_val<uint64_t>("mon_warn_on_slow_ping_time");
+ if (warn_slow_ping_time == 0) {
+ double ratio = cct->_conf.get_val<double>("mon_warn_on_slow_ping_ratio");
+ warn_slow_ping_time = cct->_conf.get_val<int64_t>("osd_heartbeat_grace");
+ warn_slow_ping_time *= 1000000 * ratio; // osd_heartbeat_grace is in seconds; scale to microseconds and apply the ratio
+ }
if (warn_slow_ping_time > 0) {
struct mon_ping_item_t {
int64_t value = 0;
if (!(cmd_getval(cct, cmdmap, "value", value))) {
value = static_cast<int64_t>(g_conf().get_val<uint64_t>("mon_warn_on_slow_ping_time"));
+ if (value == 0) {
+ double ratio = g_conf().get_val<double>("mon_warn_on_slow_ping_ratio");
+ value = g_conf().get_val<int64_t>("osd_heartbeat_grace");
+ value *= 1000000 * ratio; // osd_heartbeat_grace is in seconds; scale to microseconds and apply the ratio
+ }
}
if (value < 0) value = 0;
delete pingtimes;
//
// Network ping times (1min 5min 15min)
- f->open_array_section("network_ping_times");
+ f->open_object_section("network_ping_times");
+ f->dump_int("threshold", value);
+ f->open_array_section("entries");
for (auto &sitem : boost::adaptors::reverse(sorted)) {
ceph_assert(sitem.pingtime >= value);
f->open_object_section("entry");
f->dump_int("15min", sitem.times[2]);
f->close_section(); // entry
}
+ f->close_section(); // entries
f->close_section(); // network_ping_times
} else {
assert(0 == "broken asok registration");