.set_default(false)
.set_description(""),
+ Option("mon_enable_op_tracker", Option::TYPE_BOOL, Option::LEVEL_ADVANCED) // read as bool when the Monitor constructs its op_tracker
+ .set_default(true)
+ .set_description("enable/disable MON op tracking"),
+
+ Option("mon_op_complaint_time", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) // read as double; first argument to op_tracker.set_complaint_and_threshold()
+ .set_default(30)
+ .set_description("time in seconds to consider a MON OP blocked after no updates"),
+
+ Option("mon_op_log_threshold", Option::TYPE_INT, Option::LEVEL_ADVANCED) // read as int64_t; second argument to op_tracker.set_complaint_and_threshold()
+ .set_default(5)
+ .set_description("max number of slow ops to display"),
+
+ Option("mon_op_history_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED) // read as uint64_t; first argument to op_tracker.set_history_size_and_duration()
+ .set_default(20)
+ .set_description("max number of completed ops to track"),
+
+ Option("mon_op_history_duration", Option::TYPE_UINT, Option::LEVEL_ADVANCED) // read as uint64_t; second argument to op_tracker.set_history_size_and_duration()
+ .set_default(600)
+ .set_description("expiration time in seconds of historical MON OPS"),
+
+ Option("mon_op_history_slow_op_size", Option::TYPE_UINT, Option::LEVEL_ADVANCED) // read as uint64_t; first argument to op_tracker.set_history_slow_op_size_and_threshold()
+ .set_default(20)
+ .set_description("max number of slow historical MON OPS to keep"),
+
+ Option("mon_op_history_slow_op_threshold", Option::TYPE_FLOAT, Option::LEVEL_ADVANCED) // read as double; second argument to op_tracker.set_history_slow_op_size_and_threshold()
+ .set_default(10.0)
+ .set_description("duration time in seconds of an op to be considered as a historical slow op"),
+
Option("mon_data", Option::TYPE_STR, Option::LEVEL_ADVANCED)
.set_flag(Option::FLAG_NO_MON_UPDATE)
.set_default("/var/lib/ceph/mon/$cluster-$id")
paxos_service(PAXOS_NUM),
admin_hook(NULL),
routed_request_tid(0),
- op_tracker(cct, true, 1)
+ op_tracker(cct, g_conf->get_val<bool>("mon_enable_op_tracker"), 1)
{
clog = log_client.create_channel(CLOG_CHANNEL_CLUSTER);
audit_clog = log_client.create_channel(CLOG_CHANNEL_AUDIT);
update_log_clients();
+ {
+   // Apply the mon_op_* tunables to the op tracker. Each value is fetched
+   // from the config here, at construction time.
+   // NOTE(review): nothing visible here re-applies these on a later config
+   // change - confirm whether runtime updates are expected to take effect.
+   const double complaint_secs = g_conf->get_val<double>("mon_op_complaint_time");
+   const int64_t slow_op_log_max = g_conf->get_val<int64_t>("mon_op_log_threshold");
+   op_tracker.set_complaint_and_threshold(complaint_secs, slow_op_log_max);
+
+   const uint64_t history_len = g_conf->get_val<uint64_t>("mon_op_history_size");
+   const uint64_t history_secs = g_conf->get_val<uint64_t>("mon_op_history_duration");
+   op_tracker.set_history_size_and_duration(history_len, history_secs);
+
+   const uint64_t slow_history_len = g_conf->get_val<uint64_t>("mon_op_history_slow_op_size");
+   const double slow_op_secs = g_conf->get_val<double>("mon_op_history_slow_op_threshold");
+   op_tracker.set_history_slow_op_size_and_threshold(slow_history_len, slow_op_secs);
+ }
+
paxos = new Paxos(this, "paxos");
paxos_service[PAXOS_MDSMAP] = new MDSMonitor(this, paxos, "mdsmap");
<< "from='admin socket' entity='admin socket' "
<< "cmd='" << command << "' args=" << args << ": dispatch";
+ // Gather any "filterstr" values from the command map into a set
+ // (duplicates collapse); the set stays empty when none were supplied.
+ vector<string> filter_str;
+ set<string> filters;
+ if (cmd_getval(cct, cmdmap, "filterstr", filter_str)) {
+   filters.insert(filter_str.begin(), filter_str.end());
+ }
+
+
if (command == "mon_status") {
get_mon_status(f.get(), ss);
if (f)
f->flush(ss);
}
+ } else if (command == "dump_historic_ops" ||
+            command == "dump_historic_ops_by_duration" ||
+            command == "dump_historic_slow_ops") {
+   // All three historic-op dumps share the same success/failure handling:
+   // the tracker call returns false when nothing was dumped, in which case
+   // the "tracking is not enabled" hint below is reported instead.
+   // NOTE(review): f is dereferenced here without the `if (f)` guard that
+   // the mon_status branch uses - confirm f cannot be NULL for these commands.
+   bool dumped;
+   if (command == "dump_historic_ops") {
+     dumped = op_tracker.dump_historic_ops(f.get());
+   } else if (command == "dump_historic_ops_by_duration") {
+     dumped = op_tracker.dump_historic_ops(f.get(), true);
+   } else {
+     dumped = op_tracker.dump_historic_slow_ops(f.get(), filters);
+   }
+   if (dumped) {
+     f->flush(ss);
+   } else {
+     // Adjacent string literals instead of a backslash continuation inside
+     // the literal: the continuation spliced the next source line's leading
+     // indentation into the message text.
+     ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. "
+           "please enable \"mon_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
+   }
} else {
assert(0 == "bad AdminSocket command binding");
}
admin_hook,
"list existing sessions");
assert(r == 0);
+ r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops",
+ admin_hook,
+ "show recent ops");
+ assert(r == 0);
+ r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration",
+ admin_hook,
+ "show recent ops, sorted by duration");
+ assert(r == 0);
+ r = admin_socket->register_command("dump_historic_slow_ops", "dump_historic_slow_ops",
+ admin_hook,
+ "show recent slow ops");
+ assert(r == 0);
lock.Lock();
admin_socket->unregister_command("quorum exit");
admin_socket->unregister_command("ops");
admin_socket->unregister_command("sessions");
+ admin_socket->unregister_command("dump_historic_ops");
+ admin_socket->unregister_command("dump_historic_ops_by_duration");
+ admin_socket->unregister_command("dump_historic_slow_ops");
delete admin_hook;
admin_hook = NULL;
}