From: Jianpeng Ma Date: Fri, 17 Jul 2015 02:44:06 +0000 (+0800) Subject: osd: add dump_blocked_ops asok command. X-Git-Tag: v0.94.10~11^2~6 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=3e00ee5cfb0769e9b1c0e0a7540d0b14be880fed;p=ceph.git osd: add dump_blocked_ops asok command. From 'ceph health' we can see that there are blocked ops, and from 'ceph health detail' we can see which OSD has blocked ops. Beyond that, the detailed messages for the blocked ops could only be found in the OSD log. Now the blocked ops can be dumped via the admin socket (asok). Signed-off-by: Jianpeng Ma (cherry picked from commit cf4e8c655c4a0e6c367c88338cd6e51a3474fa2f) Conflicts: src/common/TrackedOp.h (trivial) --- diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index a11c0652104..e55ad3e6d06 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -93,7 +93,7 @@ void OpTracker::dump_historic_ops(Formatter *f) history.dump_ops(now, f); } -void OpTracker::dump_ops_in_flight(Formatter *f) +void OpTracker::dump_ops_in_flight(Formatter *f, bool print_only_blocked) { f->open_object_section("ops_in_flight"); // overall dump uint64_t total_ops_in_flight = 0; @@ -102,8 +102,10 @@ void OpTracker::dump_ops_in_flight(Formatter *f) for (uint32_t i = 0; i < num_optracker_shards; i++) { ShardedTrackingData* sdata = sharded_in_flight_list[i]; assert(NULL != sdata); - Mutex::Locker locker(sdata->ops_in_flight_lock_sharded); + Mutex::Locker locker(sdata->ops_in_flight_lock_sharded); for (xlist::iterator p = sdata->ops_in_flight_sharded.begin(); !p.end(); ++p) { + if (print_only_blocked && (now - (*p)->get_initiated() <= complaint_time)) + break; f->open_object_section("op"); (*p)->dump(now, f); f->close_section(); // this TrackedOp @@ -111,7 +113,11 @@ void OpTracker::dump_ops_in_flight(Formatter *f) } } f->close_section(); // list of TrackedOps - f->dump_int("num_ops", total_ops_in_flight); + if (print_only_blocked) { + f->dump_float("complaint_time", complaint_time); + f->dump_int("num_blocked_ops", 
total_ops_in_flight); + } else + f->dump_int("num_ops", total_ops_in_flight); f->close_section(); // overall dump } diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index b2015b88f96..0f406a20ce1 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -97,7 +97,7 @@ public: void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) { history.set_size_and_duration(new_size, new_duration); } - void dump_ops_in_flight(Formatter *f); + void dump_ops_in_flight(Formatter *f, bool print_only_blocked=false); void dump_historic_ops(Formatter *f); void register_inflight_op(xlist::item *i); void unregister_inflight_op(TrackedOp *i); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index d49b64ada94..9c49f653a38 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1686,13 +1686,23 @@ bool OSD::asok_command(string command, cmdmap_t& cmdmap, string format, } else if (command == "dump_ops_in_flight" || command == "ops") { if (!op_tracker.tracking_enabled) { - ss << "op_tracker tracking is not enabled"; + ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ + Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; } else { op_tracker.dump_ops_in_flight(f); } + } else if (command == "dump_blocked_ops") { + if (!op_tracker.tracking_enabled) { + ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ + Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; + } else { + op_tracker.dump_ops_in_flight(f, true); + } + } else if (command == "dump_historic_ops") { if (!op_tracker.tracking_enabled) { - ss << "op_tracker tracking is not enabled"; + ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. 
\ + Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; } else { op_tracker.dump_historic_ops(f); } @@ -2016,6 +2026,10 @@ void OSD::final_init() "ops", asok_hook, "show the ops currently in flight"); assert(r == 0); + r = admin_socket->register_command("dump_blocked_ops", + "dump_blocked_ops", asok_hook, + "show the blocked ops currently in flight"); + assert(r == 0); r = admin_socket->register_command("dump_historic_ops", "dump_historic_ops", asok_hook, "show slowest recent ops"); @@ -2335,6 +2349,7 @@ int OSD::shutdown() cct->get_admin_socket()->unregister_command("flush_journal"); cct->get_admin_socket()->unregister_command("dump_ops_in_flight"); cct->get_admin_socket()->unregister_command("ops"); + cct->get_admin_socket()->unregister_command("dump_blocked_ops"); cct->get_admin_socket()->unregister_command("dump_historic_ops"); cct->get_admin_socket()->unregister_command("dump_op_pq_state"); cct->get_admin_socket()->unregister_command("dump_blacklist");