From 8b415e6aed818aadba1928fd09d7f1c6d22ab664 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Piotr=20Da=C5=82ek?= Date: Mon, 20 Mar 2017 13:51:25 +0100 Subject: [PATCH] TrackedOp: allow dumping historic ops sorted by duration MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit Currently dump_historic_ops dumps ops sorted by their initiation time, which may not have any relation to how long it took, and sorting output of that command by op duration is neither fast nor convenient. New asok command ("dump_historic_ops_by_duration") outputs the same op list, but ordered by their duration time (longest first). Signed-off-by: Piotr Dałek --- src/common/TrackedOp.cc | 41 +++++++++++++++++++++++++++++++++++++++-- src/common/TrackedOp.h | 3 ++- src/mds/MDSDaemon.cc | 5 +++++ src/mds/MDSRank.cc | 5 +++++ src/osd/OSD.cc | 12 +++++++++++- 5 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index b1ce4b43a9e4..1b088279308c 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -88,6 +88,39 @@ void OpHistory::dump_ops(utime_t now, Formatter *f) f->close_section(); } +void OpHistory::dump_ops_by_duration(utime_t now, Formatter *f) +{ + Mutex::Locker history_lock(ops_history_lock); + cleanup(now); + f->open_object_section("op_history"); + f->dump_int("size", history_size); + f->dump_int("duration", history_duration); + { + f->open_array_section("ops"); + if (arrived.size()) { + vector > durationvec; + durationvec.reserve(arrived.size()); + + for (set >::const_iterator i = + arrived.begin(); + i != arrived.end(); + ++i) { + durationvec.push_back(pair(i->second->get_duration(), i->second)); + } + + sort(durationvec.begin(), durationvec.end()); + + for (auto i = durationvec.rbegin(); i != durationvec.rend(); ++i) { + f->open_object_section("op"); + i->second->dump(now, f); + f->close_section(); + } + } + f->close_section(); + } + f->close_section(); +} + struct ShardedTrackingData { Mutex ops_in_flight_lock_sharded; TrackedOp::tracked_op_list_t ops_in_flight_sharded; @@ -117,14 +150,18 @@ OpTracker::~OpTracker() { } } -bool OpTracker::dump_historic_ops(Formatter *f) +bool OpTracker::dump_historic_ops(Formatter *f, bool by_duration) { RWLock::RLocker l(lock); if (!tracking_enabled) return false; utime_t now = ceph_clock_now(); - history.dump_ops(now, f); + if (by_duration) { + history.dump_ops_by_duration(now, f); + } else { + history.dump_ops(now, f); + } return true; } diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index 31439dc23ff0..412f1d324ef1 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -48,6 +48,7 @@ public: } void insert(utime_t now, TrackedOpRef op); void dump_ops(utime_t now, Formatter *f); + void dump_ops_by_duration(utime_t now, Formatter *f); void on_shutdown(); void set_size_and_duration(uint32_t new_size, uint32_t new_duration) { history_size = new_size; @@ -83,7 +84,7 @@ public: tracking_enabled = enable; } bool dump_ops_in_flight(Formatter *f, bool print_only_blocked=false); - bool dump_historic_ops(Formatter *f); + bool dump_historic_ops(Formatter *f, bool by_duration = false); bool register_inflight_op(TrackedOp *i); void unregister_inflight_op(TrackedOp *i); diff --git a/src/mds/MDSDaemon.cc b/src/mds/MDSDaemon.cc index ff5890b56b30..a77302a19e11 100644 --- a/src/mds/MDSDaemon.cc +++ b/src/mds/MDSDaemon.cc @@ -223,6 +223,10 @@ void MDSDaemon::set_up_admin_socket() asok_hook, "show slowest recent ops"); assert(r == 0); + r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration", + asok_hook, + "show slowest recent ops, sorted by op duration"); + assert(r == 0); r = admin_socket->register_command("scrub_path", "scrub_path name=path,type=CephString " "name=scrubops,type=CephChoices," @@ -321,6 +325,7 @@ void MDSDaemon::clean_up_admin_socket() admin_socket->unregister_command("ops"); admin_socket->unregister_command("dump_blocked_ops"); admin_socket->unregister_command("dump_historic_ops"); + admin_socket->unregister_command("dump_historic_ops_by_duration"); admin_socket->unregister_command("scrub_path"); admin_socket->unregister_command("tag path"); admin_socket->unregister_command("flush_path"); diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 6ebf5a5988b7..bb73fd0bcfc7 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -1720,6 +1720,11 @@ bool MDSRankDispatcher::handle_asok_command( ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; } + } else if (command == "dump_historic_ops_by_duration") { + if (!op_tracker.dump_historic_ops(f, true)) { + ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ + please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; + } } else if (command == "osdmap barrier") { int64_t target_epoch = 0; bool got_val = cmd_getval(g_ceph_context, cmdmap, "target_epoch", target_epoch); diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index a42eae074963..1487d2b12498 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -1885,7 +1885,12 @@ bool OSD::asok_command(string command, cmdmap_t& cmdmap, string format, Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; } } else if (command == "dump_historic_ops") { - if (!op_tracker.dump_historic_ops(f)) { + if (!op_tracker.dump_historic_ops(f, false)) { + ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ + Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; + } + } else if (command == "dump_historic_ops_by_duration") { + if (!op_tracker.dump_historic_ops(f, true)) { ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \ Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards."; } @@ -2435,6 +2440,10 @@ void OSD::final_init() asok_hook, "show slowest recent ops"); assert(r == 0); + r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration", + asok_hook, + "show slowest recent ops, sorted by duration"); + assert(r == 0); r = admin_socket->register_command("dump_op_pq_state", "dump_op_pq_state", asok_hook, "dump op priority queue state"); @@ -2849,6 +2858,7 @@ int OSD::shutdown() cct->get_admin_socket()->unregister_command("ops"); cct->get_admin_socket()->unregister_command("dump_blocked_ops"); cct->get_admin_socket()->unregister_command("dump_historic_ops"); + cct->get_admin_socket()->unregister_command("dump_historic_ops_by_duration"); cct->get_admin_socket()->unregister_command("dump_op_pq_state"); cct->get_admin_socket()->unregister_command("dump_blacklist"); cct->get_admin_socket()->unregister_command("dump_watchers"); -- 2.47.3