]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
TrackedOp: allow dumping historic ops sorted by duration 14050/head
authorPiotr Dałek <piotr.dalek@corp.ovh.com>
Mon, 20 Mar 2017 12:51:25 +0000 (13:51 +0100)
committerPiotr Dałek <piotr.dalek@corp.ovh.com>
Wed, 22 Mar 2017 12:32:04 +0000 (13:32 +0100)
Currently dump_historic_ops dumps ops sorted by their initiation time,
which may not have any relation to how long it took, and sorting output
of that command by op duration is neither fast nor convenient.
New asok command ("dump_historic_ops_by_duration") outputs the same
op list, but ordered by their duration time (longest first).

Signed-off-by: Piotr Dałek <piotr.dalek@corp.ovh.com>
src/common/TrackedOp.cc
src/common/TrackedOp.h
src/mds/MDSDaemon.cc
src/mds/MDSRank.cc
src/osd/OSD.cc

index b1ce4b43a9e4399c068ad4fa4dd30a20ba161ee4..1b088279308cb7ae04863704a91ee5d218d4b3b1 100644 (file)
@@ -88,6 +88,39 @@ void OpHistory::dump_ops(utime_t now, Formatter *f)
   f->close_section();
 }
 
+void OpHistory::dump_ops_by_duration(utime_t now, Formatter *f)
+{
+  Mutex::Locker history_lock(ops_history_lock);
+  cleanup(now);
+  f->open_object_section("op_history");
+  f->dump_int("size", history_size);
+  f->dump_int("duration", history_duration);
+  {
+    f->open_array_section("ops");
+    if (arrived.size()) {
+      vector<pair<double, TrackedOpRef> > durationvec;
+      durationvec.reserve(arrived.size());
+
+      for (set<pair<utime_t, TrackedOpRef> >::const_iterator i =
+            arrived.begin();
+          i != arrived.end();
+          ++i) {
+       durationvec.push_back(pair<double, TrackedOpRef>(i->second->get_duration(), i->second));
+      }
+
+      sort(durationvec.begin(), durationvec.end());
+
+      for (auto i = durationvec.rbegin(); i != durationvec.rend(); ++i) {
+       f->open_object_section("op");
+       i->second->dump(now, f);
+       f->close_section();
+      }
+    }
+    f->close_section();
+  }
+  f->close_section();
+}
+
 struct ShardedTrackingData {
   Mutex ops_in_flight_lock_sharded;
   TrackedOp::tracked_op_list_t ops_in_flight_sharded;
@@ -117,14 +150,18 @@ OpTracker::~OpTracker() {
   }
 }
 
-bool OpTracker::dump_historic_ops(Formatter *f)
+bool OpTracker::dump_historic_ops(Formatter *f, bool by_duration)
 {
   RWLock::RLocker l(lock);
   if (!tracking_enabled)
     return false;
 
   utime_t now = ceph_clock_now();
-  history.dump_ops(now, f);
+  if (by_duration) {
+    history.dump_ops_by_duration(now, f);
+  } else {
+    history.dump_ops(now, f);
+  }
   return true;
 }
 
index 31439dc23ff0a4dcb48d2f3cf5bb9ee8847c6739..412f1d324ef1ac3a7b9740c0389d7af459f1b955 100644 (file)
@@ -48,6 +48,7 @@ public:
   }
   void insert(utime_t now, TrackedOpRef op);
   void dump_ops(utime_t now, Formatter *f);
+  void dump_ops_by_duration(utime_t now, Formatter *f);
   void on_shutdown();
   void set_size_and_duration(uint32_t new_size, uint32_t new_duration) {
     history_size = new_size;
@@ -83,7 +84,7 @@ public:
     tracking_enabled = enable;
   }
   bool dump_ops_in_flight(Formatter *f, bool print_only_blocked=false);
-  bool dump_historic_ops(Formatter *f);
+  bool dump_historic_ops(Formatter *f, bool by_duration = false);
   bool register_inflight_op(TrackedOp *i);
   void unregister_inflight_op(TrackedOp *i);
 
index ff5890b56b308c2bb6f66ec95cb190881a36515c..a77302a19e11513064b3f32979901e06256bf0d5 100644 (file)
@@ -223,6 +223,10 @@ void MDSDaemon::set_up_admin_socket()
                                     asok_hook,
                                     "show slowest recent ops");
   assert(r == 0);
+  r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration",
+                                    asok_hook,
+                                    "show slowest recent ops, sorted by op duration");
+  assert(r == 0);
   r = admin_socket->register_command("scrub_path",
                                     "scrub_path name=path,type=CephString "
                                     "name=scrubops,type=CephChoices,"
@@ -321,6 +325,7 @@ void MDSDaemon::clean_up_admin_socket()
   admin_socket->unregister_command("ops");
   admin_socket->unregister_command("dump_blocked_ops");
   admin_socket->unregister_command("dump_historic_ops");
+  admin_socket->unregister_command("dump_historic_ops_by_duration");
   admin_socket->unregister_command("scrub_path");
   admin_socket->unregister_command("tag path");
   admin_socket->unregister_command("flush_path");
index 6ebf5a5988b7adf17ddd85b6a5ad61286796371f..bb73fd0bcfc73d25a19da43784e224baa9f4793a 100644 (file)
@@ -1720,6 +1720,11 @@ bool MDSRankDispatcher::handle_asok_command(
       ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
          please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     }
+  } else if (command == "dump_historic_ops_by_duration") {
+    if (!op_tracker.dump_historic_ops(f, true)) {
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+         please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
+    }
   } else if (command == "osdmap barrier") {
     int64_t target_epoch = 0;
     bool got_val = cmd_getval(g_ceph_context, cmdmap, "target_epoch", target_epoch);
index a42eae074963f4991a64e25fa1c5ef08f7e4a2e8..1487d2b124989c377f766ae856758edb6f6c12ee 100644 (file)
@@ -1885,7 +1885,12 @@ bool OSD::asok_command(string command, cmdmap_t& cmdmap, string format,
        Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     }
   } else if (command == "dump_historic_ops") {
-    if (!op_tracker.dump_historic_ops(f)) {
+    if (!op_tracker.dump_historic_ops(f, false)) {
+      ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
+       Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
+    }
+  } else if (command == "dump_historic_ops_by_duration") {
+    if (!op_tracker.dump_historic_ops(f, true)) {
       ss << "op_tracker tracking is not enabled now, so no ops are tracked currently, even those get stuck. \
        Please enable \"osd_enable_op_tracker\", and the tracker will start to track new ops received afterwards.";
     }
@@ -2435,6 +2440,10 @@ void OSD::final_init()
                                     asok_hook,
                                     "show slowest recent ops");
   assert(r == 0);
+  r = admin_socket->register_command("dump_historic_ops_by_duration", "dump_historic_ops_by_duration",
+                                    asok_hook,
+                                    "show slowest recent ops, sorted by duration");
+  assert(r == 0);
   r = admin_socket->register_command("dump_op_pq_state", "dump_op_pq_state",
                                     asok_hook,
                                     "dump op priority queue state");
@@ -2849,6 +2858,7 @@ int OSD::shutdown()
   cct->get_admin_socket()->unregister_command("ops");
   cct->get_admin_socket()->unregister_command("dump_blocked_ops");
   cct->get_admin_socket()->unregister_command("dump_historic_ops");
+  cct->get_admin_socket()->unregister_command("dump_historic_ops_by_duration");
   cct->get_admin_socket()->unregister_command("dump_op_pq_state");
   cct->get_admin_socket()->unregister_command("dump_blacklist");
   cct->get_admin_socket()->unregister_command("dump_watchers");