git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
common: support tracking slow op
author Zhiqiang Wang <zhiqiang@xsky.com>
Fri, 20 Jan 2017 08:26:12 +0000 (16:26 +0800)
committer Zhiqiang Wang <zhiqiang@xsky.com>
Thu, 20 Apr 2017 02:22:42 +0000 (10:22 +0800)
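Add slow ops to the op tracker history: a completed op whose duration reaches osd_op_history_slow_op_threshold is also kept in a separate slow-op set, capped at osd_op_history_slow_op_size entries.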

Signed-off-by: Zhiqiang Wang <zhiqiang@xsky.com>
src/common/TrackedOp.cc
src/common/TrackedOp.h
src/common/config_opts.h
src/osd/OSD.cc

index 1b088279308cb7ae04863704a91ee5d218d4b3b1..ee1f4391568c5b7dcbe6150f39a840f8db9a79eb 100644 (file)
@@ -34,6 +34,7 @@ void OpHistory::on_shutdown()
   Mutex::Locker history_lock(ops_history_lock);
   arrived.clear();
   duration.clear();
+  slow_op.clear();
   shutdown = true;
 }
 
@@ -44,6 +45,8 @@ void OpHistory::insert(utime_t now, TrackedOpRef op)
     return;
   duration.insert(make_pair(op->get_duration(), op));
   arrived.insert(make_pair(op->get_initiated(), op));
+  if (op->get_duration() >= history_slow_op_threshold)
+    slow_op.insert(make_pair(op->get_initiated(), op));
   cleanup(now);
 }
 
@@ -64,6 +67,12 @@ void OpHistory::cleanup(utime_t now)
        duration.begin()->second));
     duration.erase(duration.begin());
   }
+
+  while (slow_op.size() > history_slow_op_size) {
+    slow_op.erase(make_pair(
+       slow_op.begin()->second->get_initiated(),
+       slow_op.begin()->second));
+  }
 }
 
 void OpHistory::dump_ops(utime_t now, Formatter *f)
index 412f1d324ef1ac3a7b9740c0389d7af459f1b955..5342e82738168bc0b1e051bc2dd302537d223a3c 100644 (file)
@@ -33,18 +33,23 @@ typedef boost::intrusive_ptr<TrackedOp> TrackedOpRef;
 class OpHistory {
   set<pair<utime_t, TrackedOpRef> > arrived;
   set<pair<double, TrackedOpRef> > duration;
+  set<pair<utime_t, TrackedOpRef> > slow_op;
   Mutex ops_history_lock;
   void cleanup(utime_t now);
   bool shutdown;
   uint32_t history_size;
   uint32_t history_duration;
+  uint32_t history_slow_op_size;
+  uint32_t history_slow_op_threshold;
 
 public:
   OpHistory() : ops_history_lock("OpHistory::Lock"), shutdown(false),
-  history_size(0), history_duration(0) {}
+    history_size(0), history_duration(0),
+    history_slow_op_size(0), history_slow_op_threshold(0) {}
   ~OpHistory() {
     assert(arrived.empty());
     assert(duration.empty());
+    assert(slow_op.empty());
   }
   void insert(utime_t now, TrackedOpRef op);
   void dump_ops(utime_t now, Formatter *f);
@@ -54,6 +59,10 @@ public:
     history_size = new_size;
     history_duration = new_duration;
   }
+  void set_slow_op_size_and_threshold(uint32_t new_size, uint32_t new_threshold) {
+    history_slow_op_size = new_size;
+    history_slow_op_threshold = new_threshold;
+  }
 };
 
 struct ShardedTrackingData;
@@ -79,6 +88,9 @@ public:
   void set_history_size_and_duration(uint32_t new_size, uint32_t new_duration) {
     history.set_size_and_duration(new_size, new_duration);
   }
+  void set_history_slow_op_size_and_threshold(uint32_t new_size, uint32_t new_threshold) {
+    history.set_slow_op_size_and_threshold(new_size, new_threshold);
+  }
   void set_tracking(bool enable) {
     RWLock::WLocker l(lock);
     tracking_enabled = enable;
index 836af93ebadcfe3b9353066d091f9abd3b1889d8..90ac9b8989508748c808e8227e5ae1a9c4552e58 100644 (file)
@@ -880,6 +880,8 @@ OPTION(osd_enable_op_tracker, OPT_BOOL, true) // enable/disable OSD op tracking
 OPTION(osd_num_op_tracker_shard, OPT_U32, 32) // The number of shards for holding the ops
 OPTION(osd_op_history_size, OPT_U32, 20)    // Max number of completed ops to track
 OPTION(osd_op_history_duration, OPT_U32, 600) // Oldest completed op to track
+OPTION(osd_op_history_slow_op_size, OPT_U32, 20)           // Max number of slow ops to track
+OPTION(osd_op_history_slow_op_threshold, OPT_DOUBLE, 10.0) // Track an op as slow if its duration is at or over this threshold
 OPTION(osd_target_transaction_size, OPT_INT, 30)     // to adjust various transactions that batch smaller items
 OPTION(osd_failsafe_full_ratio, OPT_FLOAT, .97) // what % full makes an OSD "full" (failsafe)
 OPTION(osd_fast_fail_on_connection_refused, OPT_BOOL, true) // immediately mark OSDs as down once they refuse to accept connections
index 809e8f21e5d4c17e1aff93fde689503094136f22..2eb9e4078d548faa6e57c6b3ea24e58fa9aa463f 100644 (file)
@@ -1883,6 +1883,8 @@ OSD::OSD(CephContext *cct_, ObjectStore *store_,
                                          cct->_conf->osd_op_log_threshold);
   op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size,
                                            cct->_conf->osd_op_history_duration);
+  op_tracker.set_history_slow_op_size_and_threshold(cct->_conf->osd_op_history_slow_op_size,
+                                                    cct->_conf->osd_op_history_slow_op_threshold);
 }
 
 OSD::~OSD()
@@ -8987,6 +8989,11 @@ void OSD::handle_conf_change(const struct md_config_t *conf,
     op_tracker.set_history_size_and_duration(cct->_conf->osd_op_history_size,
                                              cct->_conf->osd_op_history_duration);
   }
+  if (changed.count("osd_op_history_slow_op_size") ||
+      changed.count("osd_op_history_slow_op_threshold")) {
+    op_tracker.set_history_slow_op_size_and_threshold(cct->_conf->osd_op_history_slow_op_size,
+                                                      cct->_conf->osd_op_history_slow_op_threshold);
+  }
   if (changed.count("osd_enable_op_tracker")) {
       op_tracker.set_tracking(cct->_conf->osd_enable_op_tracker);
   }