]> git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
osd: add slow ops count into perf dump 47596/head
author yite.gu <ess_gyt@qq.com>
Mon, 15 Aug 2022 11:15:48 +0000 (19:15 +0800)
committer yite.gu <ess_gyt@qq.com>
Wed, 17 Aug 2022 12:50:01 +0000 (20:50 +0800)
The slow ops count tells us how many times slow ops
have occurred on an OSD. In addition, slow ops appear
randomly across OSDs, so the count lets us find the
OSDs that hit slow ops most often. Usually, these OSDs
also have the most PGs. The slow ops count can help find
the target OSD.

Signed-off-by: Yite Gu <ess_gyt@qq.com>
src/common/TrackedOp.cc
src/common/TrackedOp.h

index 217d571d5cfc72b96c6cf413656f76bb20165732..d63bdb8f9a5744a2d91237f3eefafeab587f6cd4 100644 (file)
@@ -88,8 +88,10 @@ void OpHistory::_insert_delayed(const utime_t& now, TrackedOpRef op)
   double opduration = op->get_duration();
   duration.insert(make_pair(opduration, op));
   arrived.insert(make_pair(op->get_initiated(), op));
-  if (opduration >= history_slow_op_threshold.load())
+  if (opduration >= history_slow_op_threshold.load()) {
     slow_op.insert(make_pair(op->get_initiated(), op));
+    logger->inc(l_osd_slow_op_count);
+  }
   cleanup(now);
 }
 
@@ -156,6 +158,7 @@ struct ShardedTrackingData {
 
 OpTracker::OpTracker(CephContext *cct_, bool tracking, uint32_t num_shards):
   seq(0),
+  history(cct_),
   num_optracker_shards(num_shards),
   complaint_time(0), log_threshold(0),
   tracking_enabled(tracking),
index 03493a22e24df97e9f107c49f70f974d59423fbf..0ff7430b372b5b279919f45e106498ffe0bbf032 100644 (file)
@@ -51,8 +51,14 @@ public:
   void *entry() override;
 };
 
+enum {
+  l_osd_slow_op_first = 1000,
+  l_osd_slow_op_count,
+  l_osd_slow_op_last,
+};
 
 class OpHistory {
+  CephContext* cct = nullptr;
   std::set<std::pair<utime_t, TrackedOpRef> > arrived;
   std::set<std::pair<double, TrackedOpRef> > duration;
   std::set<std::pair<utime_t, TrackedOpRef> > slow_op;
@@ -65,15 +71,28 @@ class OpHistory {
   std::atomic_bool shutdown{false};
   OpHistoryServiceThread opsvc;
   friend class OpHistoryServiceThread;
+  std::unique_ptr<PerfCounters> logger;
 
 public:
-  OpHistory() : opsvc(this) {
+  OpHistory(CephContext *c) : cct(c), opsvc(this) {
+    PerfCountersBuilder b(cct, "osd-slow-ops",
+                         l_osd_slow_op_first, l_osd_slow_op_last);
+    b.add_u64_counter(l_osd_slow_op_count, "slow_ops_count",
+                      "Number of operations taking over ten second");
+
+    logger.reset(b.create_perf_counters());
+    cct->get_perfcounters_collection()->add(logger.get());
+
     opsvc.create("OpHistorySvc");
   }
   ~OpHistory() {
     ceph_assert(arrived.empty());
     ceph_assert(duration.empty());
     ceph_assert(slow_op.empty());
+    if(logger) {
+      cct->get_perfcounters_collection()->remove(logger.get());
+      logger.reset();
+    }
   }
   void insert(const utime_t& now, TrackedOpRef op)
   {