From: yite.gu Date: Mon, 15 Aug 2022 11:15:48 +0000 (+0800) Subject: osd: add slow ops count into perf dump X-Git-Tag: v18.1.0~670^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=e1bd216ed971f7b970c33a225ad099f96a63440d;p=ceph.git osd: add slow ops count into perf dump The slow ops count tells us how many times slow ops have occurred on an OSD. In addition, because slow ops appear randomly across OSDs, the count lets us find the OSDs that experience slow ops most often. Usually, these OSDs also have the largest number of PGs. The slow ops count can therefore help find the target OSD. Signed-off-by: Yite Gu --- diff --git a/src/common/TrackedOp.cc b/src/common/TrackedOp.cc index 217d571d5cfc..d63bdb8f9a57 100644 --- a/src/common/TrackedOp.cc +++ b/src/common/TrackedOp.cc @@ -88,8 +88,10 @@ void OpHistory::_insert_delayed(const utime_t& now, TrackedOpRef op) double opduration = op->get_duration(); duration.insert(make_pair(opduration, op)); arrived.insert(make_pair(op->get_initiated(), op)); - if (opduration >= history_slow_op_threshold.load()) + if (opduration >= history_slow_op_threshold.load()) { slow_op.insert(make_pair(op->get_initiated(), op)); + logger->inc(l_osd_slow_op_count); + } cleanup(now); } @@ -156,6 +158,7 @@ struct ShardedTrackingData { OpTracker::OpTracker(CephContext *cct_, bool tracking, uint32_t num_shards): seq(0), + history(cct_), num_optracker_shards(num_shards), complaint_time(0), log_threshold(0), tracking_enabled(tracking), diff --git a/src/common/TrackedOp.h b/src/common/TrackedOp.h index 03493a22e24d..0ff7430b372b 100644 --- a/src/common/TrackedOp.h +++ b/src/common/TrackedOp.h @@ -51,8 +51,14 @@ public: void *entry() override; }; +enum { + l_osd_slow_op_first = 1000, + l_osd_slow_op_count, + l_osd_slow_op_last, +}; class OpHistory { + CephContext* cct = nullptr; std::set > arrived; std::set > duration; std::set > slow_op; @@ -65,15 +71,28 @@ class OpHistory { std::atomic_bool shutdown{false}; OpHistoryServiceThread opsvc; friend class OpHistoryServiceThread; + 
std::unique_ptr logger; public: - OpHistory() : opsvc(this) { + OpHistory(CephContext *c) : cct(c), opsvc(this) { + PerfCountersBuilder b(cct, "osd-slow-ops", + l_osd_slow_op_first, l_osd_slow_op_last); + b.add_u64_counter(l_osd_slow_op_count, "slow_ops_count", + "Number of operations taking over ten second"); + + logger.reset(b.create_perf_counters()); + cct->get_perfcounters_collection()->add(logger.get()); + opsvc.create("OpHistorySvc"); } ~OpHistory() { ceph_assert(arrived.empty()); ceph_assert(duration.empty()); ceph_assert(slow_op.empty()); + if(logger) { + cct->get_perfcounters_collection()->remove(logger.get()); + logger.reset(); + } } void insert(const utime_t& now, TrackedOpRef op) {