From 097c434fedb0b71d79183cdfc10c29e944d46827 Mon Sep 17 00:00:00 2001
From: zhangjianwei2
Date: Thu, 27 Jun 2024 16:20:00 +0800
Subject: [PATCH] osd/scheduler: add mclock queue length perfcounter

Make it easy to visualize the number of ops in each mclock subqueue.

Add per-op_scheduler_class request statistics, readable through the
daemon or tell command:

ceph daemon osd.0 perf dump mclock-shard-queue-0
{
    "mclock-shard-queue-0": {
        "mclock_immediate_queue_len": 0,
        "mclock_client_queue_len": 0,
        "mclock_recovery_queue_len": 0,
        "mclock_best_effort_queue_len": 0,
        "mclock_all_type_queue_len": 0
    }
}

Fixes: https://tracker.ceph.com/issues/66714
Signed-off-by: zhangjianwei2
(cherry picked from commit 8036426fe3ffb4f68de190d7d58bc5aa98ccfb41)
---
 src/osd/scheduler/mClockScheduler.cc | 108 ++++++++++++++++++++++++++-
 src/osd/scheduler/mClockScheduler.h  |  16 +++-
 src/test/osd/TestMClockScheduler.cc  |  11 ++-
 3 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/src/osd/scheduler/mClockScheduler.cc b/src/osd/scheduler/mClockScheduler.cc
index f72683d527923..7efa46dacf226 100644
--- a/src/osd/scheduler/mClockScheduler.cc
+++ b/src/osd/scheduler/mClockScheduler.cc
@@ -30,13 +30,98 @@ using namespace std::placeholders;
 
 namespace ceph::osd::scheduler {
 
+void mClockScheduler::_get_mclock_counter(scheduler_id_t id)
+{
+  if (!logger) {
+    return;
+  }
+
+  /* an op entering the mclock queue bumps its class gauge and the total */
+  logger->inc(l_mclock_all_type_queue_len);
+
+  switch (id.class_id) {
+  case op_scheduler_class::immediate:
+    logger->inc(l_mclock_immediate_queue_len);
+    break;
+  case op_scheduler_class::client:
+    logger->inc(l_mclock_client_queue_len);
+    break;
+  case op_scheduler_class::background_recovery:
+    logger->inc(l_mclock_recovery_queue_len);
+    break;
+  case op_scheduler_class::background_best_effort:
+    logger->inc(l_mclock_best_effort_queue_len);
+    break;
+  default:
+    derr << __func__ << " unknown class_id=" << id.class_id
+         << " id=" << id << dendl;
+    break;
+  }
+}
+
+void mClockScheduler::_put_mclock_counter(scheduler_id_t id)
+{
+  if (!logger) {
+    return;
+  }
+
+  /* an op leaving the mclock queue decrements its class gauge and the total */
+  logger->dec(l_mclock_all_type_queue_len);
+
+  switch (id.class_id) {
+  case op_scheduler_class::immediate:
+    logger->dec(l_mclock_immediate_queue_len);
+    break;
+  case op_scheduler_class::client:
+    logger->dec(l_mclock_client_queue_len);
+    break;
+  case op_scheduler_class::background_recovery:
+    logger->dec(l_mclock_recovery_queue_len);
+    break;
+  case op_scheduler_class::background_best_effort:
+    logger->dec(l_mclock_best_effort_queue_len);
+    break;
+  default:
+    derr << __func__ << " unknown class_id=" << id.class_id
+         << " id=" << id << dendl;
+    break;
+  }
+}
+
+void mClockScheduler::_init_logger()
+{
+  PerfCountersBuilder m(cct, "mclock-shard-queue-" + std::to_string(shard_id),
+                        l_mclock_first, l_mclock_last);
+
+  m.add_u64(l_mclock_immediate_queue_len, "mclock_immediate_queue_len",
+            "high_priority op count in mclock queue");
+  m.add_u64(l_mclock_client_queue_len, "mclock_client_queue_len",
+            "client type op count in mclock queue");
+  m.add_u64(l_mclock_recovery_queue_len, "mclock_recovery_queue_len",
+            "background_recovery type op count in mclock queue");
+  m.add_u64(l_mclock_best_effort_queue_len, "mclock_best_effort_queue_len",
+            "background_best_effort type op count in mclock queue");
+  m.add_u64(l_mclock_all_type_queue_len, "mclock_all_type_queue_len",
+            "all type op count in mclock queue");
+
+  logger = m.create_perf_counters();
+  cct->get_perfcounters_collection()->add(logger);
+
+  logger->set(l_mclock_immediate_queue_len, 0);
+  logger->set(l_mclock_client_queue_len, 0);
+  logger->set(l_mclock_recovery_queue_len, 0);
+  logger->set(l_mclock_best_effort_queue_len, 0);
+  logger->set(l_mclock_all_type_queue_len, 0);
+}
+
 mClockScheduler::mClockScheduler(CephContext *cct,
   int whoami,
   uint32_t num_shards,
   int shard_id,
   bool is_rotational,
   unsigned cutoff_priority,
-  MonClient *monc)
+  MonClient *monc,
+  bool init_perfcounter)
   : cct(cct),
     whoami(whoami),
     num_shards(num_shards),
@@ -57,6 +142,9 @@ mClockScheduler::mClockScheduler(CephContext *cct,
   set_config_defaults_from_profile();
   client_registry.update_from_config(
     cct->_conf, osd_bandwidth_capacity_per_shard);
+  if (init_perfcounter) {
+    _init_logger();
+  }
 }
 
 /* ClientRegistry holds the dmclock::ClientInfo configuration parameters
@@ -406,6 +494,7 @@ void mClockScheduler::enqueue(OpSchedulerItem&& item)
       std::move(item),
       id,
       cost);
+    _get_mclock_counter(id);
   }
 
   dout(20) << __func__ << " client_count: " << scheduler.client_count()
@@ -446,6 +535,12 @@ void mClockScheduler::enqueue_high(unsigned priority,
   } else {
     high_priority[priority].push_front(std::move(item));
   }
+
+  scheduler_id_t id = scheduler_id_t {
+    op_scheduler_class::immediate,
+    client_profile_id_t()
+  };
+  _get_mclock_counter(id);
 }
 
 WorkItem mClockScheduler::dequeue()
@@ -461,6 +556,12 @@ WorkItem mClockScheduler::dequeue()
       high_priority.erase(iter);
     }
     ceph_assert(std::get_if<OpSchedulerItem>(&ret));
+
+    scheduler_id_t id = scheduler_id_t {
+      op_scheduler_class::immediate,
+      client_profile_id_t()
+    };
+    _put_mclock_counter(id);
     return ret;
   } else {
     mclock_queue_t::PullReq result = scheduler.pull_request();
@@ -474,6 +575,7 @@ WorkItem mClockScheduler::dequeue()
 
     ceph_assert(result.is_retn());
     auto &retn = result.get_retn();
+    _put_mclock_counter(retn.client);
     return std::move(*retn.request);
   }
 }
@@ -594,6 +696,10 @@ void mClockScheduler::handle_conf_change(
 mClockScheduler::~mClockScheduler()
 {
   cct->_conf.remove_observer(this);
+  if (logger) {
+    cct->get_perfcounters_collection()->remove(logger);
+    delete logger;
+  }
 }
 
 }
diff --git a/src/osd/scheduler/mClockScheduler.h b/src/osd/scheduler/mClockScheduler.h
index 7d3eb64afa40f..839ae35bdfb88 100644
--- a/src/osd/scheduler/mClockScheduler.h
+++ b/src/osd/scheduler/mClockScheduler.h
@@ -30,6 +30,16 @@
 
 #include "osd/scheduler/OpSchedulerItem.h"
 
+enum {
+  l_mclock_first = 15000,
+  l_mclock_immediate_queue_len,
+  l_mclock_client_queue_len,
+  l_mclock_recovery_queue_len,
+  l_mclock_best_effort_queue_len,
+  l_mclock_all_type_queue_len,
+  l_mclock_last,
+};
+
 namespace ceph::osd::scheduler {
 
 constexpr double default_min = 0.0;
@@ -98,6 +108,7 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
   const bool is_rotational;
   const unsigned cutoff_priority;
   MonClient *monc;
+  PerfCounters *logger = nullptr;
 
   /**
    * osd_bandwidth_cost_per_io
@@ -219,7 +230,7 @@ class mClockScheduler : public OpScheduler, md_config_obs_t {
 public:
   mClockScheduler(CephContext *cct, int whoami, uint32_t num_shards,
     int shard_id, bool is_rotational, unsigned cutoff_priority,
-    MonClient *monc);
+    MonClient *monc, bool init_perfcounter=true);
   ~mClockScheduler() override;
 
   /// Calculate scaled cost per item
@@ -268,6 +279,9 @@ public:
 private:
   // Enqueue the op to the high priority queue
   void enqueue_high(unsigned prio, OpSchedulerItem &&item, bool front = false);
+  void _init_logger();
+  void _get_mclock_counter(scheduler_id_t id);
+  void _put_mclock_counter(scheduler_id_t id);
 };
 
 }
diff --git a/src/test/osd/TestMClockScheduler.cc b/src/test/osd/TestMClockScheduler.cc
index 325ebe77e802f..1499493159bb1 100644
--- a/src/test/osd/TestMClockScheduler.cc
+++ b/src/test/osd/TestMClockScheduler.cc
@@ -33,6 +33,14 @@ public:
   bool is_rotational;
   unsigned cutoff_priority;
   MonClient *monc;
+  /*
+   * FIXME: keep init_perfcounter = false here for now:
+   * PerfCountersCollectionImpl::add() in src/common loops
+   * forever when the same logger pointer is added to the
+   * collection more than once.
+   * See https://tracker.ceph.com/issues/66758.
+   */
+  bool init_perfcounter;
   mClockScheduler q;
 
   uint64_t client1;
@@ -46,8 +54,9 @@ public:
     is_rotational(false),
     cutoff_priority(12),
     monc(nullptr),
+    init_perfcounter(false),
     q(g_ceph_context, whoami, num_shards, shard_id, is_rotational,
-      cutoff_priority, monc),
+      cutoff_priority, monc, init_perfcounter),
     client1(1001),
     client2(9999),
     client3(100000001)
-- 
2.39.5
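
The mechanism behind the patch is plain gauge bookkeeping:
_get_mclock_counter() bumps the matching per-class gauge plus the
all-type gauge whenever an op enters a subqueue, and
_put_mclock_counter() decrements the same pair when the op is pulled,
so every gauge always reads the live depth of its subqueue. The
standalone C++ sketch below illustrates that pattern; QueueLenGauges,
SchedClass, and the helper names are illustrative stand-ins, not
Ceph's PerfCounters API.

// Minimal sketch of the per-class queue-length gauges (illustrative only).
#include <array>
#include <atomic>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iostream>

enum class SchedClass : std::size_t {
  immediate, client, background_recovery, background_best_effort, count
};

class QueueLenGauges {
  // one gauge per scheduler class, plus a running total over all classes
  std::array<std::atomic<uint64_t>,
             static_cast<std::size_t>(SchedClass::count)> per_class{};
  std::atomic<uint64_t> all_types{0};

public:
  // mirrors _get_mclock_counter(): called when an op enters a subqueue
  void on_enqueue(SchedClass c) {
    per_class[static_cast<std::size_t>(c)]++;
    all_types++;
  }

  // mirrors _put_mclock_counter(): called when an op leaves a subqueue
  void on_dequeue(SchedClass c) {
    assert(len(c) > 0 && "dequeue without a matching enqueue");
    per_class[static_cast<std::size_t>(c)]--;
    all_types--;
  }

  uint64_t len(SchedClass c) const {
    return per_class[static_cast<std::size_t>(c)].load();
  }
  uint64_t total() const { return all_types.load(); }
};

int main() {
  QueueLenGauges g;
  g.on_enqueue(SchedClass::client);
  g.on_enqueue(SchedClass::client);
  g.on_enqueue(SchedClass::background_recovery);
  g.on_dequeue(SchedClass::client);
  std::cout << "client=" << g.len(SchedClass::client)                  // 1
            << " recovery=" << g.len(SchedClass::background_recovery)  // 1
            << " total=" << g.total() << std::endl;                    // 2
}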
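
The other half of the change is lifetime management: _init_logger()
registers the freshly built counter block with the CephContext-wide
collection, the destructor deregisters and frees it, and the new
init_perfcounter flag lets unit tests opt out of registration
entirely. Below is a toy sketch of that register-in-constructor /
remove-before-delete pattern; CounterRegistry and the other names are
hypothetical stand-ins for the real perf counters collection.

// Toy registry standing in for the perf counters collection (hypothetical).
#include <string>
#include <unordered_set>

struct CounterBlock {
  std::string name;  // e.g. "mclock-shard-queue-0"
  // ... gauges would live here ...
};

class CounterRegistry {
  std::unordered_set<CounterBlock*> blocks;
public:
  void add(CounterBlock *b) { blocks.insert(b); }
  void remove(CounterBlock *b) { blocks.erase(b); }
};

class SchedulerShard {
  CounterRegistry &registry;
  CounterBlock *logger = nullptr;
public:
  // mirrors the new constructor flag: tests pass init_perfcounter=false
  SchedulerShard(CounterRegistry &r, int shard_id, bool init_perfcounter)
    : registry(r) {
    if (init_perfcounter) {
      logger = new CounterBlock{
        "mclock-shard-queue-" + std::to_string(shard_id)};
      registry.add(logger);
    }
  }

  // deregister before freeing so the shared registry never holds a
  // dangling pointer that a later dump could walk
  ~SchedulerShard() {
    if (logger) {
      registry.remove(logger);
      delete logger;
    }
  }
};

int main() {
  CounterRegistry reg;
  SchedulerShard shard(reg, 0, /*init_perfcounter=*/true);
}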