From 5ebc7d319bd2e48e26e29e59537cc5cf6991602f Mon Sep 17 00:00:00 2001 From: Samuel Just Date: Thu, 6 Apr 2023 20:50:48 +0000 Subject: [PATCH] osd/: add per-op latency averages for each recovery related message Signed-off-by: Samuel Just --- src/osd/osd_perf_counters.cc | 25 +++++++++++++++++++++++++ src/osd/osd_perf_counters.h | 7 +++++++ src/osd/scheduler/OpSchedulerItem.cc | 22 ++++++++++++++++++++++ src/osd/scheduler/OpSchedulerItem.h | 4 +++- 4 files changed, 57 insertions(+), 1 deletion(-) diff --git a/src/osd/osd_perf_counters.cc b/src/osd/osd_perf_counters.cc index ed63b4d3f6784..b1c1413bf61d9 100644 --- a/src/osd/osd_perf_counters.cc +++ b/src/osd/osd_perf_counters.cc @@ -162,6 +162,31 @@ PerfCounters *build_osd_logger(CephContext *cct) { "recovery bytes", "rbt", PerfCountersBuilder::PRIO_INTERESTING); + osd_plb.add_time_avg( + l_osd_recovery_push_queue_lat, + "l_osd_recovery_push_queue_latency", + "MOSDPGPush queue latency"); + osd_plb.add_time_avg( + l_osd_recovery_push_reply_queue_lat, + "l_osd_recovery_push_reply_queue_latency", + "MOSDPGPushReply queue latency"); + osd_plb.add_time_avg( + l_osd_recovery_pull_queue_lat, + "l_osd_recovery_pull_queue_latency", + "MOSDPGPull queue latency"); + osd_plb.add_time_avg( + l_osd_recovery_backfill_queue_lat, + "l_osd_recovery_backfill_queue_latency", + "MOSDPGBackfill queue latency"); + osd_plb.add_time_avg( + l_osd_recovery_backfill_remove_queue_lat, + "l_osd_recovery_backfill_remove_queue_latency", + "MOSDPGBackfillDelete queue latency"); + osd_plb.add_time_avg( + l_osd_recovery_scan_queue_lat, + "l_osd_recovery_scan_queue_latency", + "MOSDPGScan queue latency"); + osd_plb.add_u64(l_osd_loadavg, "loadavg", "CPU load"); osd_plb.add_u64( l_osd_cached_crc, "cached_crc", "Total number getting crc from crc_cache"); diff --git a/src/osd/osd_perf_counters.h b/src/osd/osd_perf_counters.h index 9966a7f7df3c2..f8af01e0e4f56 100644 --- a/src/osd/osd_perf_counters.h +++ b/src/osd/osd_perf_counters.h @@ -58,6 +58,13 @@ enum { 
l_osd_rop, l_osd_rbytes, + l_osd_recovery_push_queue_lat, + l_osd_recovery_push_reply_queue_lat, + l_osd_recovery_pull_queue_lat, + l_osd_recovery_backfill_queue_lat, + l_osd_recovery_backfill_remove_queue_lat, + l_osd_recovery_scan_queue_lat, + l_osd_loadavg, l_osd_cached_crc, l_osd_cached_crc_adjusted, diff --git a/src/osd/scheduler/OpSchedulerItem.cc b/src/osd/scheduler/OpSchedulerItem.cc index 4a08bca4e4f14..d1df432005735 100644 --- a/src/osd/scheduler/OpSchedulerItem.cc +++ b/src/osd/scheduler/OpSchedulerItem.cc @@ -253,6 +253,28 @@ void PGRecoveryMsg::run( PGRef& pg, ThreadPool::TPHandle &handle) { + /* Queue latency is the dequeue time (now) minus the enqueue time recorded in the constructor; each message type updates only its own counter. */ + auto latency = ceph_clock_now() - time_queued; + switch (op->get_req()->get_type()) { + case MSG_OSD_PG_PUSH: + osd->logger->tinc(l_osd_recovery_push_queue_lat, latency); + break; + case MSG_OSD_PG_PUSH_REPLY: + osd->logger->tinc(l_osd_recovery_push_reply_queue_lat, latency); + break; + case MSG_OSD_PG_PULL: + osd->logger->tinc(l_osd_recovery_pull_queue_lat, latency); + break; + case MSG_OSD_PG_BACKFILL: + osd->logger->tinc(l_osd_recovery_backfill_queue_lat, latency); + break; + case MSG_OSD_PG_BACKFILL_REMOVE: + osd->logger->tinc(l_osd_recovery_backfill_remove_queue_lat, latency); + break; + case MSG_OSD_PG_SCAN: + osd->logger->tinc(l_osd_recovery_scan_queue_lat, latency); + break; + } osd->dequeue_op(pg, op, handle); pg->unlock(); } diff --git a/src/osd/scheduler/OpSchedulerItem.h b/src/osd/scheduler/OpSchedulerItem.h index f41e66880e299..df0c2e1320cc0 100644 --- a/src/osd/scheduler/OpSchedulerItem.h +++ b/src/osd/scheduler/OpSchedulerItem.h @@ -572,10 +572,12 @@ public: }; class PGRecoveryMsg : public PGOpQueueable { + utime_t time_queued; OpRequestRef op; public: - PGRecoveryMsg(spg_t pg, OpRequestRef op) : PGOpQueueable(pg), op(std::move(op)) {} + PGRecoveryMsg(spg_t pg, OpRequestRef op) + : PGOpQueueable(pg), time_queued(ceph_clock_now()), op(std::move(op)) {} static bool is_recovery_msg(OpRequestRef &op) { switch (op->get_req()->get_type()) { -- 2.39.5