]> git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: add a perf counter to record slow replies 45631/head
authorhaoyixing <haoyixing@kuaishou.com>
Fri, 25 Mar 2022 03:02:13 +0000 (03:02 +0000)
committerhaoyixing <haoyixing@kuaishou.com>
Thu, 7 Apr 2022 08:11:26 +0000 (08:11 +0000)
Although we have the MDS_HEALTH_SLOW_METADATA_IO and MDS_HEALTH_SLOW_REQUEST health alerts, they are
neither precise nor cumulative. By comparing the slow reply counter with the reply counter, we can
determine the ratio of slow requests from perf dump.

Fixes: https://tracker.ceph.com/issues/55126
Signed-off-by: haoyixing <haoyixing@kuaishou.com>
src/mds/MDSRank.cc
src/mds/MDSRank.h
src/mds/Server.cc

index 053799c0df077b24eb983f9b82445804c40e29a4..5eccd0acd75c578f29f9d04a4b9c5a89cab2bc94 100644 (file)
@@ -3350,6 +3350,8 @@ void MDSRank::create_logger()
                             "exi", PerfCountersBuilder::PRIO_INTERESTING);
     mds_plb.add_u64_counter(l_mds_imported_inodes, "imported_inodes", "Imported inodes",
                             "imi", PerfCountersBuilder::PRIO_INTERESTING);
+    mds_plb.add_u64_counter(l_mds_slow_reply, "slow_reply", "Slow replies", "slr",
+                              PerfCountersBuilder::PRIO_INTERESTING);
 
     // caps msg stats
     mds_plb.add_u64_counter(l_mdss_handle_client_caps, "handle_client_caps",
index 3df1abb5d13a78d81ffebc96d6697f65e3267047..f6f9a97bc6b8df69f0bacae9eb812a1025db8364 100644 (file)
@@ -53,6 +53,7 @@ enum {
   l_mds_request,
   l_mds_reply,
   l_mds_reply_latency,
+  l_mds_slow_reply,
   l_mds_forward,
   l_mds_dir_fetch_complete,
   l_mds_dir_fetch_keys,
@@ -705,4 +706,3 @@ public:
 };
 
 #endif // MDS_RANK_H_
-
index 5b54e8112a41acb08456602a72c77990743d0912..8a53beac9543783e4be5eb5fca797cf46347c2c0 100644 (file)
@@ -2141,6 +2141,9 @@ void Server::early_reply(MDRequestRef& mdr, CInode *tracei, CDentry *tracedn)
   mds->logger->inc(l_mds_reply);
   utime_t lat = ceph_clock_now() - req->get_recv_stamp();
   mds->logger->tinc(l_mds_reply_latency, lat);
+  if (lat >= g_conf()->mds_op_complaint_time) {
+    mds->logger->inc(l_mds_slow_reply);
+  }
   if (client_inst.name.is_client()) {
     mds->sessionmap.hit_session(mdr->session);
   }
@@ -2199,6 +2202,9 @@ void Server::reply_client_request(MDRequestRef& mdr, const ref_t<MClientReply> &
     mds->logger->inc(l_mds_reply);
     utime_t lat = ceph_clock_now() - mdr->client_request->get_recv_stamp();
     mds->logger->tinc(l_mds_reply_latency, lat);
+    if (lat >= g_conf()->mds_op_complaint_time) {
+      mds->logger->inc(l_mds_slow_reply);
+    }
     if (session && client_inst.name.is_client()) {
       mds->sessionmap.hit_session(session);
     }