git-server-git.apps.pok.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: adhere to mds_health_summarize_threshold config when reporting laggy clients 53578/head
author Venky Shankar <vshankar@redhat.com>
Thu, 21 Sep 2023 06:14:13 +0000 (02:14 -0400)
committer Venky Shankar <vshankar@redhat.com>
Thu, 18 Jan 2024 08:37:03 +0000 (14:07 +0530)
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/mds/Beacon.cc
src/messages/MMDSBeacon.h

index 4990f0b249f94856aeb0106fbcb6d1f250760842..c71030d7d2115eab3f9430c228760957d86231ea 100644 (file)
@@ -506,17 +506,26 @@ void Beacon::notify_health(MDSRank const *mds)
       return map.any_osd_laggy(); });
     auto&& laggy_clients = mds->server->get_laggy_clients();
     if (defer_client_eviction && !laggy_clients.empty()) {
-      std::vector<MDSHealthMetric> laggy_clients_metrics;
-      for (const auto& laggy_client: laggy_clients) {
-        CachedStackStringStream css;
-        *css << "Client " << laggy_client << " is laggy; not evicted"
-             << " because some OSD(s) is/are laggy";
-        MDSHealthMetric m(MDS_HEALTH_CLIENTS_LAGGY, HEALTH_WARN, css->strv());
-        laggy_clients_metrics.emplace_back(std::move(m));
+      if (laggy_clients.size() <= (size_t)g_conf()->mds_health_summarize_threshold) {
+       std::vector<MDSHealthMetric> laggy_clients_metrics;
+       for (const auto& laggy_client: laggy_clients) {
+         CachedStackStringStream css;
+         *css << "Client " << laggy_client << " is laggy; not evicted"
+              << " because some OSD(s) is/are laggy";
+         MDSHealthMetric m(MDS_HEALTH_CLIENTS_LAGGY, HEALTH_WARN, css->strv());
+         laggy_clients_metrics.emplace_back(std::move(m));
+       }
+       auto&& m = laggy_clients_metrics;
+       health.metrics.insert(std::end(health.metrics), std::cbegin(m),
+                             std::cend(m));
+      } else {
+       CachedStackStringStream css;
+       *css << "Many client (" << laggy_clients.size()
+            << ") are laggy; not evicting since some OSD(s) are laggy";
+       MDSHealthMetric m(MDS_HEALTH_CLIENTS_LAGGY_MANY, HEALTH_WARN, css->strv());
+       m.metadata["client_count"] = stringify(laggy_clients.size());
+       health.metrics.push_back(std::move(m));
       }
-      auto&& m = laggy_clients_metrics;
-      health.metrics.insert(std::end(health.metrics), std::cbegin(m),
-                            std::cend(m));
     }
   }
 }
index d843b0e766107dce9448944b3b6a308b8c01664d..ba2d52d11ee3f98a5ff259c649c4ee4ce0b51daa 100644 (file)
@@ -46,6 +46,7 @@ enum mds_metric_t {
   MDS_HEALTH_CACHE_OVERSIZED,
   MDS_HEALTH_SLOW_METADATA_IO,
   MDS_HEALTH_CLIENTS_LAGGY,
+  MDS_HEALTH_CLIENTS_LAGGY_MANY,
   MDS_HEALTH_DUMMY, // not a real health warning, for testing
 };
 
@@ -65,6 +66,7 @@ inline const char *mds_metric_name(mds_metric_t m)
   case MDS_HEALTH_CACHE_OVERSIZED: return "MDS_CACHE_OVERSIZED";
   case MDS_HEALTH_SLOW_METADATA_IO: return "MDS_SLOW_METADATA_IO";
   case MDS_HEALTH_CLIENTS_LAGGY: return "MDS_CLIENTS_LAGGY";
+  case MDS_HEALTH_CLIENTS_LAGGY_MANY: return "MDS_CLIENTS_LAGGY_MANY";
   case MDS_HEALTH_DUMMY: return "MDS_DUMMY";
   default:
     return "???";