osd/OSD: Log aggregated slow ops detail to cluster logs

author Prashant D <pdhange@redhat.com>

Fri, 29 Oct 2021 13:09:24 +0000 (09:09 -0400)

committer Ponnuvel Palaniyappan <pponnuvel@gmail.com>

Fri, 20 May 2022 08:12:04 +0000 (09:12 +0100)
author Prashant D <pdhange@redhat.com>
Fri, 29 Oct 2021 13:09:24 +0000 (09:09 -0400)
committer Ponnuvel Palaniyappan <pponnuvel@gmail.com>
Fri, 20 May 2022 08:12:04 +0000 (09:12 +0100)
diff --git a/src/common/legacy_config_opts.h b/src/common/legacy_config_opts.h

index 832af68a8ef477dbccb6ec4153404faaea43f0e5..b115363860654117952aac2a993bde388a9f05cd 100644 (file)
--- a/src/common/legacy_config_opts.h
+++ b/src/common/legacy_config_opts.h
@@ -536,6 +536,9 @@ OPTION(osd_agent_min_evict_effort, OPT_FLOAT)
  OPTION(osd_agent_quantize_effort, OPT_FLOAT)
  OPTION(osd_agent_delay_time, OPT_FLOAT)
  
+// Allow OSD daemon to send an aggregated slow ops to the cluster log
+OPTION(osd_aggregated_slow_ops_logging, OPT_BOOL)
+
  // osd ignore history.last_epoch_started in find_best_info
  OPTION(osd_find_best_info_ignore_history_les, OPT_BOOL)
  
diff --git a/src/common/options.cc b/src/common/options.cc

index 2397748e791f25f8daa15a031a9c69cf99be5c8c..9467b6fafcba89cddb431f178ca59647015f56e9 100644 (file)
--- a/src/common/options.cc
+++ b/src/common/options.cc
@@ -2527,6 +2527,11 @@ std::vector<Option> get_global_options() {
      .set_default(.02)
      .set_description("slop factor to avoid switching tiering flush and eviction mode"),
  
+    Option("osd_aggregated_slow_ops_logging", Option::TYPE_BOOL, Option::LEVEL_ADVANCED)
+    .set_default(true)
+    .set_description("Allow OSD daemon to send an aggregated slow ops to the cluster log")
+    .set_long_description("If set to ``true``, the OSD daemon will send slow ops information in an aggregated format to the cluster log else sends every slow op to the cluster log."),
+
      Option("osd_uuid", Option::TYPE_UUID, Option::LEVEL_ADVANCED)
      .set_default(uuid_d())
      .set_flag(Option::FLAG_CREATE)
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc

index 4ba6ef84f90850767fca53e2ab15e0764e02b67f..fa94111beed7f325bcb059dff96b02ae92fc1f6b 100644 (file)
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -7601,6 +7601,15 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
      too_old -= cct->_conf.get_val<double>("osd_op_complaint_time");
      int slow = 0;
      TrackedOpRef oldest_op;
+    OSDMapRef osdmap = get_osdmap();
+    // map of slow op counts by slow op event type for an aggregated logging to
+    // the cluster log.
+    map<uint8_t, int> slow_op_types;
+    // map of slow op counts by pool for reporting a pool name with highest
+    // slow ops.
+    map<uint64_t, int> slow_op_pools;
+    bool log_aggregated_slow_op =
+           cct->_conf.get_val<bool>("osd_aggregated_slow_ops_logging");
      auto count_slow_ops = [&](TrackedOp& op) {
        if (op.get_initiated() < too_old) {
          stringstream ss;
@@ -7610,7 +7619,19 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
             << " currently "
             << op.state_string();
          lgeneric_subdout(cct,osd,20) << ss.str() << dendl;
-        clog->warn() << ss.str();
+        if (log_aggregated_slow_op) {
+          if (const OpRequest *req = dynamic_cast<const OpRequest *>(&op)) {
+            uint8_t op_type = req->state_flag();
+            auto m = req->get_req<MOSDFastDispatchOp>();
+            uint64_t poolid = m->get_spg().pgid.m_pool;
+            slow_op_types[op_type]++;
+            if (poolid > 0 && poolid <= (uint64_t) osdmap->get_pool_max()) {
+              slow_op_pools[poolid]++;
+            }
+          }
+        } else {
+          clog->warn() << ss.str();
+        }
         slow++;
         if (!oldest_op || op.get_initiated() < oldest_op->get_initiated()) {
           oldest_op = &op;
@@ -7624,6 +7645,32 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
        if (slow) {
         derr << __func__ << " reporting " << slow << " slow ops, oldest is "
              << oldest_op->get_desc() << dendl;
+        if (log_aggregated_slow_op &&
+             slow_op_types.size() > 0) {
+          stringstream ss;
+          ss << slow << " slow requests (by type [ ";
+          for (const auto& [op_type, count] : slow_op_types) {
+            ss << "'" << OpRequest::get_state_string(op_type)
+               << "' : " << count
+               << " ";
+          }
+          auto slow_pool_it = std::max_element(slow_op_pools.begin(), slow_op_pools.end(),
+                                 [](std::pair<uint64_t, int> p1, std::pair<uint64_t, int> p2) {
+                                   return p1.second < p2.second;
+                                 });
+          if (osdmap->get_pools().find(slow_pool_it->first) != osdmap->get_pools().end()) {
+            string pool_name = osdmap->get_pool_name(slow_pool_it->first);
+            ss << "] most affected pool [ '"
+               << pool_name
+               << "' : "
+               << slow_pool_it->second
+               << " ])";
+          } else {
+            ss << "])";
+          }
+          lgeneric_subdout(cct,osd,20) << ss.str() << dendl;
+          clog->warn() << ss.str();
+        }
        }
        metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs);
      } else {
diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h

index dc5b5e34b1f2d28cac3f653736885c4baddec828..443dbf292e2bf027336038a2ac27cad48c95f363 100644 (file)
--- a/src/osd/OpRequest.h
+++ b/src/osd/OpRequest.h
@@ -102,6 +102,9 @@ public:
        return entity_name_t();
      }
    }
+  uint8_t state_flag() const {
+    return latest_flag_point;
+  }
  
    std::string_view state_string() const override {
      switch(latest_flag_point) {
@@ -116,6 +119,32 @@ public:
      return "no flag points reached";
    }
  
+  static std::string get_state_string(uint8_t flag) {
+    std::string flag_point;
+
+    switch(flag) {
+      case flag_queued_for_pg:
+        flag_point = "queued for pg";
+        break;
+      case flag_reached_pg:
+        flag_point = "reached pg";
+        break;
+      case flag_delayed:
+        flag_point = "delayed";
+        break;
+      case flag_started:
+        flag_point = "started";
+        break;
+      case flag_sub_op_sent:
+        flag_point = "waiting for sub ops";
+        break;
+      case flag_commit_sent:
+        flag_point = "commit sent; apply or cleanup";
+        break;
+    }
+    return flag_point;
+  }
+
    void mark_queued_for_pg() {
      mark_flag_point(flag_queued_for_pg, "queued_for_pg");
    }
author	Prashant D <pdhange@redhat.com>
	Fri, 29 Oct 2021 13:09:24 +0000 (09:09 -0400)
committer	Ponnuvel Palaniyappan <pponnuvel@gmail.com>
	Fri, 20 May 2022 08:12:04 +0000 (09:12 +0100)
src/common/legacy_config_opts.h		patch \| blob \| history
src/common/options.cc		patch \| blob \| history
src/osd/OSD.cc		patch \| blob \| history
src/osd/OpRequest.h		patch \| blob \| history