osd/OSD: Log aggregated slow ops detail to cluster logs (43732/head)
author     Prashant D <pdhange@redhat.com>
           Fri, 29 Oct 2021 13:09:24 +0000 (09:09 -0400)
committer  Prashant D <pdhange@redhat.com>
           Wed, 12 Jan 2022 13:29:02 +0000 (08:29 -0500)
Logging every slow op in detail can overwhelm the cluster log and also
fill up the monitor db. Instead, log slow op details in an aggregated
format.
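
For example, with aggregation enabled, a batch of slow requests produces a
single summary warning rather than one line per op (illustrative counts and
pool name):

  30 slow requests (by type [ 'delayed' : 21 'waiting for sub ops' : 9 ] most affected pool [ 'rbd' : 18 ])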

Fixes: https://tracker.ceph.com/issues/52424
Signed-off-by: Prashant D <pdhange@redhat.com>
src/common/options/osd.yaml.in
src/osd/OSD.cc
src/osd/OpRequest.h

index e5ce3648e921c94c2ba8b7fb56c147f6d074935b..8360e4a922390eeeba948483861f9a0753e818ad 100644 (file)
@@ -739,6 +739,15 @@ options:
   default: 32
   flags:
   - startup
+- name: osd_aggregated_slow_ops_logging
+  type: bool
+  level: advanced
+  desc: Allow the OSD daemon to send aggregated slow ops info to the cluster log
+  fmt_desc: If set to ``true``, the OSD daemon will send slow ops information in
+    an aggregated format to the cluster log; otherwise, it sends every slow op
+    to the cluster log individually.
+  default: true
+  with_legacy: true
 - name: osd_op_num_threads_per_shard
   type: int
   level: advanced
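
The option defaults to true, so aggregation is on out of the box. Per-op
logging can be restored at runtime; a sketch, assuming a deployed cluster and
the standard ceph CLI:

  ceph config set osd osd_aggregated_slow_ops_logging false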
index 8977b19c85aa5714e5878bcd2d1c8924e409ccfc..889dd13c59de99ab722802aa620663f489f4dd40 100644 (file)
@@ -7647,6 +7647,15 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
     too_old -= cct->_conf.get_val<double>("osd_op_complaint_time");
     int slow = 0;
     TrackedOpRef oldest_op;
+    OSDMapRef osdmap = get_osdmap();
+    // slow op counts keyed by flag point (event type), for aggregated
+    // logging to the cluster log.
+    map<uint8_t, int> slow_op_types;
+    // slow op counts keyed by pool, for reporting the pool with the most
+    // slow ops.
+    map<uint64_t, int> slow_op_pools;
+    bool log_aggregated_slow_op =
+           cct->_conf.get_val<bool>("osd_aggregated_slow_ops_logging");
     auto count_slow_ops = [&](TrackedOp& op) {
       if (op.get_initiated() < too_old) {
         stringstream ss;
@@ -7656,7 +7665,19 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
            << " currently "
            << op.state_string();
         lgeneric_subdout(cct,osd,20) << ss.str() << dendl;
-        clog->warn() << ss.str();
+        if (log_aggregated_slow_op) {
+          if (const OpRequest *req = dynamic_cast<const OpRequest *>(&op)) {
+            uint8_t op_type = req->state_flag();
+            auto m = req->get_req<MOSDFastDispatchOp>();
+            uint64_t poolid = m->get_spg().pgid.m_pool;
+            slow_op_types[op_type]++;
+            if (poolid > 0 && poolid <= (uint64_t) osdmap->get_pool_max()) {
+              slow_op_pools[poolid]++;
+            }
+          }
+        } else {
+          clog->warn() << ss.str();
+        }
        slow++;
        if (!oldest_op || op.get_initiated() < oldest_op->get_initiated()) {
          oldest_op = &op;
@@ -7670,6 +7691,38 @@ vector<DaemonHealthMetric> OSD::get_health_metrics()
       if (slow) {
        derr << __func__ << " reporting " << slow << " slow ops, oldest is "
             << oldest_op->get_desc() << dendl;
+        if (log_aggregated_slow_op && !slow_op_types.empty()) {
+          stringstream ss;
+          ss << slow << " slow requests (by type [ ";
+          for (const auto& [op_type, count] : slow_op_types) {
+            ss << "'" << OpRequest::get_state_string(op_type)
+               << "' : " << count
+               << " ";
+          }
+          bool pool_reported = false;
+          // guard against an empty map: max_element on an empty range
+          // returns end(), which must not be dereferenced.
+          if (!slow_op_pools.empty()) {
+            auto slow_pool_it = std::max_element(slow_op_pools.begin(), slow_op_pools.end(),
+                                   [](const auto& p1, const auto& p2) {
+                                     return p1.second < p2.second;
+                                   });
+            if (osdmap->get_pools().find(slow_pool_it->first) != osdmap->get_pools().end()) {
+              string pool_name = osdmap->get_pool_name(slow_pool_it->first);
+              ss << "] most affected pool [ '"
+                 << pool_name
+                 << "' : "
+                 << slow_pool_it->second
+                 << " ])";
+              pool_reported = true;
+            }
+          }
+          if (!pool_reported) {
+            ss << "])";
+          }
+          lgeneric_subdout(cct,osd,20) << ss.str() << dendl;
+          clog->warn() << ss.str();
+        }
       }
       metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs);
     } else {
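
The aggregation itself is a count-then-pick-the-max pass over the tracked
ops. Below is a minimal standalone sketch of the same pattern, using
hypothetical flag values and pool ids in place of Ceph's tracked-op types:

  #include <algorithm>
  #include <cstdint>
  #include <iostream>
  #include <map>
  #include <utility>

  int main() {
    // Hypothetical slow-op samples: {flag point, pool id}.
    const std::pair<uint8_t, uint64_t> samples[] = {
      {1, 3}, {1, 3}, {2, 3}, {1, 7}, {3, 7},
    };

    std::map<uint8_t, int> slow_op_types;   // slow op count per flag point
    std::map<uint64_t, int> slow_op_pools;  // slow op count per pool
    for (const auto& [flag, pool] : samples) {
      ++slow_op_types[flag];
      ++slow_op_pools[pool];
    }

    // Pool with the most slow ops; std::max_element on an empty range
    // returns end(), so check for emptiness before dereferencing.
    if (!slow_op_pools.empty()) {
      auto most = std::max_element(slow_op_pools.begin(), slow_op_pools.end(),
                                   [](const auto& a, const auto& b) {
                                     return a.second < b.second;
                                   });
      std::cout << "most affected pool " << most->first
                << " : " << most->second << " slow ops\n";
    }
    return 0;
  }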
index 79aa21cd392ceca598a9375a4b5ded50ee724f3a..e0bc232a53b2d8428b4aeb6d19d4a8f13cfc80d7 100644 (file)
@@ -103,6 +103,9 @@ public:
       return entity_name_t();
     }
   }
+  uint8_t state_flag() const {
+    return latest_flag_point;
+  }
 
   std::string_view state_string() const override {
     switch(latest_flag_point) {
@@ -117,6 +120,35 @@ public:
     return "no flag points reached";
   }
 
+  static std::string get_state_string(uint8_t flag) {
+    std::string flag_point;
+
+    switch(flag) {
+      case flag_queued_for_pg:
+        flag_point = "queued for pg";
+        break;
+      case flag_reached_pg:
+        flag_point = "reached pg";
+        break;
+      case flag_delayed:
+        flag_point = "delayed";
+        break;
+      case flag_started:
+        flag_point = "started";
+        break;
+      case flag_sub_op_sent:
+        flag_point = "waiting for sub ops";
+        break;
+      case flag_commit_sent:
+        flag_point = "commit sent; apply or cleanup";
+        break;
+      default:
+        flag_point = "no flag points reached";
+        break;
+    }
+    return flag_point;
+  }
+
   void mark_queued_for_pg() {
     mark_flag_point(flag_queued_for_pg, "queued_for_pg");
   }
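
For reference, the aggregated warning built in OSD.cc renders each tallied
flag through this helper, so OpRequest::get_state_string(flag_delayed) yields
"delayed", matching the per-op state_string() output for the same flag point.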