From: Prashant D Date: Fri, 29 Oct 2021 13:09:24 +0000 (-0400) Subject: osd/OSD: Log aggregated slow ops detail to cluster logs X-Git-Tag: v17.1.0~34^2 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=refs%2Fpull%2F44764%2Fhead;p=ceph.git osd/OSD: Log aggregated slow ops detail to cluster logs Slow requests can overwhelm a cluster log with every slow op in detail and also fills up the monitor db. Instead, log slow ops details in aggregated format. Fixes: https://tracker.ceph.com/issues/52424 Signed-off-by: Prashant D (cherry picked from commit 9319dc9273b36dc4f4bef1261b3c57690336a8cc) --- diff --git a/src/common/options/osd.yaml.in b/src/common/options/osd.yaml.in index e5ce3648e921..8360e4a92239 100644 --- a/src/common/options/osd.yaml.in +++ b/src/common/options/osd.yaml.in @@ -739,6 +739,15 @@ options: default: 32 flags: - startup +- name: osd_aggregated_slow_ops_logging + type: bool + level: advanced + desc: Allow OSD daemon to send an aggregated slow ops to the cluster log + fmt_desc: If set to ``true``, the OSD daemon will send slow ops information in + an aggregated format to the cluster log else sends every slow op to the + cluster log. + default: true + with_legacy: true - name: osd_op_num_threads_per_shard type: int level: advanced diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc index b6027fe4d733..8b3205a2160a 100644 --- a/src/osd/OSD.cc +++ b/src/osd/OSD.cc @@ -7647,6 +7647,15 @@ vector OSD::get_health_metrics() too_old -= cct->_conf.get_val("osd_op_complaint_time"); int slow = 0; TrackedOpRef oldest_op; + OSDMapRef osdmap = get_osdmap(); + // map of slow op counts by slow op event type for an aggregated logging to + // the cluster log. + map slow_op_types; + // map of slow op counts by pool for reporting a pool name with highest + // slow ops. + map slow_op_pools; + bool log_aggregated_slow_op = + cct->_conf.get_val("osd_aggregated_slow_ops_logging"); auto count_slow_ops = [&](TrackedOp& op) { if (op.get_initiated() < too_old) { stringstream ss; @@ -7656,7 +7665,19 @@ vector OSD::get_health_metrics() << " currently " << op.state_string(); lgeneric_subdout(cct,osd,20) << ss.str() << dendl; - clog->warn() << ss.str(); + if (log_aggregated_slow_op) { + if (const OpRequest *req = dynamic_cast(&op)) { + uint8_t op_type = req->state_flag(); + auto m = req->get_req(); + uint64_t poolid = m->get_spg().pgid.m_pool; + slow_op_types[op_type]++; + if (poolid > 0 && poolid <= (uint64_t) osdmap->get_pool_max()) { + slow_op_pools[poolid]++; + } + } + } else { + clog->warn() << ss.str(); + } slow++; if (!oldest_op || op.get_initiated() < oldest_op->get_initiated()) { oldest_op = &op; @@ -7670,6 +7691,32 @@ vector OSD::get_health_metrics() if (slow) { derr << __func__ << " reporting " << slow << " slow ops, oldest is " << oldest_op->get_desc() << dendl; + if (log_aggregated_slow_op && + slow_op_types.size() > 0) { + stringstream ss; + ss << slow << " slow requests (by type [ "; + for (const auto& [op_type, count] : slow_op_types) { + ss << "'" << OpRequest::get_state_string(op_type) + << "' : " << count + << " "; + } + auto slow_pool_it = std::max_element(slow_op_pools.begin(), slow_op_pools.end(), + [](std::pair p1, std::pair p2) { + return p1.second < p2.second; + }); + if (osdmap->get_pools().find(slow_pool_it->first) != osdmap->get_pools().end()) { + string pool_name = osdmap->get_pool_name(slow_pool_it->first); + ss << "] most affected pool [ '" + << pool_name + << "' : " + << slow_pool_it->second + << " ])"; + } else { + ss << "])"; + } + lgeneric_subdout(cct,osd,20) << ss.str() << dendl; + clog->warn() << ss.str(); + } } metrics.emplace_back(daemon_metric::SLOW_OPS, slow, oldest_secs); } else { diff --git a/src/osd/OpRequest.h b/src/osd/OpRequest.h index 79aa21cd392c..e0bc232a53b2 100644 --- a/src/osd/OpRequest.h +++ b/src/osd/OpRequest.h @@ -103,6 +103,9 @@ public: return entity_name_t(); } } + uint8_t state_flag() const { + return latest_flag_point; + } std::string_view state_string() const override { switch(latest_flag_point) { @@ -117,6 +120,32 @@ public: return "no flag points reached"; } + static std::string get_state_string(uint8_t flag) { + std::string flag_point; + + switch(flag) { + case flag_queued_for_pg: + flag_point = "queued for pg"; + break; + case flag_reached_pg: + flag_point = "reached pg"; + break; + case flag_delayed: + flag_point = "delayed"; + break; + case flag_started: + flag_point = "started"; + break; + case flag_sub_op_sent: + flag_point = "waiting for sub ops"; + break; + case flag_commit_sent: + flag_point = "commit sent; apply or cleanup"; + break; + } + return flag_point; + } + void mark_queued_for_pg() { mark_flag_point(flag_queued_for_pg, "queued_for_pg"); }