From a1ea8776353918b97cc074a452bb41ec327ff13f Mon Sep 17 00:00:00 2001
From: "Adam C. Emerson"
Date: Wed, 12 Oct 2022 19:31:20 -0400
Subject: [PATCH] rgw: LatencyConcurrencyControl warns on very high latency

Limited to only warn every five minutes.

Signed-off-by: Adam C. Emerson
---
 src/rgw/driver/rados/rgw_data_sync.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/rgw/driver/rados/rgw_data_sync.h b/src/rgw/driver/rados/rgw_data_sync.h
index 9d1b0e77f8e1e..ccaa20884eb56 100644
--- a/src/rgw/driver/rados/rgw_data_sync.h
+++ b/src/rgw/driver/rados/rgw_data_sync.h
@@ -356,6 +356,8 @@ void pretty_print(const RGWDataSyncEnv* env, const S& fmt, T&& ...t) {
 /// Keep a running average of operation latency and scale concurrency
 /// down when latency rises.
 class LatencyConcurrencyControl : public LatencyMonitor {
+  static constexpr auto dout_subsys = ceph_subsys_rgw;
+  ceph::coarse_mono_time last_warning;
 public:
   CephContext* cct;
 
@@ -373,6 +375,14 @@ public:
     auto threshold = (cct->_conf->rgw_sync_lease_period * 1s) / 12;
 
     if (avg_latency() >= 2 * threshold) [[unlikely]] {
+      auto now = ceph::coarse_mono_clock::now();
+      if (now - last_warning > 5min) {
+        ldout(cct, -1)
+          << "WARNING: The OSD cluster is overloaded and struggling to "
+          << "complete ops. You need more capacity to serve this level "
+          << "of demand." << dendl;
+        last_warning = now;
+      }
       return 1;
     } else if (avg_latency() >= threshold) [[unlikely]] {
       return concurrency / 2;
-- 
2.39.5