configured ratio of the main OSD data device size. This warning is
informational and can be muted with:
``ceph health mute BLUESTORE_BLUEFS_OVERSIZED``
+* MGR: The Manager now automatically increases ``mgr_stats_period`` when its
+ message queue is congested, reducing daemon reporting frequency to prevent
+ overload. The period recovers automatically once the queue clears. This
+ behavior is controlled by the new ``mgr_stats_period_autotune`` (default:
+ ``true``) and ``mgr_stats_period_autotune_queue_threshold`` (default: ``100``)
+ config options.
>=20.0.0
perf counters of a mgr module. In ``mgr.cache_hit`` and ``mgr.cache_miss``
you'll find the hit/miss ratio of the mgr cache.
+
+Automatic Stats Period Tuning
+------------------------------
+
+The Manager automatically adjusts :confval:`mgr_stats_period` based on message queue
+depth to prevent overload during high cluster activity. This feature is enabled by
+default and can be controlled with the following settings:
+
+- :confval:`mgr_stats_period_autotune` (boolean, default: true): Enable or disable
+ automatic tuning of the stats period.
+- :confval:`mgr_stats_period_autotune_queue_threshold` (integer, default: 100):
+ The message queue depth threshold that triggers an increase in the stats period.
+
+When the queue depth exceeds this threshold, the stats period is increased to
+reduce load. Conversely, if the queue depth remains low and the stats period is
+above the baseline, the period is decreased to improve responsiveness. To keep
+updates timely, automatic tuning never raises the stats period above 60 seconds;
+a larger value that was configured manually is left unchanged.
+
+
Using modules
-------------
.. confval:: mgr_data
.. confval:: mgr_tick_period
.. confval:: mon_mgr_beacon_grace
+.. confval:: mgr_stats_period
+.. confval:: mgr_stats_period_autotune
+.. confval:: mgr_stats_period_autotune_queue_threshold
.. _Modifying User Capabilities: ../../rados/operations/user-management/#modify-user-capabilities
services:
- mgr
- common
+- name: mgr_stats_period_autotune
+ type: bool
+ level: basic
+ desc: Automatically adjust mgr_stats_period based on Manager message queue depth
+ long_desc: When enabled, the Manager monitors its incoming message queue and automatically
+ increases mgr_stats_period when the queue backs up beyond the configured threshold,
+ reducing daemon reporting frequency to prevent Manager overload. The period is
+ gradually decreased back to the original value when the queue depth recovers.
+ This prevents performance degradation during high cluster activity without requiring
+ manual intervention. When disabled, mgr_stats_period remains at the manually
+ configured value.
+ default: true
+ services:
+ - mgr
+ see_also:
+ - mgr_stats_period
+- name: mgr_stats_period_autotune_queue_threshold
+ type: int
+ level: advanced
+ desc: Message queue depth that triggers automatic increase of mgr_stats_period
+ long_desc: When mgr_stats_period_autotune is enabled, the Manager will increase
+ the stats reporting period if the incoming message queue exceeds this threshold.
+ Higher values make the system less sensitive to temporary queue spikes but may
+ allow longer periods of Manager overload.
+ default: 100
+ services:
+ - mgr
+ see_also:
+ - mgr_stats_period
- name: mgr_client_bytes
type: size
level: dev
mds_perf_metric_collector_listener(this),
mds_perf_metric_collector(mds_perf_metric_collector_listener),
op_tracker(g_ceph_context, g_ceph_context->_conf->mgr_enable_op_tracker,
- g_ceph_context->_conf->mgr_num_op_tracker_shard)
+ g_ceph_context->_conf->mgr_num_op_tracker_shard),
+ stats_autotuner(std::make_unique<StatsAutotuner>(
+ g_conf().get_val<int64_t>("mgr_stats_period")))
{
g_conf().add_observer(this);
/* define op size and time for mgr daemon */
void DaemonServer::tick()
{
dout(10) << dendl;
+ auto tick_period = g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count();
+ utime_t now = ceph_clock_now();
+
+ if (g_conf().get_val<bool>("mgr_stats_period_autotune") &&
+ stats_autotuner->should_check_now(now, tick_period)) {
+ dout(20) << "checking whether to adjust stats period" << dendl;
+ maybe_adjust_stats_period();
+ }
send_report();
adjust_pgs();
g_conf().get_val<std::chrono::seconds>("mgr_tick_period").count());
}
+// Ask the stats autotuner whether mgr_stats_period should move, given the
+// current dispatch queue length, and write the new value into the local
+// configuration when it differs from the current one.
+void DaemonServer::maybe_adjust_stats_period() {
+  const int64_t depth = msgr->get_dispatch_queue_len();
+  const int64_t period_now = g_conf().get_val<int64_t>("mgr_stats_period");
+  const int64_t threshold = g_conf().get_val<int64_t>("mgr_stats_period_autotune_queue_threshold");
+
+  const auto decision = stats_autotuner->evaluate_adjustment(depth, period_now, threshold);
+  if (decision.new_period == period_now) {
+    return;  // the tuner is happy with the current period
+  }
+
+  dout(10) << "Adjusting mgr_stats_period from " << period_now
+           << " to " << decision.new_period << " seconds ("
+           << decision.reason_str()
+           << ")" << dendl;
+
+  std::stringstream err;
+  const int rc = cct->_conf.set_val("mgr_stats_period", std::to_string(decision.new_period), &err);
+  if (rc != 0) {
+    derr << "Failed to update mgr_stats_period: " << err.str() << dendl;
+    return;
+  }
+
+  // Tell the tuner which value it just wrote before the change is applied.
+  // NOTE(review): presumably apply_changes() is what notifies the config
+  // observer, which uses was_changed_by_user() to tell this write apart from
+  // an operator's -- confirm; do not reorder these two calls.
+  stats_autotuner->record_our_change(decision.new_period);
+  cct->_conf.apply_changes(nullptr);
+}
+
// Currently modules do not set health checks in response to events delivered to
// all modules (e.g. notify) so we do not risk a thundering herd situation here.
// if this pattern emerges in the future, this scheduler could be modified to
if (changed.count("mgr_stats_threshold") || changed.count("mgr_stats_period")) {
dout(4) << "Updating stats threshold/period on "
<< daemon_connections.size() << " clients" << dendl;
+ if (changed.count("mgr_stats_period")) {
+ int64_t new_period = g_conf().get_val<int64_t>("mgr_stats_period");
+ if (stats_autotuner->was_changed_by_user(new_period)) {
+ stats_autotuner->set_baseline_period(new_period); // user changed
+ }
+ }
// Send a fresh MMgrConfigure to all clients, so that they can follow
// the new policy for transmitting stats
finisher.queue(new LambdaContext([this](int r) {
#include <map>
#include <set>
#include <string>
+#include <string_view>
#include <unordered_map>
#include <vector>
class CommandContext;
struct OSDPerfMetricQuery;
struct MDSPerfMetricQuery;
+class StatsAutotuner;
struct offline_pg_report {
SafeTimer timer;
Context *tick_event;
void tick();
+ void maybe_adjust_stats_period();
void schedule_tick_locked(double delay_sec);
class OSDPerfMetricCollectorListener : public MetricListener {
private:
// -- op tracking --
OpTracker op_tracker;
-
+ std::unique_ptr<StatsAutotuner> stats_autotuner;
public:
int init(uint64_t gid, entity_addrvec_t client_addrs);
std::ostream& ss);
};
+// Computes when and by how much mgr_stats_period should move in response to
+// the depth of the Manager's message queue.
+//
+// The tuner tracks two periods:
+//  - baseline_period: the value recovery steps back down towards. It is set
+//    at construction and by set_baseline_period().
+//  - changed_stats_period: the last value reported through
+//    record_our_change() (or the baseline), which lets was_changed_by_user()
+//    tell a period written by someone else apart from the tuner's own.
+//
+// The class only computes values; writing them to the configuration is the
+// caller's job.
+class StatsAutotuner {
+private:
+  int64_t baseline_period;
+  int64_t changed_stats_period;
+  // Time at which should_check_now() last returned true.
+  utime_t last_period_check;
+
+  // Largest period (seconds) evaluate_adjustment() will ever raise to.
+  static constexpr int64_t MAX_PERIOD = 60;
+  // Queue depth below which an inflated period is stepped back down.
+  static constexpr int64_t RECOVERY_THRESHOLD = 20;
+  // Smallest step (seconds) by which the period is raised.
+  static constexpr int64_t MIN_PERIOD_INCREMENT = 5;
+  // A check is granted once more than this many tick periods have elapsed.
+  static constexpr int64_t CHECK_INTERVAL_TICKS = 5;
+
+public:
+  // @param baseline  initial baseline period; also taken as the last value
+  //                  known to the tuner.
+  explicit StatsAutotuner(int64_t baseline)
+    : baseline_period(baseline), changed_stats_period(baseline) {}
+
+  // Adopt `period` as the new baseline and as the last known value.
+  void set_baseline_period(int64_t period) {
+    baseline_period = changed_stats_period = period;
+  }
+
+  // Remember `new_period` as a value chosen by the tuner itself, so a later
+  // was_changed_by_user(new_period) returns false.
+  void record_our_change(int64_t new_period) {
+    changed_stats_period = new_period;
+  }
+
+  // @return true when `current_period` differs from the last value the tuner
+  //         knows about. A change to exactly that value cannot be detected.
+  bool was_changed_by_user(int64_t current_period) const {
+    return changed_stats_period != current_period;
+  }
+
+  // Rate-limits evaluation: returns true, and restarts the interval, once
+  // more than CHECK_INTERVAL_TICKS * tick_period has passed since the last
+  // granted check.
+  bool should_check_now(utime_t now, double tick_period) {
+    if (now - last_period_check > tick_period * CHECK_INTERVAL_TICKS) {
+      last_period_check = now;
+      return true;
+    }
+    return false;
+  }
+
+  // Why evaluate_adjustment() picked the period it returned.
+  enum class AdjustmentReason : uint8_t {
+    high_queue_depth = 0,
+    performance_recovered,
+    no_adjustment_needed
+  };
+
+  // Outcome of evaluate_adjustment(): the period to use and the reason.
+  struct AdjustmentResult {
+    int64_t new_period = 0;
+    AdjustmentReason reason_code = AdjustmentReason::no_adjustment_needed;
+
+    // Name of reason_code, suitable for log output.
+    std::string_view reason_str() const {
+      switch (reason_code) {
+      case AdjustmentReason::high_queue_depth:
+        return "high_queue_depth";
+      case AdjustmentReason::performance_recovered:
+        return "performance_recovered";
+      case AdjustmentReason::no_adjustment_needed:
+        return "no_adjustment_needed";
+      default:
+        return "unknown_reason";
+      }
+    }
+  };
+
+  // Decide the next stats period.
+  //
+  // @param queue_depth      current message queue depth
+  // @param current_period   period currently in effect (seconds)
+  // @param queue_threshold  depth above which the period is raised
+  // @return the period to use next. It equals `current_period`, with reason
+  //         no_adjustment_needed, when nothing should change.
+  //
+  // Above the threshold the period grows by a quarter of its value (at least
+  // MIN_PERIOD_INCREMENT) up to MAX_PERIOD. At or below the threshold, and
+  // below RECOVERY_THRESHOLD, a period above the baseline is halved, but never
+  // taken below the baseline.
+  AdjustmentResult evaluate_adjustment(
+    int64_t queue_depth,
+    int64_t current_period,
+    int64_t queue_threshold) const {
+    const AdjustmentResult unchanged{current_period, AdjustmentReason::no_adjustment_needed};
+
+    // A non-positive period is never tuned: raising it would turn a value
+    // such as 0 into a small positive period while the queue is congested.
+    // NOTE(review): 0 is assumed to mean "stats reporting disabled" for
+    // daemons -- confirm against MgrClient.
+    if (current_period <= 0) {
+      return unchanged;
+    }
+
+    if (queue_depth > queue_threshold) {
+      // Compare against the cap before adding, so a very large configured
+      // period cannot overflow the addition below. Periods already at or
+      // above MAX_PERIOD are left alone.
+      if (current_period < MAX_PERIOD) {
+        const int64_t increment = std::max(MIN_PERIOD_INCREMENT, current_period / 4);
+        const int64_t new_period = std::min(current_period + increment, MAX_PERIOD);
+        return {new_period, AdjustmentReason::high_queue_depth};
+      }
+    } else if (current_period > baseline_period && queue_depth < RECOVERY_THRESHOLD) {
+      const int64_t new_period = std::max(current_period / 2, baseline_period);
+
+      if (new_period < current_period) {
+        return {new_period, AdjustmentReason::performance_recovered};
+      }
+    }
+
+    return unchanged;
+  }
+};
#endif