From: Shraddha Agrawal Date: Thu, 22 May 2025 10:26:41 +0000 (+0530) Subject: mon/MgrStatMonitor: ignore duration for which feature is off X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=d81d2af8fcd708f20f54f863dd613fade57af6e5;p=ceph.git mon/MgrStatMonitor: ignore duration for which feature is off When the availability tracking feature is disabled, we should not be updating the score. We should start recalculating the score when the user enables the features again. Essentially, for the purpose of calculating the score, we need to ignore the duration for which the feature was turned off. The score is calculated from the uptime and downtime durations recorded in `pool_availability` object. These durations are updated in `calc_pool_availability` by adding the diff between last_uptime/ last_downtime and now. To discard the duration for which the feature was turned off, we need to offset the uptime/downtime by this duration. A simple way to do this is to update the last_uptime and last_downtime to the timestamp when the feature is toggled on again. To implement the same, we record the time at which the feature is toggled from off to on. When `calc_pool_availability` is invoked, if a reset is required, it resets last_uptime and last_downtime before proceeding with availability calculations. We only care about the state when the feature is toggled from off to on. All other toggle states for the config option will not have any effect on the score. Fixes: https://tracker.ceph.com/issues/71494 Signed-off-by: Shraddha Agrawal --- diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc index da31318b44191..224651a2eb6ee 100644 --- a/src/mon/MgrStatMonitor.cc +++ b/src/mon/MgrStatMonitor.cc @@ -71,8 +71,24 @@ void MgrStatMonitor::handle_conf_change( const ConfigProxy& conf, const std::set& changed) { - // implement changes here - dout(10) << __func__ << " enable_availability_tracking config option is changed." << dendl; + if (changed.count("enable_availability_tracking")) { + std::scoped_lock l(lock); + bool oldval = enable_availability_tracking; + bool newval = g_conf().get_val("enable_availability_tracking"); + dout(10) << __func__ << " enable_availability_tracking config option is changed from " + << oldval << " to " << newval + << dendl; + + // if fetaure is toggled from off to on, + // store the new value of last_uptime and last_downtime + // (to be updated in calc_pool_availability) + if (newval > oldval) { + reset_availability_last_uptime_downtime_val = ceph_clock_now(); + dout(10) << __func__ << " reset_availability_last_uptime_downtime_val " + << reset_availability_last_uptime_downtime_val << dendl; + } + enable_availability_tracking = newval; + } } void MgrStatMonitor::create_initial() @@ -88,14 +104,29 @@ void MgrStatMonitor::create_initial() void MgrStatMonitor::calc_pool_availability() { dout(20) << __func__ << dendl; + std::scoped_lock l(lock); // if feature is disabled by user, do not update the uptime // and downtime, exit early - if (!g_conf().get_val("enable_availability_tracking")) { + if (!enable_availability_tracking) { dout(20) << __func__ << " tracking availability score is disabled" << dendl; return; } + // if reset_availability_last_uptime_downtime_val is not utime_t(1, 2), + // update last_uptime and last_downtime for all pools to the + // recorded values + if (reset_availability_last_uptime_downtime_val.has_value()) { + for (const auto& i : pool_availability) { + const auto& poolid = i.first; + pool_availability[poolid].last_downtime = reset_availability_last_uptime_downtime_val.value(); + pool_availability[poolid].last_uptime = reset_availability_last_uptime_downtime_val.value(); + } + dout(20) << __func__ << " reset last_uptime and last_downtime to " + << reset_availability_last_uptime_downtime_val << dendl; + reset_availability_last_uptime_downtime_val.reset(); + } + auto pool_avail_end = pool_availability.end(); for (const auto& i : digest.pool_pg_unavailable_map) { const auto& poolid = i.first; diff --git a/src/mon/MgrStatMonitor.h b/src/mon/MgrStatMonitor.h index c02cbc4976b5d..5397a1600877f 100644 --- a/src/mon/MgrStatMonitor.h +++ b/src/mon/MgrStatMonitor.h @@ -28,6 +28,8 @@ public: MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name); ~MgrStatMonitor() override; + ceph::mutex lock = ceph::make_mutex("MgrStatMonitor::lock"); + void init() override {} void on_shutdown() override {} @@ -53,7 +55,9 @@ public: bool preprocess_statfs(MonOpRequestRef op); void calc_pool_availability(); - + bool enable_availability_tracking = g_conf().get_val("enable_availability_tracking"); ///< tracking availability score feature + std::optional reset_availability_last_uptime_downtime_val; + void check_sub(Subscription *sub); void check_subs(); void send_digests();