mon/MgrStatMonitor: ignore duration for which feature is off
author    Shraddha Agrawal <shraddha.agrawal000@gmail.com>
Thu, 22 May 2025 10:26:41 +0000 (15:56 +0530)
committer Shraddha Agrawal <shraddha.agrawal000@gmail.com>
Thu, 12 Jun 2025 10:50:25 +0000 (16:20 +0530)
When the availability tracking feature is disabled, we should not
be updating the score. We should start recalculating the score
when the user enables the feature again. Essentially, for the
purpose of calculating the score, we need to ignore the duration
for which the feature was turned off.

The score is calculated from the uptime and downtime durations
recorded in the `pool_availability` object. These durations are
updated in `calc_pool_availability` by adding the difference between
last_uptime/last_downtime and now.
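
As an illustration, here is a minimal, self-contained sketch of that
accrual. The struct layout, the use of plain seconds instead of
utime_t, and the uptime/(uptime+downtime) score at the end are
simplified assumptions for the example, not the actual Ceph data
structures or formula:

    #include <iostream>

    // hypothetical, simplified stand-in for one pool_availability entry
    struct PoolAvailability {
      double uptime = 0, downtime = 0;           // accumulated seconds
      double last_uptime = 0, last_downtime = 0; // time of the last sample
    };

    // each pass adds the time elapsed since the previous sample to either
    // uptime or downtime, which is roughly what calc_pool_availability does
    void accrue(PoolAvailability& p, bool unavailable, double now) {
      if (unavailable)
        p.downtime += now - p.last_downtime;
      else
        p.uptime += now - p.last_uptime;
      p.last_uptime = p.last_downtime = now;
    }

    int main() {
      PoolAvailability p;
      accrue(p, /*unavailable=*/false, 60);  // 60s available
      accrue(p, /*unavailable=*/true, 90);   // 30s unavailable
      // one plausible way to turn the durations into a score
      std::cout << p.uptime / (p.uptime + p.downtime) << "\n";  // ~0.667
      return 0;
    }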

To discard the duration for which the feature was turned off, we
need to offset the uptime/downtime by this duration. A simple way
to do this is to update last_uptime and last_downtime to the
timestamp at which the feature is toggled on again. To implement
this, we record the time at which the feature is toggled from off
to on. When `calc_pool_availability` is invoked, if a reset is
required, it resets last_uptime and last_downtime before proceeding
with the availability calculations.
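
For example, continuing the simplified sketch above with made-up
timestamps: if the feature is disabled right after the t=90 sample
and re-enabled at t=150, resetting the sample points to the
toggle-on time makes the next pass ignore the 60 seconds in between:

    // feature disabled right after the t=90 sample above
    // feature re-enabled at t=150; accrue() is never called while it is off
    p.last_uptime = p.last_downtime = 150;  // the reset described above
    accrue(p, /*unavailable=*/false, 160);  // credits 10s of uptime, not the
                                            // 70s since the last sample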

We only care about the transition where the feature is toggled from
off to on. All other transitions of the config option have no effect
on the score.

Fixes: https://tracker.ceph.com/issues/71494
Signed-off-by: Shraddha Agrawal <shraddhaag@ibm.com>
src/mon/MgrStatMonitor.cc
src/mon/MgrStatMonitor.h

diff --git a/src/mon/MgrStatMonitor.cc b/src/mon/MgrStatMonitor.cc
index da31318b44191f1577e1c7a91a569a439fc7273c..224651a2eb6ee06f2cfdda9b37bedbcb3d8d024d 100644
--- a/src/mon/MgrStatMonitor.cc
+++ b/src/mon/MgrStatMonitor.cc
@@ -71,8 +71,24 @@ void MgrStatMonitor::handle_conf_change(
   const ConfigProxy& conf,
   const std::set<std::string>& changed)
 {
-  // implement changes here 
-  dout(10) << __func__ << " enable_availability_tracking config option is changed." << dendl;
+  if (changed.count("enable_availability_tracking")) {
+    std::scoped_lock l(lock);
+    bool oldval = enable_availability_tracking;
+    bool newval = g_conf().get_val<bool>("enable_availability_tracking");
+    dout(10) << __func__ << " enable_availability_tracking config option is changed from " 
+             << oldval << " to " << newval
+             << dendl;
+
+    // if the feature is toggled from off to on, record the current
+    // timestamp; last_uptime and last_downtime will be reset to it
+    // in calc_pool_availability()
+    if (newval > oldval) {
+      reset_availability_last_uptime_downtime_val = ceph_clock_now();
+      dout(10) << __func__ << " reset_availability_last_uptime_downtime_val " 
+               <<  reset_availability_last_uptime_downtime_val << dendl; 
+    }
+    enable_availability_tracking = newval;
+  }
 }
 
 void MgrStatMonitor::create_initial()
@@ -88,14 +104,29 @@ void MgrStatMonitor::create_initial()
 void MgrStatMonitor::calc_pool_availability()
 {
   dout(20) << __func__ << dendl;
+  std::scoped_lock l(lock);
 
   // if feature is disabled by user, do not update the uptime 
   // and downtime, exit early
-  if (!g_conf().get_val<bool>("enable_availability_tracking")) {
+  if (!enable_availability_tracking) {
     dout(20) << __func__ << " tracking availability score is disabled" << dendl;
     return;
   }
 
+  // if reset_availability_last_uptime_downtime_val holds a value, the
+  // feature was just re-enabled: reset last_uptime and last_downtime
+  // for all pools to that recorded timestamp
+  if (reset_availability_last_uptime_downtime_val.has_value()) {
+    for (auto& i : pool_availability) {
+      auto& avail = i.second;
+      avail.last_downtime = reset_availability_last_uptime_downtime_val.value();
+      avail.last_uptime = reset_availability_last_uptime_downtime_val.value();
+    }
+    dout(20) << __func__ << " reset last_uptime and last_downtime to " 
+             << reset_availability_last_uptime_downtime_val << dendl;
+    reset_availability_last_uptime_downtime_val.reset();
+  }
+
   auto pool_avail_end = pool_availability.end();
   for (const auto& i : digest.pool_pg_unavailable_map) {
     const auto& poolid = i.first;
diff --git a/src/mon/MgrStatMonitor.h b/src/mon/MgrStatMonitor.h
index c02cbc4976b5dd7ce8a8c1034f2be3d62f8493b5..5397a1600877f82a07fcc365c52a72e706241aa4 100644
--- a/src/mon/MgrStatMonitor.h
+++ b/src/mon/MgrStatMonitor.h
@@ -28,6 +28,8 @@ public:
   MgrStatMonitor(Monitor &mn, Paxos &p, const std::string& service_name);
   ~MgrStatMonitor() override;
 
+  ceph::mutex lock = ceph::make_mutex("MgrStatMonitor::lock");
+
   void init() override {}
   void on_shutdown() override {}
 
@@ -53,7 +55,9 @@ public:
   bool preprocess_statfs(MonOpRequestRef op);
 
   void calc_pool_availability();
-
+  bool enable_availability_tracking = g_conf().get_val<bool>("enable_availability_tracking"); ///< whether availability score tracking is enabled
+  std::optional<utime_t> reset_availability_last_uptime_downtime_val;
+  
   void check_sub(Subscription *sub);
   void check_subs();
   void send_digests();