git.apps.os.sepia.ceph.com Git - ceph.git/commitdiff
mds: add mdlog trimming threshold and decay counter
author Venky Shankar <vshankar@redhat.com>
Mon, 24 Jul 2023 04:33:47 +0000 (00:33 -0400)
committer Venky Shankar <vshankar@redhat.com>
Tue, 30 Jan 2024 05:28:48 +0000 (10:58 +0530)
Fixes: http://tracker.ceph.com/issues/61908
Signed-off-by: Venky Shankar <vshankar@redhat.com>
src/common/options/mds.yaml.in
src/mds/MDLog.cc
src/mds/MDLog.h

index 2599b6532b5dc9a16c1e345178d755ba6ec5ef75..08d221835716ed6cf6d16535cacba0ea0b991bcd 100644 (file)
@@ -1597,3 +1597,31 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_log_trim_threshold
+  type: size
+  level: advanced
+  desc: MDS log trim threshold
+  long_desc: The threshold of the number of log segments that can be trimmed.
+  default: 128
+  min: 1
+  services:
+  - mds
+  see_also:
+  - mds_log_max_events
+  - mds_log_max_segments
+  flags:
+  - runtime
+- name: mds_log_trim_decay_rate
+  type: float
+  level: advanced
+  desc: MDS log trim decay rate
+  long_desc: The decay rate for trimming the MDS log. Increasing this value leads to the MDS spending less time in trimming the log.
+  default: 1.0
+  min: 0.01
+  services:
+  - mds
+  see_also:
+  - mds_log_max_events
+  - mds_log_max_segments
+  flags:
+  - runtime
index 14b998850da5b79792bd4ca8f2705c2b722ed346..c174428a1f922e8991738cc76080f7b086e58fff 100644 (file)
@@ -24,6 +24,7 @@
 #include "common/entity_name.h"
 #include "common/perf_counters.h"
 #include "common/Cond.h"
+#include "common/ceph_time.h"
 
 #include "events/ESubtreeMap.h"
 #include "events/ESegment.h"
@@ -45,7 +46,8 @@ MDLog::MDLog(MDSRank* m)
     mds(m),
     replay_thread(this),
     recovery_thread(this),
-    submit_thread(this)
+    submit_thread(this),
+    log_trim_counter(DecayCounter(g_conf().get_val<double>("mds_log_trim_decay_rate")))
 {
   debug_subtrees = g_conf().get_val<bool>("mds_debug_subtrees");
   event_large_threshold = g_conf().get_val<uint64_t>("mds_log_event_large_threshold");
@@ -68,7 +70,6 @@ MDLog::~MDLog()
   }
 }
 
-
 void MDLog::create_logger()
 {
   PerfCountersBuilder plb(g_ceph_context, "mds_log", l_mdl_first, l_mdl_last);
@@ -636,10 +637,6 @@ void MDLog::trim(int m)
     return;
   }
 
-  // hack: only trim for a few seconds at a time
-  utime_t stop = ceph_clock_now();
-  stop += 2.0;
-
   int op_prio = CEPH_MSG_PRIO_LOW +
                (CEPH_MSG_PRIO_HIGH - CEPH_MSG_PRIO_LOW) *
                expiring_segments.size() / max_segments;
@@ -654,16 +651,39 @@ void MDLog::trim(int m)
     ceph_assert(segments.size() >= pre_segments_size);
     max_expiring_segments = std::max<unsigned>(max_expiring_segments,segments.size() - pre_segments_size);
   }
-  
+
   map<uint64_t,LogSegment*>::iterator p = segments.begin();
+
+  auto trim_start = ceph::coarse_mono_clock::now();
+  std::optional<ceph::coarse_mono_time> trim_end;
+
+  auto log_trim_counter_start = log_trim_counter.get();
+  auto log_trim_threshold = g_conf().get_val<Option::size_t>("mds_log_trim_threshold");
+
   while (p != segments.end()) {
-    if (stop < ceph_clock_now())
+    // throttle - break out of trimming if we've hit the threshold
+    if (log_trim_counter_start + new_expiring_segments >= log_trim_threshold) {
+      auto time_spent = std::chrono::duration<double>::zero();
+      if (trim_end) {
+       time_spent = std::chrono::duration<double>(*trim_end - trim_start);
+      }
+      dout(10) << __func__ << ": breaking out of trim loop - trimmed "
+              << new_expiring_segments << " segment(s) in " << time_spent.count()
+              << "s" << dendl;
       break;
+    }
 
     unsigned num_remaining_segments = (segments.size() - expired_segments.size() - expiring_segments.size());
+    dout(10) << __func__ << ": new_expiring_segments=" << new_expiring_segments
+            << ", num_remaining_segments=" << num_remaining_segments
+            << ", max_segments=" << max_segments << dendl;
+
     if ((num_remaining_segments <= max_segments) &&
-       (max_ev < 0 || (num_events - expiring_events - expired_events) <= (uint64_t)max_ev))
+       (max_ev < 0 || (num_events - expiring_events - expired_events) <= (uint64_t)max_ev)) {
+      dout(10) << __func__ << ": breaking out of trim loop - segments/events fell below ceiling"
+              << " max_segments/max_ev" << dendl;
       break;
+    }
 
     // Do not trim too many segments at once for peak workload. If mds keeps creating N segments each tick,
     // the upper bound of 'num_remaining_segments - max_segments' is '2 * N'
@@ -699,6 +719,8 @@ void MDLog::trim(int m)
 
       uint64_t last_seq = ls->seq;
       try_expire(ls, op_prio);
+      log_trim_counter.hit();
+      trim_end = ceph::coarse_mono_clock::now();
 
       submit_mutex.lock();
       p = segments.lower_bound(last_seq + 1);
index 5f8b78620ef17ca1ebbc6fc5922a13cf12129bd0..c12a4b8d5338d349746f40a6f4d9e0a5db081b7c 100644 (file)
@@ -46,6 +46,7 @@ enum {
 
 #include "MDSContext.h"
 #include "common/Cond.h"
+#include "common/DecayCounter.h"
 #include "common/Finisher.h"
 #include "common/Thread.h"
 
@@ -65,6 +66,7 @@ class ESubtreeMap;
 
 class MDLog {
 public:
+
   MDLog(MDSRank *m);
   ~MDLog();
 
@@ -301,5 +303,8 @@ private:
   std::set<LogSegment*> expired_segments;
   std::set<LogSegment*> expiring_segments;
   uint64_t events_since_last_major_segment = 0;
+
+  // log trimming decay counter
+  DecayCounter log_trim_counter;
 };
 #endif