From: Venky Shankar Date: Mon, 24 Jul 2023 04:33:47 +0000 (-0400) Subject: mds: add mdlog trimming threshold and decay counter X-Git-Tag: v19.3.0~120^2~4 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=29610577eece04c028c412f112a66fafa8f70316;p=ceph.git mds: add mdlog trimming threshold and decay counter Fixes: http://tracker.ceph.com/issues/61908 Signed-off-by: Venky Shankar --- diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 2599b6532b5d..08d221835716 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -1597,3 +1597,31 @@ options: - mds flags: - runtime +- name: mds_log_trim_threshold + type: size + level: advanced + desc: MDS log trim threshold + long_desc: The threshold of the number of log segments that can be trimmed. + default: 128 + min: 1 + services: + - mds + see_also: + - mds_log_max_events + - mds_log_max_segments + flags: + - runtime +- name: mds_log_trim_decay_rate + type: float + level: advanced + desc: MDS log trim decay rate + long_desc: The decay rate for trimming the MDS log. Increasing this value leads to the MDS spending less time in trimming the log. 
+ default: 1.0 + min: 0.01 + services: + - mds + see_also: + - mds_log_max_events + - mds_log_max_segments + flags: + - runtime diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 14b998850da5..c174428a1f92 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -24,6 +24,7 @@ #include "common/entity_name.h" #include "common/perf_counters.h" #include "common/Cond.h" +#include "common/ceph_time.h" #include "events/ESubtreeMap.h" #include "events/ESegment.h" @@ -45,7 +46,8 @@ MDLog::MDLog(MDSRank* m) mds(m), replay_thread(this), recovery_thread(this), - submit_thread(this) + submit_thread(this), + log_trim_counter(DecayCounter(g_conf().get_val("mds_log_trim_decay_rate"))) { debug_subtrees = g_conf().get_val("mds_debug_subtrees"); event_large_threshold = g_conf().get_val("mds_log_event_large_threshold"); @@ -68,7 +70,6 @@ MDLog::~MDLog() } } - void MDLog::create_logger() { PerfCountersBuilder plb(g_ceph_context, "mds_log", l_mdl_first, l_mdl_last); @@ -636,10 +637,6 @@ void MDLog::trim(int m) return; } - // hack: only trim for a few seconds at a time - utime_t stop = ceph_clock_now(); - stop += 2.0; - int op_prio = CEPH_MSG_PRIO_LOW + (CEPH_MSG_PRIO_HIGH - CEPH_MSG_PRIO_LOW) * expiring_segments.size() / max_segments; @@ -654,16 +651,39 @@ void MDLog::trim(int m) ceph_assert(segments.size() >= pre_segments_size); max_expiring_segments = std::max(max_expiring_segments,segments.size() - pre_segments_size); } - + map::iterator p = segments.begin(); + + auto trim_start = ceph::coarse_mono_clock::now(); + std::optional trim_end; + + auto log_trim_counter_start = log_trim_counter.get(); + auto log_trim_threshold = g_conf().get_val("mds_log_trim_threshold"); + while (p != segments.end()) { - if (stop < ceph_clock_now()) + // throttle - break out of trimming if we've hit the threshold + if (log_trim_counter_start + new_expiring_segments >= log_trim_threshold) { + auto time_spent = std::chrono::duration::zero(); + if (trim_end) { + time_spent = 
std::chrono::duration(*trim_end - trim_start); + } + dout(10) << __func__ << ": breaking out of trim loop - trimmed " + << new_expiring_segments << " segment(s) in " << time_spent.count() + << "s" << dendl; break; + } unsigned num_remaining_segments = (segments.size() - expired_segments.size() - expiring_segments.size()); + dout(10) << __func__ << ": new_expiring_segments=" << new_expiring_segments + << ", num_remaining_segments=" << num_remaining_segments + << ", max_segments=" << max_segments << dendl; + if ((num_remaining_segments <= max_segments) && - (max_ev < 0 || (num_events - expiring_events - expired_events) <= (uint64_t)max_ev)) + (max_ev < 0 || (num_events - expiring_events - expired_events) <= (uint64_t)max_ev)) { + dout(10) << __func__ << ": breaking out of trim loop - segments/events fell below ceiling" + << " max_segments/max_ev" << dendl; break; + } // Do not trim too many segments at once for peak workload. If mds keeps creating N segments each tick, // the upper bound of 'num_remaining_segments - max_segments' is '2 * N' @@ -699,6 +719,8 @@ void MDLog::trim(int m) uint64_t last_seq = ls->seq; try_expire(ls, op_prio); + log_trim_counter.hit(); + trim_end = ceph::coarse_mono_clock::now(); submit_mutex.lock(); p = segments.lower_bound(last_seq + 1); diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index 5f8b78620ef1..c12a4b8d5338 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -46,6 +46,7 @@ enum { #include "MDSContext.h" #include "common/Cond.h" +#include "common/DecayCounter.h" #include "common/Finisher.h" #include "common/Thread.h" @@ -65,6 +66,7 @@ class ESubtreeMap; class MDLog { public: + MDLog(MDSRank *m); ~MDLog(); @@ -301,5 +303,8 @@ private: std::set expired_segments; std::set expiring_segments; uint64_t events_since_last_major_segment = 0; + + // log trimming decay counter + DecayCounter log_trim_counter; }; #endif