From 9c999db9c6c051739f3815fbe3a6d95f4757b1f7 Mon Sep 17 00:00:00 2001 From: Venky Shankar Date: Tue, 8 Aug 2023 01:48:45 -0400 Subject: [PATCH] mds: drive mdlog trimming via a separate thread Signed-off-by: Venky Shankar --- src/common/options/mds.yaml.in | 10 ++++++++++ src/mds/MDLog.cc | 27 ++++++++++++++++++++++++--- src/mds/MDLog.h | 10 +++++++++- src/mds/MDSRank.cc | 5 ----- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 08d221835716e..77d5fe253d557 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -1625,3 +1625,13 @@ options: - mds_log_max_segments flags: - runtime +- name: mds_log_trim_upkeep_interval + type: millisecs + level: advanced + desc: MDS log trimming interval + long_desc: Interval in milliseconds to trim MDS logs. + default: 1000 + services: + - mds + flags: + - runtime diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index 544ddde8e05ff..5b1146ccd9d59 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -58,6 +58,7 @@ MDLog::MDLog(MDSRank* m) max_events = g_conf().get_val<int64_t>("mds_log_max_events"); skip_corrupt_events = g_conf().get_val<bool>("mds_log_skip_corrupt_events"); skip_unbounded_events = g_conf().get_val<bool>("mds_log_skip_unbounded_events"); + upkeep_thread = std::thread(&MDLog::log_trim_upkeep, this); } MDLog::~MDLog() @@ -556,6 +557,13 @@ void MDLog::shutdown() } } + upkeep_log_trim_shutdown = true; + cond.notify_one(); + + mds->mds_lock.unlock(); + upkeep_thread.join(); + mds->mds_lock.lock(); + // Replay thread can be stuck inside e.g. Journaler::wait_for_readable, // so we need to shutdown the journaler first. 
if (journaler) { @@ -606,11 +614,23 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq) } } -void MDLog::trim(int m) +void MDLog::log_trim_upkeep(void) { + dout(10) << dendl; + + std::unique_lock mds_lock(mds->mds_lock); + while (!upkeep_log_trim_shutdown.load()) { + if (mds->is_active() || mds->is_stopping()) { + trim(); + } + + cond.wait_for(mds_lock, g_conf().get_val<std::chrono::milliseconds>("mds_log_trim_upkeep_interval")); + } + dout(10) << __func__ << ": finished" << dendl; +} + +void MDLog::trim() { int max_ev = max_events; - if (m >= 0) - max_ev = m; if (mds->mdcache->is_readonly()) { dout(10) << "trim, ignoring read-only FS" << dendl; @@ -794,6 +814,7 @@ int MDLog::trim_all() void MDLog::try_expire(LogSegment *ls, int op_prio) { + ceph_assert(ceph_mutex_is_locked(mds->mds_lock)); MDSGatherBuilder gather_bld(g_ceph_context); ls->try_to_expire(mds, gather_bld, op_prio); diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index c12a4b8d5338d..33eba835c4023 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -147,7 +147,6 @@ public: } void trim_expired_segments(); - void trim(int max=-1); int trim_all(); void create(MDSContext *onfinish); // fresh, empty log! 
@@ -289,6 +288,9 @@ private: void _trim_expired_segments(); void write_head(MDSContext *onfinish); + void trim(); + void log_trim_upkeep(void); + bool debug_subtrees; std::atomic_uint64_t event_large_threshold; // accessed by submit thread uint64_t events_per_segment; @@ -306,5 +308,11 @@ private: // log trimming decay counter DecayCounter log_trim_counter; + + // log trimming upkeeper thread + std::thread upkeep_thread; + // guarded by mds_lock + std::condition_variable_any cond; + std::atomic<bool> upkeep_log_trim_shutdown{false}; }; #endif diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index 9bbfc0433fa2e..6f8439470a655 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -743,10 +743,6 @@ void MDSRankDispatcher::tick() // update average session uptime sessionmap.update_average_session_age(); - if (is_active() || is_stopping()) { - mdlog->trim(); // NOT during recovery! - } - // ... if (is_clientreplay() || is_active() || is_stopping()) { server->clear_laggy_clients(); @@ -789,7 +785,6 @@ void MDSRankDispatcher::tick() // shut down? if (is_stopping()) { - mdlog->trim(); if (mdcache->shutdown_pass()) { uint64_t pq_progress = 0 ; uint64_t pq_total = 0; -- 2.39.5