From 218bdb66d27ed2ad941bb98a562df8b5836403c2 Mon Sep 17 00:00:00 2001 From: Patrick Donnelly Date: Wed, 1 Feb 2023 10:08:19 -0500 Subject: [PATCH] mds: handle conf changes in mdlog Signed-off-by: Patrick Donnelly --- src/common/options/mds.yaml.in | 6 +--- src/mds/Beacon.cc | 8 +++-- src/mds/MDLog.cc | 58 +++++++++++++++++++++++++++------- src/mds/MDLog.h | 21 +++++++++--- src/mds/MDSRank.cc | 8 +++-- src/mds/journal.cc | 2 +- 6 files changed, 76 insertions(+), 27 deletions(-) diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in index 426484a75aa..cfa9cb63540 100644 --- a/src/common/options/mds.yaml.in +++ b/src/common/options/mds.yaml.in @@ -497,7 +497,6 @@ options: default: false services: - mds - with_legacy: true - name: mds_log_skip_corrupt_events type: bool level: dev @@ -516,16 +515,14 @@ options: default: -1 services: - mds - with_legacy: true - name: mds_log_events_per_segment - type: int + type: uint level: advanced desc: maximum number of events in an MDS journal segment default: 1024 min: 1 services: - mds - with_legacy: true # segment size for mds log, default to default file_layout_t - name: mds_log_segment_size type: size @@ -545,7 +542,6 @@ options: min: 8 services: - mds - with_legacy: true - name: mds_log_warn_factor type: float level: advanced diff --git a/src/mds/Beacon.cc b/src/mds/Beacon.cc index 4c232707ce3..861cfa4378f 100644 --- a/src/mds/Beacon.cc +++ b/src/mds/Beacon.cc @@ -328,14 +328,16 @@ void Beacon::notify_health(MDSRank const *mds) // Detect MDS_HEALTH_TRIM condition // Indicates MDS is not trimming promptly { - if (mds->mdlog->get_num_segments() > (size_t)(g_conf()->mds_log_max_segments * g_conf().get_val("mds_log_warn_factor"))) { + const auto log_max_segments = mds->mdlog->get_max_segments(); + const auto log_warn_factor = g_conf().get_val("mds_log_warn_factor"); + if (mds->mdlog->get_num_segments() > (size_t)(log_max_segments * log_warn_factor)) { CachedStackStringStream css; *css << "Behind on trimming 
(" << mds->mdlog->get_num_segments() - << "/" << g_conf()->mds_log_max_segments << ")"; + << "/" << log_max_segments << ")"; MDSHealthMetric m(MDS_HEALTH_TRIM, HEALTH_WARN, css->strv()); m.metadata["num_segments"] = stringify(mds->mdlog->get_num_segments()); - m.metadata["max_segments"] = stringify(g_conf()->mds_log_max_segments); + m.metadata["max_segments"] = stringify(log_max_segments); health.metrics.push_back(m); } } diff --git a/src/mds/MDLog.cc b/src/mds/MDLog.cc index ecf39af59b0..b870ce0398c 100644 --- a/src/mds/MDLog.cc +++ b/src/mds/MDLog.cc @@ -38,6 +38,20 @@ using namespace std; +MDLog::MDLog(MDSRank* m) + : + mds(m), + replay_thread(this), + recovery_thread(this), + submit_thread(this) +{ // Cache the mdlog options once here; MDLog::handle_conf_change() refreshes them when they change at runtime. + debug_subtrees = g_conf().get_val<bool>("mds_debug_subtrees"); + events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment"); + pause = g_conf().get_val<bool>("mds_log_pause"); + max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments"); + max_events = g_conf().get_val<int64_t>("mds_log_max_events"); +} + MDLog::~MDLog() { if (journaler) { delete journaler; journaler = 0; } @@ -314,11 +328,11 @@ void MDLog::_submit_entry(LogEvent *le, MDSLogContextBase *c) // disambiguate imports. Because the ESubtreeMap reflects the subtree // state when all EImportFinish events are replayed. } else if (ls->end/period != ls->offset/period || - ls->num_events >= g_conf()->mds_log_events_per_segment) { + ls->num_events >= events_per_segment) { dout(10) << "submit_entry also starting new segment: last = " << ls->seq << "/" << ls->offset << ", event seq = " << event_seq << dendl; _start_new_segment(); - } else if (g_conf()->mds_debug_subtrees && + } else if (debug_subtrees && le->get_type() != EVENT_SUBTREEMAP_TEST) { // debug: journal this every time to catch subtree replay bugs. 
// use a different event id so it doesn't get interpreted as a @@ -358,7 +372,7 @@ void MDLog::_submit_thread() std::unique_lock locker{submit_mutex}; while (!mds->is_daemon_stopping()) { - if (g_conf()->mds_log_pause) { + if (pause) { submit_cond.wait(locker); continue; } @@ -602,19 +616,18 @@ void MDLog::try_to_commit_open_file_table(uint64_t last_seq) void MDLog::trim(int m) { - unsigned max_segments = g_conf()->mds_log_max_segments; - int max_events = g_conf()->mds_log_max_events; + int max_ev = max_events; if (m >= 0) - max_events = m; + max_ev = m; if (mds->mdcache->is_readonly()) { dout(10) << "trim, ignoring read-only FS" << dendl; return; } - // Clamp max_events to not be smaller than events per segment - if (max_events > 0 && max_events <= g_conf()->mds_log_events_per_segment) { - max_events = g_conf()->mds_log_events_per_segment + 1; + // Clamp max_ev to not be smaller than events per segment + if (max_ev > 0 && (uint64_t)max_ev <= events_per_segment) { + max_ev = events_per_segment + 1; } submit_mutex.lock(); @@ -622,7 +635,7 @@ void MDLog::trim(int m) // trim! dout(10) << "trim " << segments.size() << " / " << max_segments << " segments, " - << num_events << " / " << max_events << " events" + << num_events << " / " << max_ev << " events" << ", " << expiring_segments.size() << " (" << expiring_events << ") expiring" << ", " << expired_segments.size() << " (" << expired_events << ") expired" << dendl; @@ -658,7 +671,7 @@ void MDLog::trim(int m) unsigned num_remaining_segments = (segments.size() - expired_segments.size() - expiring_segments.size()); if ((num_remaining_segments <= max_segments) && - (max_events < 0 || num_events - expiring_events - expired_events <= max_events)) + (max_ev < 0 || num_events - expiring_events - expired_events <= max_ev)) break; // Do not trim too many segments at once for peak workload. 
If mds keeps creating N segments each tick, @@ -1514,3 +1527,26 @@ void MDLog::dump_replay_status(Formatter *f) const f->dump_unsigned("num_segments", get_num_segments()); f->close_section(); } + + +void MDLog::handle_conf_change(const std::set<std::string>& changed, const MDSMap& mdsmap) +{ // Refresh only the cached options whose keys are named in 'changed'; 'mdsmap' is not used here. + if (changed.count("mds_debug_subtrees")) { + debug_subtrees = g_conf().get_val<bool>("mds_debug_subtrees"); + } + if (changed.count("mds_log_events_per_segment")) { + events_per_segment = g_conf().get_val<uint64_t>("mds_log_events_per_segment"); + } + if (changed.count("mds_log_max_events")) { + max_events = g_conf().get_val<int64_t>("mds_log_max_events"); + } + if (changed.count("mds_log_max_segments")) { + max_segments = g_conf().get_val<uint64_t>("mds_log_max_segments"); + } + if (changed.count("mds_log_pause")) { + pause = g_conf().get_val<bool>("mds_log_pause"); + if (!pause) { // just unpaused: _submit_thread() waits on submit_cond while paused, so kick it (kick_submitter() presumably signals that condition - confirm) + kick_submitter(); + } + } +} diff --git a/src/mds/MDLog.h b/src/mds/MDLog.h index f86e6075640..f4cc07492e2 100644 --- a/src/mds/MDLog.h +++ b/src/mds/MDLog.h @@ -48,6 +48,7 @@ enum { #include "common/Thread.h" #include "LogSegment.h" +#include "MDSMap.h" #include #include @@ -61,10 +62,7 @@ class ESubtreeMap; class MDLog { public: - explicit MDLog(MDSRank *m) : mds(m), - replay_thread(this), - recovery_thread(this), - submit_thread(this) {} + explicit MDLog(MDSRank *m); ~MDLog(); const std::set &get_expiring_segments() const @@ -116,6 +114,13 @@ public: size_t get_num_events() const { return num_events; } size_t get_num_segments() const { return segments.size(); } + auto get_debug_subtrees() const { // cached value of mds_debug_subtrees + return debug_subtrees; + } + auto get_max_segments() const { // cached value of mds_log_max_segments + return max_segments; + } + uint64_t get_read_pos() const; uint64_t get_write_pos() const; uint64_t get_safe_pos() const; @@ -166,6 +171,8 @@ public: void standby_trim_segments(); + void handle_conf_change(const std::set<std::string>& changed, const MDSMap& mds_map); + void dump_replay_status(Formatter *f) const; MDSRank *mds; @@ -305,5 +312,11 @@ private: // -- events -- LogEvent *cur_event = nullptr; + + 
bool debug_subtrees; + uint64_t events_per_segment; + int64_t max_events; + uint64_t max_segments; + bool pause; }; #endif diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc index d6d55adaa14..78146913b83 100644 --- a/src/mds/MDSRank.cc +++ b/src/mds/MDSRank.cc @@ -3796,6 +3796,7 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "mds_cache_trim_decay_rate", "mds_cap_acquisition_throttle_retry_request_time", "mds_cap_revoke_eviction_timeout", + "mds_debug_subtrees", "mds_dir_max_entries", "mds_dump_cache_threshold_file", "mds_dump_cache_threshold_formatter", @@ -3811,6 +3812,9 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const "mds_inject_journal_corrupt_dentry_first", "mds_inject_migrator_session_race", "mds_inject_rename_corrupt_dentry_first", + "mds_log_events_per_segment", + "mds_log_max_events", + "mds_log_max_segments", "mds_log_pause", "mds_max_caps_per_client", "mds_max_export_size", @@ -3906,12 +3910,10 @@ void MDSRankDispatcher::handle_conf_change(const ConfigProxy& conf, const std::s dout(10) << "flushing conf change to components: " << changed << dendl; - if (changed.count("mds_log_pause") && !g_conf()->mds_log_pause) { - mdlog->kick_submitter(); - } sessionmap.handle_conf_change(changed); server->handle_conf_change(changed); mdcache->handle_conf_change(changed, *mdsmap); + mdlog->handle_conf_change(changed, *mdsmap); purge_queue.handle_conf_change(changed, *mdsmap); })); } diff --git a/src/mds/journal.cc b/src/mds/journal.cc index 4b9e73b5a7e..96e5295ef35 100644 --- a/src/mds/journal.cc +++ b/src/mds/journal.cc @@ -2827,7 +2827,7 @@ void ESubtreeMap::replay(MDSRank *mds) dout(0) << "journal subtrees: " << subtrees << dendl; dout(0) << "journal ambig_subtrees: " << ambiguous_subtrees << dendl; mds->mdcache->show_subtrees(); - ceph_assert(!g_conf()->mds_debug_subtrees || errors == 0); + ceph_assert(!mds->mdlog->get_debug_subtrees() || errors == 0); } return; } -- 2.39.5