From f48b04d3755393d55f29442e558cfd04c8583870 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Wed, 21 Feb 2024 10:03:14 -0500
Subject: [PATCH] mds: add counter to throttle quiesce

So a storm of quiesce operations do not affect normal MDS operations.

Signed-off-by: Patrick Donnelly
(cherry picked from commit f4eec857fc8bda4980b9046cde6314fa005122c2)
---
Review note (not part of the commit message):

  The MDCache constructor hunk below now seeds quiesce_counter from
  "mds_cache_quiesce_decay_rate". The previous text of this patch read
  "mds_cache_trim_decay_rate" there, while handle_conf_change() in this
  same patch rebuilds the counter from the quiesce option. As written
  before, the configured quiesce decay rate was ignored until the option
  changed at runtime. Only one token on an added line differs, so hunk
  line counts and the diffstat are unchanged.

  Overview of what the patch adds:
    * mds.yaml.in  - three runtime options: mds_cache_quiesce_decay_rate,
                     mds_cache_quiesce_threshold, mds_cache_quiesce_sleep.
    * MDCache.h    - members quiesce_counter, quiesce_threshold,
                     quiesce_sleep.
    * MDCache.cc   - initializes and live-updates those members; in
                     dispatch_quiesce_inode(), when the counter exceeds
                     the threshold the request is re-queued via
                     C_MDS_RetryRequest after quiesce_sleep, otherwise
                     the counter is hit once and processing continues.
    * MDSRank.cc   - registers the three options as tracked conf keys.

 src/common/options/mds.yaml.in | 27 +++++++++++++++++++++++++++
 src/mds/MDCache.cc             | 23 ++++++++++++++++++++++-
 src/mds/MDCache.h              |  3 +++
 src/mds/MDSRank.cc             |  3 +++
 4 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/src/common/options/mds.yaml.in b/src/common/options/mds.yaml.in
index 6b7ef89080a08..d25e7b52edb04 100644
--- a/src/common/options/mds.yaml.in
+++ b/src/common/options/mds.yaml.in
@@ -163,6 +163,33 @@ options:
   - mds
   flags:
   - runtime
+- name: mds_cache_quiesce_decay_rate
+  type: float
+  level: advanced
+  desc: decay rate for quiescing inodes throttle
+  default: 1
+  services:
+  - mds
+  flags:
+  - runtime
+- name: mds_cache_quiesce_threshold
+  type: size
+  level: advanced
+  desc: threshold for number of inodes that can be quiesced
+  default: 512_K
+  services:
+  - mds
+  flags:
+  - runtime
+- name: mds_cache_quiesce_sleep
+  type: millisecs
+  level: advanced
+  desc: sleep time for request after passing quiesce threshold
+  default: 200
+  services:
+  - mds
+  flags:
+  - runtime
 - name: mds_max_file_recover
   type: uint
   level: advanced
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 2d2b0273e2c4e..8a3dd21d57be5 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -122,7 +122,10 @@ MDCache::MDCache(MDSRank *m, PurgeQueue &purge_queue_) :
   filer(m->objecter, m->finisher),
   stray_manager(m, purge_queue_),
   recovery_queue(m),
-  trim_counter(g_conf().get_val("mds_cache_trim_decay_rate"))
+  trim_counter(g_conf().get_val("mds_cache_trim_decay_rate")),
+  quiesce_counter(g_conf().get_val("mds_cache_quiesce_decay_rate")),
+  quiesce_threshold(g_conf().get_val("mds_cache_quiesce_threshold")),
+  quiesce_sleep(g_conf().get_val("mds_cache_quiesce_sleep"))
 {
   migrator.reset(new Migrator(mds, this));
 
@@ -195,6 +198,15 @@ void MDCache::handle_conf_change(const std::set& changed, const MDS
     cache_health_threshold = g_conf().get_val("mds_health_cache_threshold");
   if (changed.count("mds_cache_mid"))
     lru.lru_set_midpoint(g_conf().get_val("mds_cache_mid"));
+  if (changed.count("mds_cache_quiesce_decay_rate")) {
+    quiesce_counter = DecayCounter(g_conf().get_val("mds_cache_quiesce_decay_rate"));
+  }
+  if (changed.count("mds_cache_quiesce_threshold")) {
+    quiesce_threshold = g_conf().get_val("mds_cache_quiesce_threshold");
+  }
+  if (changed.count("mds_cache_quiesce_sleep")) {
+    quiesce_sleep = g_conf().get_val("mds_cache_quiesce_sleep");
+  }
   if (changed.count("mds_cache_trim_decay_rate")) {
     trim_counter = DecayCounter(g_conf().get_val("mds_cache_trim_decay_rate"));
   }
@@ -13530,6 +13542,15 @@ void MDCache::dispatch_quiesce_inode(const MDRequestRef& mdr)
 
   dout(20) << __func__ << " " << *mdr << " quiescing " << *in << dendl;
 
+  if (quiesce_counter.get() > quiesce_threshold) {
+    dout(20) << __func__
+             << " quiesce counter " << quiesce_counter
+             << " threshold (" << quiesce_threshold
+             << ") reached: scheduling retry" << dendl;
+    mds->timer.add_event_after(quiesce_sleep, new C_MDS_RetryRequest(this, mdr));
+    return;
+  }
+  quiesce_counter.hit();
 
   {
     /* Acquire authpins on `in` to prevent migrations after this rank considers
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index ba91a24e1451c..ab34d238eafb2 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -1489,6 +1489,9 @@ private:
   uint64_t kill_shutdown_at = 0;
 
   std::map quiesced_subvolumes;
+  DecayCounter quiesce_counter;
+  uint64_t quiesce_threshold;
+  std::chrono::milliseconds quiesce_sleep;
 };
 
 class C_MDS_RetryRequest : public MDSInternalContext {
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index 209de7cb86e30..c9e50d79fa209 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -4020,6 +4020,9 @@ const char** MDSRankDispatcher::get_tracked_conf_keys() const
     "mds_cache_memory_limit",
     "mds_cache_mid",
     "mds_cache_reservation",
+    "mds_cache_quiesce_decay_rate",
+    "mds_cache_quiesce_threshold",
+    "mds_cache_quiesce_sleep",
     "mds_cache_trim_decay_rate",
     "mds_cap_acquisition_throttle_retry_request_time",
     "mds_cap_revoke_eviction_timeout",
-- 
2.39.5