From: Patrick Donnelly Date: Wed, 24 Apr 2024 19:35:14 +0000 (-0400) Subject: mds: use regular dispatch for processing metrics X-Git-Tag: v20.0.0~1886^2~1 X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=ed1fe9909338bc1bc0a29df22666e9ba11fa52fe;p=ceph.git mds: use regular dispatch for processing metrics There have been cases where the MDS does an undesirable failover because it misses heartbeat resets after a long recovery in up:replay. It was observed that the MDS was processing a flood of metrics messages from all reconnecting clients. This likely caused undesirable MetricAggregator::lock contention in the messenger threads while fast dispatching client metrics. Instead, use the normal dispatch where acquiring locks is okay to do. See-also: linux.git/f7c2f4f6ce16fb58f7d024f3e1b40023c4b43ff9 Fixes: https://tracker.ceph.com/issues/65658 Signed-off-by: Patrick Donnelly --- diff --git a/src/mds/MetricAggregator.cc b/src/mds/MetricAggregator.cc index f2e520792b5a..547579169d1f 100644 --- a/src/mds/MetricAggregator.cc +++ b/src/mds/MetricAggregator.cc @@ -126,15 +126,6 @@ void MetricAggregator::shutdown() { } } -bool MetricAggregator::ms_can_fast_dispatch2(const cref_t &m) const { - return m->get_type() == MSG_MDS_METRICS; -} - -void MetricAggregator::ms_fast_dispatch2(const ref_t &m) { - bool handled = ms_dispatch2(m); - ceph_assert(handled); -} - bool MetricAggregator::ms_dispatch2(const ref_t &m) { if (m->get_type() == MSG_MDS_METRICS && m->get_connection()->get_peer_type() == CEPH_ENTITY_TYPE_MDS) { diff --git a/src/mds/MetricAggregator.h b/src/mds/MetricAggregator.h index 6d48756f76dc..72c37217e624 100644 --- a/src/mds/MetricAggregator.h +++ b/src/mds/MetricAggregator.h @@ -34,11 +34,6 @@ public: void notify_mdsmap(const MDSMap &mdsmap); - bool ms_can_fast_dispatch_any() const override { - return true; - } - bool ms_can_fast_dispatch2(const cref_t &m) const override; - void ms_fast_dispatch2(const ref_t &m) override; bool 
ms_dispatch2(const ref_t &m) override; void ms_handle_connect(Connection *c) override {