From 147e561d7122e53c0c6d7fc917e1c9503de53252 Mon Sep 17 00:00:00 2001
From: Patrick Donnelly
Date: Mon, 8 Nov 2021 14:55:26 -0500
Subject: [PATCH] mds: defer messages to bootstrapping ranks

This is necessary with msgr protocol v2. The protocol no longer updates
the server "myname" for each message. It's associated for a connection.
For this reason, a newly starting rank (failover or new) updating its
messenger "myname" races with other ranks trying to connect to it. We
need those ranks to hold off on connecting until the rank reaches a
known good state.

Fixes: https://tracker.ceph.com/issues/53194
Signed-off-by: Patrick Donnelly
(cherry picked from commit 23a6c256162563b41c0212f2233d940cf22ce5b1)

Conflicts:
	src/mds/MDSRank.h: trivial
---
Review notes (not part of the commit message):
 * Line breaks, indentation and C++ template arguments (ref_t<Message>,
   std::vector<mds_rank_t>, get_val<bool>, map<..., MDSContext::vec>) were
   reconstructed from a flattened copy of this patch. TODO: confirm them
   against the cherry-picked commit before applying.
 * One deliberate difference from the flattened copy: a space was added
   before "mds." in the new dout(5) message so that the function name and
   the rank no longer run together in the log output.
 * The first MDSRank.cc hunk header declares 12 old lines but only 11
   context lines could be recovered; one context line break is presumably
   missing there. TODO: confirm.

 src/mds/MDSMap.h   |  3 +++
 src/mds/MDSRank.cc | 31 +++++++++++++++++++++++++++++++
 src/mds/MDSRank.h  |  4 ++++
 3 files changed, 38 insertions(+)

diff --git a/src/mds/MDSMap.h b/src/mds/MDSMap.h
index f583e22ea88a..eefc3b70aa02 100644
--- a/src/mds/MDSMap.h
+++ b/src/mds/MDSMap.h
@@ -456,6 +456,9 @@ public:
   }
 
   bool is_boot(mds_rank_t m) const { return get_state(m) == STATE_BOOT; }
+  bool is_bootstrapping(mds_rank_t m) const {
+    return is_creating(m) || is_starting(m) || is_replay(m);
+  }
   bool is_creating(mds_rank_t m) const { return get_state(m) == STATE_CREATING; }
   bool is_starting(mds_rank_t m) const { return get_state(m) == STATE_STARTING; }
   bool is_replay(mds_rank_t m) const { return get_state(m) == STATE_REPLAY; }
diff --git a/src/mds/MDSRank.cc b/src/mds/MDSRank.cc
index e2eab04c9d6f..d850c1741ac3 100644
--- a/src/mds/MDSRank.cc
+++ b/src/mds/MDSRank.cc
@@ -1419,12 +1419,28 @@ void MDSRank::send_message(const ref_t<Message>& m, const ConnectionRef& c)
   c->send_message2(m);
 }
 
+class C_MDS_RetrySendMessageMDS : public MDSInternalContext {
+public:
+  C_MDS_RetrySendMessageMDS(MDSRank* mds, mds_rank_t who, ref_t<Message> m)
+    : MDSInternalContext(mds), who(who), m(std::move(m)) {}
+  void finish(int r) override {
+    mds->send_message_mds(m, who);
+  }
+private:
+  mds_rank_t who;
+  ref_t<Message> m;
+};
+
 void MDSRank::send_message_mds(const ref_t<Message>& m, mds_rank_t mds)
 {
   if (!mdsmap->is_up(mds)) {
     dout(10) << "send_message_mds mds." << mds << " not up, dropping " << *m << dendl;
     return;
+  } else if (mdsmap->is_bootstrapping(mds)) {
+    dout(5) << __func__ << " mds." << mds << " is bootstrapping, deferring " << *m << dendl;
+    wait_for_bootstrapped_peer(mds, new C_MDS_RetrySendMessageMDS(this, mds, m));
+    return;
   }
 
   // send mdsmap first?
@@ -2440,6 +2456,21 @@ void MDSRankDispatcher::handle_mds_map(
     }
   }
 
+  // did someone leave a "bootstrapping" state? We can't connect until then to
+  // allow messenger "myname" updates.
+  {
+    std::vector<mds_rank_t> erase;
+    for (auto& [rank, queue] : waiting_for_bootstrapping_peer) {
+      auto state = mdsmap->get_state(rank);
+      if (state > MDSMap::STATE_REPLAY) {
+        queue_waiters(queue);
+        erase.push_back(rank);
+      }
+    }
+    for (const auto& rank : erase) {
+      waiting_for_bootstrapping_peer.erase(rank);
+    }
+  }
   // for testing...
   if (unlikely(g_conf().get_val<bool>("mds_connect_bootstrapping"))) {
     std::set<mds_rank_t> bootstrapping;
diff --git a/src/mds/MDSRank.h b/src/mds/MDSRank.h
index 4a5a7dd9305f..7fe06420e8e7 100644
--- a/src/mds/MDSRank.h
+++ b/src/mds/MDSRank.h
@@ -303,6 +303,9 @@ class MDSRank {
     void send_message_client(const ref_t<Message>& m, Session* session);
     void send_message(const ref_t<Message>& m, const ConnectionRef& c);
 
+    void wait_for_bootstrapped_peer(mds_rank_t who, MDSContext *c) {
+      waiting_for_bootstrapping_peer[who].push_back(c);
+    }
     void wait_for_active_peer(mds_rank_t who, MDSContext *c) {
       waiting_for_active_peer[who].push_back(c);
     }
@@ -587,6 +590,7 @@ class MDSRank {
     bool replaying_requests_done = false;
 
     map<mds_rank_t, MDSContext::vec> waiting_for_active_peer;
+    map<mds_rank_t, MDSContext::vec> waiting_for_bootstrapping_peer;
     map<epoch_t, MDSContext::vec> waiting_for_mdsmap;
 
     epoch_t osd_epoch_barrier = 0;
-- 
2.47.3