From: Leonid Usov
Date: Wed, 25 Oct 2023 10:04:14 +0000 (+0300)
Subject: quincy: mds/MDBalancer: ignore queued callbacks if MDS is not active
X-Git-Tag: v17.2.8~578^2
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=cd49431b77b51a91e411cf0218800ea4874307d5;p=ceph.git

quincy: mds/MDBalancer: ignore queued callbacks if MDS is not active

Signed-off-by: Leonid Usov
Fixes: https://tracker.ceph.com/issues/63480
Original-Issue: https://tracker.ceph.com/issues/63281
Original-PR: https://github.com/ceph/ceph/pull/54178
(cherry picked from commit 41406dc3ec5940ec8b9db8dcbf8e4640ce5c3eae)
---

diff --git a/src/mds/MDBalancer.cc b/src/mds/MDBalancer.cc
index cb63f605444..89e0a849952 100644
--- a/src/mds/MDBalancer.cc
+++ b/src/mds/MDBalancer.cc
@@ -550,7 +550,8 @@ double MDBalancer::try_match(balance_state_t& state, mds_rank_t ex, double& maxe
 
 void MDBalancer::queue_split(const CDir *dir, bool fast)
 {
-  dout(10) << __func__ << " enqueuing " << *dir
+  constexpr const auto &_func_ = __func__;
+  dout(10) << _func_ << " enqueuing " << *dir
                        << " (fast=" << fast << ")" << dendl;
 
   const dirfrag_t df = dir->dirfrag();
@@ -564,6 +565,16 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
       return;
     }
 
+    if (mds->is_stopping()) {
+      // not a good time. This could have been (!mds->is_active())
+      // or at least (mds->is_stopping() || mds->is_stopped()), but
+      // is_stopped() is never true because an MDS respawns as soon as it's removed from the map;
+      // the narrow is_stopping check is to avoid potential regressions
+      // due to unknown coupling with other parts of the MDS (especially multiple ranks).
+      dout(5) << "ignoring the " << _func_ << " callback because the MDS state is '" << ceph_mds_state_name(mds->get_state()) << "'" << dendl;
+      return;
+    }
+
     auto mdcache = mds->mdcache;
     CDir *dir = mdcache->get_dirfrag(df);
 
@@ -578,7 +589,7 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
 
     // Pass on to MDCache: note that the split might still not
     // happen if the checks in MDCache::can_fragment fail.
-    dout(10) << __func__ << " splitting " << *dir << dendl;
+    dout(10) << _func_ << " splitting " << *dir << dendl;
     int bits = g_conf()->mds_bal_split_bits;
     if (dir->inode->is_ephemeral_dist()) {
       unsigned min_frag_bits = mdcache->get_ephemeral_dist_frag_bits();
@@ -608,6 +619,7 @@ void MDBalancer::queue_split(const CDir *dir, bool fast)
 void MDBalancer::queue_merge(CDir *dir)
 {
   const auto frag = dir->dirfrag();
+  constexpr const auto &_func_ = __func__;
   auto callback = [this, frag](int r) {
     ceph_assert(frag.frag != frag_t());
 
@@ -616,6 +628,16 @@ void MDBalancer::queue_merge(CDir *dir)
     // starting one), and this context is the only one that erases it.
     merge_pending.erase(frag);
 
+    if (mds->is_stopping()) {
+      // not a good time. This could have been (!mds->is_active())
+      // or at least (mds->is_stopping() || mds->is_stopped()), but
+      // is_stopped() is never true because an MDS respawns as soon as it's removed from the map;
+      // the narrow is_stopping check is to avoid potential regressions
+      // due to unknown coupling with other parts of the MDS (especially multiple ranks).
+      dout(5) << "ignoring the " << _func_ << " callback because the MDS state is '" << ceph_mds_state_name(mds->get_state()) << "'" << dendl;
+      return;
+    }
+
     auto mdcache = mds->mdcache;
     CDir *dir = mdcache->get_dirfrag(frag);
     if (!dir) {