From: Milind Changire Date: Tue, 26 Sep 2023 10:50:50 +0000 (+0530) Subject: mds/scrub: enqueue all child frags for a given fragset X-Git-Tag: testing/wip-batrick-testing-20240411.154038~648^2 X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=c20b4d706dfe32e08c5301676fd9144b052a4347;p=ceph-ci.git mds/scrub: enqueue all child frags for a given fragset Problem: fragsets sent over to replicas for scrubbing are simplified, i.e. they are bit representations of the least common ancestors of the frags that need scrubbing on that replica. A search for a frag in the fragset often fails to match exactly with the frags delegated to the replica, causing the scrub item to be held in the scrub stack indefinitely. Solution: Test if the frag in the fragset sent over to the replica contains the delegated frag as a child to accept it for scrubbing. Fixes: https://tracker.ceph.com/issues/62658 Signed-off-by: Milind Changire --- diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc index 6d799343f14..047bf3ba822 100644 --- a/src/mds/ScrubStack.cc +++ b/src/mds/ScrubStack.cc @@ -892,22 +892,30 @@ void ScrubStack::handle_scrub(const cref_t &m) std::vector dfs; MDSGatherBuilder gather(g_ceph_context); + frag_vec_t frags; + diri->dirfragtree.get_leaves(frags); for (const auto& fg : m->get_frags()) { - CDir *dir = diri->get_dirfrag(fg); - if (!dir) { - dout(10) << __func__ << " no frag " << fg << dendl; - continue; - } - if (!dir->is_auth()) { - dout(10) << __func__ << " not auth " << *dir << dendl; - continue; - } - if (!dir->can_auth_pin()) { - dout(10) << __func__ << " can't auth pin " << *dir << dendl; - dir->add_waiter(CDir::WAIT_UNFREEZE, gather.new_sub()); - continue; + for (auto f : frags) { + if (!fg.contains(f)) { + dout(20) << __func__ << " skipping " << f << dendl; + continue; + } + CDir *dir = diri->get_or_open_dirfrag(mdcache, f); + if (!dir) { + dout(10) << __func__ << " no frag " << f << dendl; + continue; + } + if (!dir->is_auth()) { + dout(10) << 
__func__ << " not auth " << *dir << dendl; + continue; + } + if (!dir->can_auth_pin()) { + dout(10) << __func__ << " can't auth pin " << *dir << dendl; + dir->add_waiter(CDir::WAIT_UNFREEZE, gather.new_sub()); + continue; + } + dfs.push_back(dir); } - dfs.push_back(dir); } if (gather.has_subs()) {