mds: skip scrubbing damaged dirfrag

author Patrick Donnelly <pdonnell@ibm.com>

Tue, 28 Jan 2025 22:30:19 +0000 (17:30 -0500)

committer Patrick Donnelly <pdonnell@ibm.com>

Thu, 30 Jan 2025 04:19:53 +0000 (23:19 -0500)
author Patrick Donnelly <pdonnell@ibm.com>
Tue, 28 Jan 2025 22:30:19 +0000 (17:30 -0500)
committer Patrick Donnelly <pdonnell@ibm.com>
Thu, 30 Jan 2025 04:19:53 +0000 (23:19 -0500)
diff --git a/src/mds/CInode.cc b/src/mds/CInode.cc

index dfad411d323d8b416ccb6819cc888dfc25aa1054..15ddd0f1e2d551c1f2f4b9a73fc09100de5f5afa 100644 (file)
--- a/src/mds/CInode.cc
+++ b/src/mds/CInode.cc
@@ -4999,9 +4999,15 @@ next:
        // check each dirfrag...
        for (const auto &p : in->dirfrags) {
         CDir *dir = p.second;
-       ceph_assert(dir->get_version() > 0);
-       nest_info.add(dir->get_fnode()->accounted_rstat);
-       dir_info.add(dir->get_fnode()->accounted_fragstat);
+        /* If the dirfrag is damaged, we cannot do any checks on the fragment. */
+        if (in->mdcache->mds->damage_table.is_dirfrag_damaged(dir)) {
+          results->raw_stats.error_str << "one or more dirfrags are damaged";
+          goto next;
+        } else {
+         ceph_assert(dir->get_version() > 0);
+         nest_info.add(dir->get_fnode()->accounted_rstat);
+         dir_info.add(dir->get_fnode()->accounted_fragstat);
+        }
        }
        nest_info.rsubdirs++; // it gets one to account for self
        if (const sr_t *srnode = in->get_projected_srnode(); srnode)
@@ -5015,9 +5021,6 @@ next:
             << "freshly-calculated rstats don't match existing ones (will be fixed)";
           in->mdcache->repair_inode_stats(in);
            results->raw_stats.repaired = true;
-          for (const auto &p : in->dirfrags){
-            in->mdcache->mds->damage_table.remove_dirfrag_damage_entry(p.second);
-          }
         } else {
           results->raw_stats.error_str
             << "freshly-calculated rstats don't match existing ones";
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc

index 04e1fd7111952eb39020368d53f2b6e5721222f1..06b4c2a2af0ebc5493f6ac3e4797efa96efa2fa9 100644 (file)
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -13534,6 +13534,10 @@ void MDCache::repair_inode_stats_work(const MDRequestRef& mdr)
          ceph_assert(mdr->is_auth_pinned(diri));
          dir = diri->get_or_open_dirfrag(this, leaf);
        }
+      if (mds->damage_table.is_dirfrag_damaged(dir)) {
+        mds->server->respond_to_request(mdr, -CEPHFS_EIO);
+        return;
+      }
        if (dir->get_version() == 0) {
          ceph_assert(dir->is_auth());
          dir->fetch_keys({}, new C_MDS_RetryRequest(this, mdr));
diff --git a/src/mds/ScrubStack.cc b/src/mds/ScrubStack.cc

index 7ec77a31de3ddd6353ce39eee2f537f06d022e57..974e0f1ba142235d63baf44102c4132f7fafbd7e 100644 (file)
--- a/src/mds/ScrubStack.cc
+++ b/src/mds/ScrubStack.cc
@@ -382,7 +382,15 @@ void ScrubStack::scrub_dir_inode(CInode *in, bool *added_children, bool *done)
      if (queued.contains(fg))
        continue;
      CDir *dir = in->get_or_open_dirfrag(mdcache, fg);
-    if (!dir->is_auth()) {
+    if (mds->damage_table.is_dirfrag_damaged(dir)) {
+      /* N.B.: we are cowardly (and ironically) not looking at dirfrags we've
+       * noted as damaged already. Such a dirfrag is either missing an omap
+       * (or object) or has a corrupt fnode. The MDS presently does not know
+       * how to recover from either situation, so skip it for now.
+       */
+      dout(5) << __func__ << ": not scrubbing damaged dirfrag: " << *dir << dendl;
+      continue;
+    } else if (!dir->is_auth()) {
        if (dir->is_ambiguous_auth()) {
         dout(20) << __func__ << " ambiguous auth " << *dir  << dendl;
         dir->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, gather.new_sub());
author	Patrick Donnelly <pdonnell@ibm.com>
	Tue, 28 Jan 2025 22:30:19 +0000 (17:30 -0500)
committer	Patrick Donnelly <pdonnell@ibm.com>
	Thu, 30 Jan 2025 04:19:53 +0000 (23:19 -0500)
src/mds/CInode.cc		patch \| blob \| history
src/mds/MDCache.cc		patch \| blob \| history
src/mds/ScrubStack.cc		patch \| blob \| history