git.apps.os.sepia.ceph.com Git - xfsprogs-dev.git/commitdiff
xfs_scrub: recheck entire metadata objects after corruption repairs
author Darrick J. Wong <djwong@kernel.org>
Mon, 29 Jul 2024 23:23:09 +0000 (16:23 -0700)
committer Darrick J. Wong <djwong@kernel.org>
Tue, 30 Jul 2024 00:01:08 +0000 (17:01 -0700)
When we've finished making repairs to some domain of filesystem metadata
(file, AG, etc.) to correct an inconsistency, we should recheck all the
other metadata types within that domain to make sure that we neither
made things worse nor introduced more cross-referencing problems.  If we
did, requeue the item to make the repairs.  If the only changes we made
were optimizations, don't bother.

The number of XFS_SCRUB_TYPE_ values is getting close to the number of
bits in a u32, so I chose u64 for the sri_selected bitmask.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
scrub/repair.c
scrub/scrub.c
scrub/scrub.h
scrub/scrub_private.h

index eba936e1fd192b66494bc45401a6a3b949fe0acc..19f5c9052affb21e24fe235b34f9247772b295ce 100644 (file)
@@ -485,8 +485,10 @@ action_item_try_repair(
 {
        struct scrub_item       *sri = &aitem->sri;
        unsigned int            before, after;
+       unsigned int            scrub_type;
        int                     ret;
 
+       BUILD_BUG_ON(sizeof(sri->sri_selected) * NBBY < XFS_SCRUB_TYPE_NR);
        before = repair_item_count_needsrepair(sri);
 
        ret = repair_item(ctx, sri, 0);
@@ -507,6 +509,41 @@ action_item_try_repair(
                return 0;
        }
 
+       /*
+        * Nothing in this fs object was marked inconsistent.  This means we
+        * were merely optimizing metadata and there is no revalidation work to
+        * be done.
+        */
+       if (!sri->sri_inconsistent) {
+               *outcome = TR_REPAIRED;
+               return 0;
+       }
+
+       /*
+        * We fixed inconsistent metadata, so reschedule the entire object for
+        * immediate revalidation to see if anything else went wrong.
+        */
+       foreach_scrub_type(scrub_type)
+               if (sri->sri_selected & (1ULL << scrub_type))
+                       sri->sri_state[scrub_type] = SCRUB_ITEM_NEEDSCHECK;
+       sri->sri_inconsistent = false;
+       sri->sri_revalidate = true;
+
+       ret = scrub_item_check(ctx, sri);
+       if (ret)
+               return ret;
+
+       after = repair_item_count_needsrepair(sri);
+       if (after > 0) {
+               /*
+                * Uhoh, we found something else broken.  Tell the caller that
+                * this item needs to be queued for more repairs.
+                */
+               sri->sri_revalidate = false;
+               *outcome = TR_REQUEUE;
+               return 0;
+       }
+
        /* Repairs complete. */
        *outcome = TR_REPAIRED;
        return 0;
index 69dfb1eb84dc1ca41027b0b7186aea20191fb6b4..2b6b6274e382bd38ae57b2341092cd547ee278b0 100644 (file)
@@ -117,11 +117,12 @@ xfs_check_metadata(
        dbg_printf("check %s flags %xh\n", descr_render(&dsc), meta.sm_flags);
 
        error = -xfrog_scrub_metadata(xfdp, &meta);
-       if (debug_tweak_on("XFS_SCRUB_FORCE_REPAIR") && !error)
-               meta.sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
        switch (error) {
        case 0:
                /* No operational errors encountered. */
+               if (!sri->sri_revalidate &&
+                   debug_tweak_on("XFS_SCRUB_FORCE_REPAIR"))
+                       meta.sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
                break;
        case ENOENT:
                /* Metadata not present, just skip it. */
index 246c923f4905ed77a80d182433a03a15fbfe8624..90578108a1c8df52b3a96e8da996c1f1cb6a842f 100644 (file)
@@ -59,11 +59,20 @@ struct scrub_item {
        __u32                   sri_gen;
        __u32                   sri_agno;
 
+       /* Bitmask of scrub types that were scheduled here. */
+       __u64                   sri_selected;
+
        /* Scrub item state flags, one for each XFS_SCRUB_TYPE. */
        __u8                    sri_state[XFS_SCRUB_TYPE_NR];
 
        /* Track scrub and repair call retries for each scrub type. */
        __u8                    sri_tries[XFS_SCRUB_TYPE_NR];
+
+       /* Were there any corruption repairs needed? */
+       bool                    sri_inconsistent:1;
+
+       /* Are we revalidating after repairs? */
+       bool                    sri_revalidate:1;
 };
 
 #define foreach_scrub_type(loopvar) \
@@ -103,6 +112,7 @@ static inline void
 scrub_item_schedule(struct scrub_item *sri, unsigned int scrub_type)
 {
        sri->sri_state[scrub_type] = SCRUB_ITEM_NEEDSCHECK;
+       sri->sri_selected |= (1ULL << scrub_type);
 }
 
 void scrub_item_schedule_group(struct scrub_item *sri,
index 234b30ef2b811d99f8578e099ff83fb997e2cece..bcfabda16be153154a97c76b5a91f4537543ed27 100644 (file)
@@ -71,6 +71,8 @@ scrub_item_save_state(
        unsigned  int                   scrub_flags)
 {
        sri->sri_state[scrub_type] = scrub_flags & SCRUB_ITEM_REPAIR_ANY;
+       if (scrub_flags & SCRUB_ITEM_NEEDSREPAIR)
+               sri->sri_inconsistent = true;
 }
 
 static inline void