git.apps.os.sepia.ceph.com Git - xfstests-dev.git/commitdiff
xfs: test rebuilding the entire filesystem with online fsck
author Darrick J. Wong <djwong@kernel.org>
Fri, 30 Dec 2022 22:19:12 +0000 (14:19 -0800)
committer Zorro Lang <zlang@kernel.org>
Sat, 18 Feb 2023 06:30:36 +0000 (14:30 +0800)
Add a new knob, TEST_XFS_SCRUB_REBUILD, that makes it so that we use
xfs_scrub to rebuild the ondisk metadata after every test.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
README
common/fuzzy
common/rc
common/xfs

diff --git a/README b/README
index 4c4f22f853dee560ac07ce26b72746dda21b79dc..744317625fb18ea40daa4ce8e77dc22d22627ae4 100644 (file)
--- a/README
+++ b/README
@@ -191,6 +191,9 @@ Extra XFS specification:
    to check the filesystem. As of August 2021, xfs_repair finds all
    filesystem corruptions found by xfs_check, and more, which means that
    xfs_check is no longer run by default.
+ - Set TEST_XFS_SCRUB_REBUILD=1 to have _check_xfs_filesystem run xfs_scrub in
+   "force_repair" mode to rebuild the filesystem; and xfs_repair -n to check
+   the results of the rebuilding.
  - xfs_scrub, if present, will always check the test and scratch
    filesystems if they are still online at the end of the test. It is no
    longer necessary to set TEST_XFS_SCRUB.
diff --git a/common/fuzzy b/common/fuzzy
index 7670a093d76b6fb2f379d6b46a336db5cd2d0bec..cc775b62be57ab20a421d61361d000590ef43128 100644 (file)
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -975,6 +975,7 @@ __scratch_xfs_stress_setup_force_rebuild() {
 # and wait for 30*TIME_FACTOR seconds to see if the filesystem goes down.
 # Same requirements and arguments as _scratch_xfs_stress_scrub.
 _scratch_xfs_stress_online_repair() {
+       touch "$RESULT_DIR/.skip_orebuild"      # no need to test online rebuild
        __scratch_xfs_stress_setup_force_rebuild
        XFS_SCRUB_FORCE_REPAIR=1 _scratch_xfs_stress_scrub "$@"
 }
diff --git a/common/rc b/common/rc
index 16ef36afd174ed99247912f172768f35335b8bf3..6852af79b40741459d9d206971ededb4f203a4cd 100644 (file)
--- a/common/rc
+++ b/common/rc
@@ -1691,7 +1691,7 @@ _require_scratch_nocheck()
             exit 1
         fi
     fi
-    rm -f ${RESULT_DIR}/require_scratch
+    rm -f ${RESULT_DIR}/require_scratch "$RESULT_DIR/.skip_orebuild"
 }
 
 # we need the scratch device and it needs to not be an lvm device
diff --git a/common/xfs b/common/xfs
index 97c049e2ca1cb4fcde6f048ef4a960cabfb03ba8..0d4855ea682c04adb1da3b788495f1a4e9c04f3f 100644 (file)
--- a/common/xfs
+++ b/common/xfs
@@ -682,6 +682,8 @@ _scratch_xfs_mdrestore()
 # run xfs_check and friends on a FS.
 _check_xfs_filesystem()
 {
+       local can_scrub=
+
        if [ $# -ne 3 ]; then
                echo "Usage: _check_xfs_filesystem device <logdev>|none <rtdev>|none" 1>&2
                exit 1
@@ -716,6 +718,8 @@ _check_xfs_filesystem()
        # Run online scrub if we can.
        mntpt="$(_is_dev_mounted $device)"
        if [ -n "$mntpt" ] && _supports_xfs_scrub "$mntpt" "$device"; then
+               can_scrub=1
+
                # Tests can create a scenario in which a call to syncfs() issued
                # at the end of the execution of the test script would return an
                # error code. xfs_scrub internally calls syncfs() before
@@ -832,6 +836,79 @@ _check_xfs_filesystem()
                _mount_or_remount_rw "$extra_mount_options" $device $mountpoint
        fi
 
+       # If desired, test the online metadata rebuilding behavior if the
+       # filesystem was mounted when this function was called.
+       if [ -n "$TEST_XFS_SCRUB_REBUILD" ] && [ -n "$can_scrub" ] && [ ! -e "$RESULT_DIR/.skip_orebuild" ]; then
+               orebuild_ok=1
+
+               # Walk the entire directory tree to load directory blocks into
+               # memory and populate the dentry cache, which can speed up the
+               # repairs considerably when the directory tree is very large.
+               find $mntpt &>/dev/null &
+
+               XFS_SCRUB_FORCE_REPAIR=1 "$XFS_SCRUB_PROG" -v -d $mntpt > $tmp.scrub 2>&1
+               if [ $? -ne 0 ]; then
+                       if grep -q 'No space left on device' $tmp.scrub; then
+                               # It's not an error if the fs does not have
+                               # enough space to complete a repair.  We will
+                               # check everything, though.
+                               echo "*** XFS_SCRUB_FORCE_REPAIR=1 xfs_scrub -v -d ran out of space ***" >> $seqres.full
+                               cat $tmp.scrub >> $seqres.full
+                               echo "*** end xfs_scrub output" >> $seqres.full
+                       else
+                               _log_err "_check_xfs_filesystem: filesystem on $device failed scrub orebuild"
+                               echo "*** XFS_SCRUB_FORCE_REPAIR=1 xfs_scrub -v -d output ***" >> $seqres.full
+                               cat $tmp.scrub >> $seqres.full
+                               echo "*** end xfs_scrub output" >> $seqres.full
+                               ok=0
+                               orebuild_ok=0
+                       fi
+               fi
+               rm -f $tmp.scrub
+
+               # Clear force_repair because xfs_scrub could have set it
+               $XFS_IO_PROG -x -c 'inject noerror' "$mntpt" >> $seqres.full
+
+               "$XFS_SCRUB_PROG" -v -d -n $mntpt > $tmp.scrub 2>&1
+               if [ $? -ne 0 ]; then
+                       _log_err "_check_xfs_filesystem: filesystem on $device failed scrub orebuild recheck"
+                       echo "*** xfs_scrub -v -d -n output ***" >> $seqres.full
+                       cat $tmp.scrub >> $seqres.full
+                       echo "*** end xfs_scrub output" >> $seqres.full
+                       ok=0
+                       orebuild_ok=0
+               fi
+               rm -f $tmp.scrub
+
+               mountpoint=`_umount_or_remount_ro $device`
+
+               $XFS_REPAIR_PROG -n $extra_options $extra_log_options $extra_rt_options $device >$tmp.repair 2>&1
+               if [ $? -ne 0 ]; then
+                       _log_err "_check_xfs_filesystem: filesystem on $device is inconsistent (orebuild-reverify)"
+                       echo "*** xfs_repair -n output ***"     >>$seqres.full
+                       cat $tmp.repair                         >>$seqres.full
+                       echo "*** end xfs_repair output"        >>$seqres.full
+
+                       ok=0
+                       orebuild_ok=0
+               fi
+               rm -f $tmp.repair
+
+               if [ $ok -eq 0 ]; then
+                       echo "*** mount output ***"             >>$seqres.full
+                       _mount                                  >>$seqres.full
+                       echo "*** end mount output"             >>$seqres.full
+               elif [ "$type" = "xfs" ]; then
+                       _mount_or_remount_rw "$extra_mount_options" $device $mountpoint
+               fi
+
+               if [ "$orebuild_ok" -ne 1 ] && [ "$DUMP_CORRUPT_FS" = "1" ]; then
+                       local flatdev="$(basename "$device")"
+                       _xfs_metadump "$seqres.$flatdev.orebuild.md" "$device" \
+                               "$logdev" compress >> $seqres.full
+               fi
+       fi
+
        if [ $ok -eq 0 ]; then
                status=1
                if [ "$iam" != "check" ]; then