From: Darrick J. Wong
Date: Tue, 10 Mar 2026 03:53:15 +0000 (-0700)
Subject: xfs: test xfs_healer wont repair the wrong filesystem
X-Git-Tag: v2026.03.20~4
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=17fdaa0ba3f1e36fec1514f8a511898b13897a86;p=xfstests-dev.git

xfs: test xfs_healer wont repair the wrong filesystem

Make sure that when xfs_healer needs to reopen a filesystem to repair
it, it won't latch on to another xfs filesystem that has been mounted
atop the same mountpoint.

Signed-off-by: Darrick J. Wong
Reviewed-by: Zorro Lang
Signed-off-by: Zorro Lang
---

diff --git a/tests/xfs/664 b/tests/xfs/664
new file mode 100755
index 00000000..4d746fb2
--- /dev/null
+++ b/tests/xfs/664
@@ -0,0 +1,135 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2025-2026 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 664
+#
+# Ensure that autonomous self healing won't fix the wrong filesystem if a
+# snapshot of the original filesystem is now mounted on the same directory as
+# the original.
+#
+. ./common/preamble
+_begin_fstest auto selfhealing
+
+. ./common/filter
+. ./common/fuzzy
+. ./common/systemd
+
+# Tear down in reverse order of setup: kill fsstress if any, unmount both
+# possible mounts stacked on $mntpt, destroy both loop devices, remove files.
+_cleanup()
+{
+	command -v _kill_fsstress &>/dev/null && _kill_fsstress
+	cd /
+	rm -r -f $tmp.*
+	test -e "$mntpt" && _unmount "$mntpt" &>/dev/null
+	test -e "$mntpt" && _unmount "$mntpt" &>/dev/null
+	test -e "$loop1" && _destroy_loop_device "$loop1"
+	test -e "$loop2" && _destroy_loop_device "$loop2"
+	test -e "$testdir" && rm -r -f "$testdir"
+}
+
+_require_test
+_require_scrub
+_require_xfs_io_command "repair"	# online repair support
+_require_xfs_db_command "blocktrash"
+_require_command "$XFS_HEALER_PROG" "xfs_healer"
+_require_command "$XFS_PROPERTY_PROG" "xfs_property"
+
+testdir=$TEST_DIR/$seq
+mntpt=$testdir/mount
+disk1=$testdir/disk1
+disk2=$testdir/disk2
+
+mkdir -p "$mntpt"
+$XFS_IO_PROG -f -c "truncate 300m" $disk1
+$XFS_IO_PROG -f -c "truncate 300m" $disk2
+loop1="$(_create_loop_device "$disk1")"
+
+filter_mntpt() {
+	sed -e "s|$mntpt|MNTPT|g"
+}
+
+_mkfs_dev "$loop1" >> $seqres.full
+_mount "$loop1" "$mntpt" || _notrun "cannot mount victim filesystem"
+
+_xfs_has_feature $mntpt rmapbt || \
+	_notrun "reverse mapping required to test directory auto-repair"
+_xfs_has_feature $mntpt parent || \
+	_notrun "parent pointers required to test directory auto-repair"
+_require_xfs_healer $mntpt --repair
+
+# Configure the filesystem for automatic repair of the filesystem.
+$XFS_PROPERTY_PROG $mntpt set autofsck=repair >> $seqres.full
+
+# Create a largeish directory
+dblksz=$(_xfs_get_dir_blocksize "$mntpt")
+echo testdata > $mntpt/a
+mkdir -p "$mntpt/some/victimdir"
+for ((i = 0; i < (dblksz / 255); i++)); do
+	fname="$(printf "%0255d" "$i")"
+	ln $mntpt/a $mntpt/some/victimdir/$fname
+done
+
+# Did we get at least two dir blocks?
+dirsize=$(stat -c '%s' $mntpt/some/victimdir)
+test "$dirsize" -gt "$dblksz" || echo "failed to create two-block directory"
+
+# Clone the fs, break the directory, remount filesystem
+_unmount "$mntpt"
+
+cp --sparse=always "$disk1" "$disk2" || _fail "cannot copy disk1"
+loop2="$(_create_loop_device_like_bdev "$disk2" "$loop1")"
+
+$XFS_DB_PROG "$loop1" -x \
+	-c 'path /some/victimdir' \
+	-c 'bmap' \
+	-c 'dblock 1' \
+	-c 'blocktrash -z -0 -o 0 -x 2048 -y 2048 -n 2048' >> $seqres.full
+_mount "$loop1" "$mntpt" || _fail "cannot mount broken fs"
+
+_invoke_xfs_healer "$mntpt" "$tmp.healer" --repair
+
+# Stop the healer process so that it can't read error events while we do some
+# shenanigans.
+test -n "$XFS_HEALER_PID" || _fail "nobody set XFS_HEALER_PID?"
+kill -STOP $XFS_HEALER_PID
+
+echo "LOG $XFS_HEALER_PID SO FAR:" >> $seqres.full
+cat $tmp.healer >> $seqres.full
+
+# Access the broken directory to trigger a repair event, which will not yet be
+# processed.
+ls $mntpt/some/victimdir > /dev/null 2> $tmp.err
+filter_mntpt < $tmp.err
+
+ps auxfww | grep xfs_healer >> $seqres.full
+
+echo "LOG AFTER TRYING TO POKE:" >> $seqres.full
+cat $tmp.healer >> $seqres.full
+
+# Mount the clone filesystem to the same mountpoint so that the healer cannot
+# actually reopen it to perform repairs.
+_mount "$loop2" "$mntpt" -o nouuid || _fail "cannot mount decoy fs"
+
+grep -w xfs /proc/mounts >> $seqres.full
+
+# Continue the healer process so it can handle events now.  Wait a few seconds
+# while it fails to reopen disk1's mount point to repair things.
+kill -CONT $XFS_HEALER_PID
+sleep 2
+
+# Both the decoy and the victim fs are stacked on $mntpt; peel both off.
+new_dir_unmount() {
+	_unmount "$mntpt"
+	_unmount "$mntpt"
+}
+
+# Unmount to kill the healer
+_kill_xfs_healer new_dir_unmount
+echo "LOG AFTER FAILURE" >> $seqres.full
+cat $tmp.healer >> $seqres.full
+
+# Did the healer log complaints about not being able to reopen the mountpoint
+# to enact repairs?
+grep -q 'Stale file handle' $tmp.healer || \
+	echo "Should have seen stale file handle complaints"
+
+_exit 0
diff --git a/tests/xfs/664.out b/tests/xfs/664.out
new file mode 100755
index 00000000..5cbf2512
--- /dev/null
+++ b/tests/xfs/664.out
@@ -0,0 +1,2 @@
+QA output created by 664
+ls: reading directory 'MNTPT/some/victimdir': Structure needs cleaning