git-server-git.apps.pok.os.sepia.ceph.com Git - xfstests-dev.git/commitdiff
xfs: test xfs_healer background service
author Darrick J. Wong <djwong@kernel.org>
Tue, 10 Mar 2026 03:53:31 +0000 (20:53 -0700)
committer Zorro Lang <zlang@kernel.org>
Tue, 17 Mar 2026 03:31:44 +0000 (11:31 +0800)
Make sure that xfs_healer can monitor and repair filesystems when it's
running as a systemd service, which is the intended usage model.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
tests/xfs/665 [new file with mode: 0755]
tests/xfs/665.out [new file with mode: 0755]

diff --git a/tests/xfs/665 b/tests/xfs/665
new file mode 100755 (executable)
index 0000000..0e37e40
--- /dev/null
@@ -0,0 +1,152 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024-2026 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 665
+#
+# Ensure that autonomous self healing fixes the filesystem correctly when
+# running in a systemd service
+#
+# unreliable_in_parallel: this test runs the xfs_healer systemd service, which
+# cannot be isolated to a specific testcase with the way check-parallel is
+# implemented.
+#
+. ./common/preamble
+_begin_fstest auto selfhealing unreliable_in_parallel
+
+_cleanup()
+{
+       cd /
+       if [ -n "$new_svcfile" ]; then
+               rm -f "$new_svcfile"
+               systemctl daemon-reload
+       fi
+       rm -r -f $tmp.*
+}
+
+. ./common/filter
+. ./common/fuzzy
+. ./common/systemd
+
+_require_systemd_is_running
+_require_systemd_unit_defined xfs_healer@.service
+_require_scrub
+_require_xfs_io_command "repair"       # online repair support
+_require_xfs_db_command "blocktrash"
+_require_command "$XFS_HEALER_PROG" "xfs_healer"
+_require_command "$XFS_PROPERTY_PROG" "xfs_property"
+_require_scratch
+
+_scratch_mkfs >> $seqres.full
+_scratch_mount
+
+_xfs_has_feature $SCRATCH_MNT rmapbt || \
+       _notrun "reverse mapping required to test directory auto-repair"
+_xfs_has_feature $SCRATCH_MNT parent || \
+       _notrun "parent pointers required to test directory auto-repair"
+_require_xfs_healer $SCRATCH_MNT --repair
+
+# Configure the filesystem for automatic repair of the filesystem.
+$XFS_PROPERTY_PROG $SCRATCH_MNT set autofsck=repair >> $seqres.full
+
+# Build a largeish directory: one hard link to a small file for every 255
+# bytes of directory block size, each with a 255-character zero-padded name.
+dblksz=$(_xfs_get_dir_blocksize "$SCRATCH_MNT")
+echo testdata > $SCRATCH_MNT/a
+mkdir -p "$SCRATCH_MNT/some/victimdir"
+nr_links=$((dblksz / 255))
+i=0
+while ((i < nr_links)); do
+       printf -v fname "%0255d" "$i"
+       ln $SCRATCH_MNT/a $SCRATCH_MNT/some/victimdir/$fname
+       i=$((i + 1))
+done
+
+# The directory must have outgrown a single directory block; complain in the
+# test output if it did not.
+dirsize=$(stat -c '%s' $SCRATCH_MNT/some/victimdir)
+if ! test "$dirsize" -gt "$dblksz"; then
+       echo "failed to create two-block directory"
+fi
+
+# Break the directory
+_scratch_unmount
+_scratch_xfs_db -x \
+       -c 'path /some/victimdir' \
+       -c 'bmap' \
+       -c 'dblock 1' \
+       -c 'blocktrash -z -0 -o 0 -x 2048 -y 2048 -n 2048' >> $seqres.full
+
+# Find the existing xfs_healer@ service definition, figure out where we're
+# going to land our test-specific override
+orig_svcfile="$(_systemd_unit_path "xfs_healer@-.service")"
+test -f "$orig_svcfile" || \
+       _notrun "cannot find xfs_healer@ service file"
+
+new_svcdir="$(_systemd_runtime_dir)"
+test -d "$new_svcdir" || \
+       _notrun "cannot find runtime systemd service dir"
+
+# We need to make some local mods to the xfs_healer@ service definition
+# so we fork it and create a new service just for this test.
+new_healer_template="xfs_healer_fstest@.service"
+new_healer_svc="$(_systemd_service_unit_path "$new_healer_template" "$SCRATCH_MNT")"
+_systemd_unit_status "$new_healer_svc" 2>&1 | \
+       grep -E -q '(could not be found|Loaded: not-found)' || \
+       _notrun "systemd service \"$new_healer_svc\" found, will not mess with this"
+
+new_svcfile="$new_svcdir/$new_healer_template"
+cp "$orig_svcfile" "$new_svcfile"
+
+# Pick up all the CLI args except for --repair and --no-autofsck because we're
+# going to force it to --autofsck below
+execargs="$(grep '^ExecStart=' $new_svcfile | \
+           sed -e 's/^ExecStart=\S*//g' \
+               -e 's/--no-autofsck//g' \
+               -e 's/--repair//g')"
+sed -e '/ExecStart=/d' -e '/BindPaths=/d' -e '/ExecCondition=/d' -i $new_svcfile
+cat >> "$new_svcfile" << ENDL
+
+[Service]
+ExecCondition=$XFS_HEALER_PROG --supported %f
+ExecStart=$XFS_HEALER_PROG $execargs
+ENDL
+_systemd_reload
+
+# Emit the results of our editing to the full log.
+systemctl cat "$new_healer_svc" >> $seqres.full
+
+# Remount, with service activation
+_scratch_mount
+
+old_healer_svc="$(_xfs_healer_svcname "$SCRATCH_MNT")"
+_systemd_unit_stop "$old_healer_svc" &>> $seqres.full
+_systemd_unit_start "$new_healer_svc" &>> $seqres.full
+
+_systemd_unit_status "$new_healer_svc" 2>&1 | grep -q 'Active: active' || \
+       echo "systemd service \"$new_healer_svc\" not running??"
+
+# Access the broken directory to trigger a repair, then poll the directory
+# for 5 seconds to see if it gets fixed without us needing to intervene.
+ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+_filter_scratch < $tmp.err
+try=0
+while [ $try -lt 50 ] && grep -q 'Structure needs cleaning' $tmp.err; do
+       echo "try $try saw corruption" >> $seqres.full
+       sleep 0.1
+       ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+       try=$((try + 1))
+done
+echo "try $try no longer saw corruption or gave up" >> $seqres.full
+_filter_scratch < $tmp.err
+
+# List the dirents of /victimdir to see if it stops reporting corruption
+ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+try=0
+while [ $try -lt 50 ] && grep -q 'Structure needs cleaning' $tmp.err; do
+       echo "retry $try still saw corruption" >> $seqres.full
+       sleep 0.1
+       ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+       try=$((try + 1))
+done
+echo "retry $try no longer saw corruption or gave up" >> $seqres.full
+
+# Unmount to kill the healer
+_scratch_kill_xfs_healer
+journalctl -u "$new_healer_svc" >> $seqres.full
+
+status=0
+exit
diff --git a/tests/xfs/665.out b/tests/xfs/665.out
new file mode 100755 (executable)
index 0000000..a960991
--- /dev/null
@@ -0,0 +1,2 @@
+QA output created by 665
+ls: reading directory 'SCRATCH_MNT/some/victimdir': Structure needs cleaning