From: Darrick J. Wong
Date: Tue, 10 Mar 2026 03:53:31 +0000 (-0700)
Subject: xfs: test xfs_healer background service
X-Git-Tag: v2026.03.20~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=df0f4b76c572b517068a00ac360f9622417c3778;p=xfstests-dev.git

xfs: test xfs_healer background service

Make sure that xfs_healer can monitor and repair filesystems when it's
running as a systemd service, which is the intended usage model.

Signed-off-by: Darrick J. Wong
Reviewed-by: Zorro Lang
Signed-off-by: Zorro Lang
---

diff --git a/tests/xfs/665 b/tests/xfs/665
new file mode 100755
index 00000000..0e37e409
--- /dev/null
+++ b/tests/xfs/665
@@ -0,0 +1,152 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024-2026 Oracle. All Rights Reserved.
+#
+# FS QA Test No. 665
+#
+# Ensure that autonomous self healing fixes the filesystem correctly when
+# running in a systemd service
+#
+# unreliable_in_parallel: this test runs the xfs_healer systemd service, which
+# cannot be isolated to a specific testcase with the way check-parallel is
+# implemented.
+#
+. ./common/preamble
+_begin_fstest auto selfhealing unreliable_in_parallel
+
+_cleanup()
+{
+	cd /
+	if [ -n "$new_svcfile" ]; then	# only set once the forked unit path is chosen
+		rm -f "$new_svcfile"
+		systemctl daemon-reload
+	fi
+	rm -r -f $tmp.*
+}
+
+. ./common/filter
+. ./common/fuzzy
+. ./common/systemd
+
+_require_systemd_is_running
+_require_systemd_unit_defined xfs_healer@.service
+_require_scrub
+_require_xfs_io_command "repair" # online repair support
+_require_xfs_db_command "blocktrash"
+_require_command "$XFS_HEALER_PROG" "xfs_healer"
+_require_command "$XFS_PROPERTY_PROG" "xfs_property"
+_require_scratch
+
+_scratch_mkfs >> $seqres.full
+_scratch_mount
+
+_xfs_has_feature $SCRATCH_MNT rmapbt || \
+	_notrun "reverse mapping required to test directory auto-repair"
+_xfs_has_feature $SCRATCH_MNT parent || \
+	_notrun "parent pointers required to test directory auto-repair"
+_require_xfs_healer $SCRATCH_MNT --repair
+
+# Set the autofsck=repair property on the scratch filesystem.
+$XFS_PROPERTY_PROG $SCRATCH_MNT set autofsck=repair >> $seqres.full
+
+# Create a largeish directory: dblksz / 255 hardlinks with 255-digit names
+dblksz=$(_xfs_get_dir_blocksize "$SCRATCH_MNT")
+echo testdata > $SCRATCH_MNT/a
+mkdir -p "$SCRATCH_MNT/some/victimdir"
+for ((i = 0; i < (dblksz / 255); i++)); do
+	fname="$(printf "%0255d" "$i")"
+	ln $SCRATCH_MNT/a $SCRATCH_MNT/some/victimdir/$fname
+done
+
+# Did we get at least two dir blocks? (the directory size must exceed dblksz)
+dirsize=$(stat -c '%s' $SCRATCH_MNT/some/victimdir)
+test "$dirsize" -gt "$dblksz" || echo "failed to create two-block directory"
+
+# Break the directory: unmount, then blocktrash dblock 1 of victimdir offline
+_scratch_unmount
+_scratch_xfs_db -x \
+	-c 'path /some/victimdir' \
+	-c 'bmap' \
+	-c 'dblock 1' \
+	-c 'blocktrash -z -0 -o 0 -x 2048 -y 2048 -n 2048' >> $seqres.full
+
+# Find the existing xfs_healer@ service definition, figure out where we're
+# going to land our test-specific override
+orig_svcfile="$(_systemd_unit_path "xfs_healer@-.service")"
+test -f "$orig_svcfile" || \
+	_notrun "cannot find xfs_healer@ service file"
+
+new_svcdir="$(_systemd_runtime_dir)"
+test -d "$new_svcdir" || \
+	_notrun "cannot find runtime systemd service dir"
+
+# We need to make some local mods to the xfs_healer@ service definition
+# so we fork it and create a new service just for this test.
+new_healer_template="xfs_healer_fstest@.service"
+new_healer_svc="$(_systemd_service_unit_path "$new_healer_template" "$SCRATCH_MNT")"
+_systemd_unit_status "$new_healer_svc" 2>&1 | \
+	grep -E -q '(could not be found|Loaded: not-found)' || \
+	_notrun "systemd service \"$new_healer_svc\" found, will not mess with this"
+
+new_svcfile="$new_svcdir/$new_healer_template"
+cp "$orig_svcfile" "$new_svcfile"
+
+# Pick up all the CLI args except for --repair and --no-autofsck (we want the
+# autofsck behavior). NOTE(review): --autofsck is never appended below; confirm.
+execargs="$(grep '^ExecStart=' $new_svcfile | \
+	sed -e 's/^ExecStart=\S*//g' \
+	    -e 's/--no-autofsck//g' \
+	    -e 's/--repair//g')"
+sed -e '/ExecStart=/d' -e '/BindPaths=/d' -e '/ExecCondition=/d' -i $new_svcfile
+cat >> "$new_svcfile" << ENDL
+
+[Service]
+ExecCondition=$XFS_HEALER_PROG --supported %f
+ExecStart=$XFS_HEALER_PROG $execargs
+ENDL
+_systemd_reload
+
+# Emit the results of our editing to the full log.
+systemctl cat "$new_healer_svc" >> $seqres.full
+
+# Remount (with service activation), then swap in the forked healer unit
+_scratch_mount
+
+old_healer_svc="$(_xfs_healer_svcname "$SCRATCH_MNT")"
+_systemd_unit_stop "$old_healer_svc" &>> $seqres.full
+_systemd_unit_start "$new_healer_svc" &>> $seqres.full
+
+_systemd_unit_status "$new_healer_svc" 2>&1 | grep -q 'Active: active' || \
+	echo "systemd service \"$new_healer_svc\" not running??"
+
+# Access the broken directory to trigger a repair, then poll the directory for
+# up to 5 seconds (50 tries, 0.1s apart) to see if it gets fixed on its own.
+ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+_filter_scratch < $tmp.err	# first access: the one error line 665.out expects
+try=0
+while [ $try -lt 50 ] && grep -q 'Structure needs cleaning' $tmp.err; do
+	echo "try $try saw corruption" >> $seqres.full
+	sleep 0.1
+	ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+	try=$((try + 1))
+done
+echo "try $try no longer saw corruption or gave up" >> $seqres.full
+_filter_scratch < $tmp.err	# must print nothing for the output to match 665.out
+
+# List the dirents of /some/victimdir to see if it stops reporting corruption
+ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+try=0
+while [ $try -lt 50 ] && grep -q 'Structure needs cleaning' $tmp.err; do
+	echo "retry $try still saw corruption" >> $seqres.full
+	sleep 0.1
+	ls $SCRATCH_MNT/some/victimdir > /dev/null 2> $tmp.err
+	try=$((try + 1))
+done
+echo "retry $try no longer saw corruption or gave up" >> $seqres.full
+
+# Kill the healer (presumably by unmounting -- confirm), then save its journal
+_scratch_kill_xfs_healer
+journalctl -u "$new_healer_svc" >> $seqres.full
+
+status=0
+exit
diff --git a/tests/xfs/665.out b/tests/xfs/665.out
new file mode 100755
index 00000000..a960991a
--- /dev/null
+++ b/tests/xfs/665.out
@@ -0,0 +1,2 @@
+QA output created by 665
+ls: reading directory 'SCRATCH_MNT/some/victimdir': Structure needs cleaning