git-server-git.apps.pok.os.sepia.ceph.com Git - xfstests-dev.git/commitdiff
xfs: set up common code for testing xfs_healer
author: Darrick J. Wong <djwong@kernel.org>
Tue, 10 Mar 2026 03:51:10 +0000 (20:51 -0700)
committer: Zorro Lang <zlang@kernel.org>
Tue, 17 Mar 2026 03:31:43 +0000 (11:31 +0800)
Add a bunch of common code so that we can test the xfs_healer daemon.
Most of the changes here are to make it easier to manage the systemd
service units for xfs_healer and xfs_scrub.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
common/config
common/rc
common/systemd
common/xfs
tests/xfs/802

index 1420e35ddfee421e047448cbd6a83b6298680b45..8468a60081f50ce026fbb6cb65ff328a8df63ceb 100644 (file)
@@ -161,6 +161,20 @@ export XFS_ADMIN_PROG="$(type -P xfs_admin)"
 export XFS_GROWFS_PROG=$(type -P xfs_growfs)
 export XFS_SPACEMAN_PROG="$(type -P xfs_spaceman)"
 export XFS_SCRUB_PROG="$(type -P xfs_scrub)"
+
+XFS_HEALER_PROG="$(type -P xfs_healer)"
+XFS_HEALER_START_PROG="$(type -P xfs_healer_start)"
+
+# If not found, try the ones installed in libexec
+if [ ! -x "$XFS_HEALER_PROG" ] && [ -e /usr/libexec/xfsprogs/xfs_healer ]; then
+       XFS_HEALER_PROG=/usr/libexec/xfsprogs/xfs_healer
+fi
+if [ ! -x "$XFS_HEALER_START_PROG" ] && [ -e /usr/libexec/xfsprogs/xfs_healer_start ]; then
+       XFS_HEALER_START_PROG=/usr/libexec/xfsprogs/xfs_healer_start
+fi
+export XFS_HEALER_PROG
+export XFS_HEALER_START_PROG
+
 export XFS_PARALLEL_REPAIR_PROG="$(type -P xfs_prepair)"
 export XFS_PARALLEL_REPAIR64_PROG="$(type -P xfs_prepair64)"
 export __XFSDUMP_PROG="$(type -P xfsdump)"
index e89ae9f4540f67922b9cc3eb1b08b01532341956..5fe44e2158ffb3bfd287b2db97e20c2dc7b9361e 100644 (file)
--- a/common/rc
+++ b/common/rc
@@ -3031,6 +3031,11 @@ _require_xfs_io_command()
        "label")
                testio=`$XFS_IO_PROG -c "label" $TEST_DIR 2>&1`
                ;;
+       "verifymedia")
+               testio=`$XFS_IO_PROG -x -c "verifymedia $* 0 0" 2>&1`
+               echo $testio | grep -q "invalid option" && \
+                       _notrun "xfs_io $command support is missing"
+               ;;
        "open")
                # -c "open $f" is broken in xfs_io <= 4.8. Along with the fix,
                # a new -C flag was introduced to execute one shot commands.
index b2e24f267b2d9326c98dab9ee24f8298c863e45d..589aad1bef2637e522c42464743dfac06667c8c8 100644 (file)
@@ -44,6 +44,18 @@ _systemd_unit_active() {
        test "$(systemctl is-active "$1")" = "active"
 }
 
+# Wait for up to a certain number of seconds (default 30) for a service to
+# reach the "inactive" state, polling twice per second.
+#
+# NOTE(review): the return status does not distinguish "unit went inactive"
+# from "timed out"; callers must re-check the unit state if they care.
+# NOTE(review): the loop index i is not declared local, so it leaks into the
+# caller's scope.
+_systemd_unit_wait() {
+       local svcname="$1"
+       local timeout="${2:-30}"
+
+       for ((i = 0; i < (timeout * 2); i++)); do
+               test "$(systemctl is-active "$svcname")" = "inactive" && break
+               sleep 0.5
+       done
+}
+
 _require_systemd_unit_active() {
        _require_systemd_unit_defined "$1"
        _systemd_unit_active "$1" || \
@@ -71,3 +83,30 @@ _systemd_unit_status() {
        _systemd_installed || return 1
        systemctl status "$1"
 }
+
+# Start a systemd unit; returns systemctl's exit status.
+_systemd_unit_start() {
+       systemctl start "$1"
+}
+# Stop a running systemd unit; returns systemctl's exit status.
+_systemd_unit_stop() {
+       systemctl stop "$1"
+}
+
+# Mask a systemd unit so that systemd refuses to start it.
+_systemd_unit_mask() {
+       systemctl mask "$1"
+}
+# Undo a previous _systemd_unit_mask so the unit can be started again.
+_systemd_unit_unmask() {
+       systemctl unmask "$1"
+}
+# Decide if a systemd unit is currently masked by grepping the "Loaded:"
+# line of its status output.  Predicate: succeeds only when masked.
+_systemd_unit_masked() {
+       systemctl status "$1" 2>/dev/null | grep -q 'Loaded: masked'
+}
+
+# Compute the instance name of a templated systemd service unit for a given
+# path, e.g. ("xfs_scrub@.service", "/mnt") -> "xfs_scrub@mnt.service".
+_systemd_service_unit_path() {
+       local template="$1"
+       local path="$2"
+
+       systemd-escape --template "$template" --path "$path"
+}
index 7fa0db2e26b4c94de4c61c47513484d9199dc666..f7a6d2f2f03a4b96848e5c8108a4ebf0c8b916e3 100644 (file)
@@ -2301,3 +2301,101 @@ _filter_bmap_gno()
                if ($ag =~ /\d+/) {print "$ag "} ;
         '
 }
+
+# Run the xfs_healer program (located by common/config, possibly under
+# /usr/libexec) with the given arguments on some filesystem.
+_xfs_healer() {
+       $XFS_HEALER_PROG "$@"
+}
+
+# Compute (and echo) the xfs_healer systemd service instance name for a given
+# path.  This is easy because xfs_healer has always supported --svcname.
+_xfs_healer_svcname()
+{
+       _xfs_healer --svcname "$@"
+}
+
+# Compute the xfs_scrub systemd service instance name for a given path.  This
+# is tricky because xfs_scrub only gained --svcname when xfs_healer was made.
+_xfs_scrub_svcname()
+{
+       local ret
+
+       # Prefer asking xfs_scrub itself, if this version supports --svcname...
+       if ret="$($XFS_SCRUB_PROG --svcname "$@" 2>/dev/null)"; then
+               echo "$ret"
+               return 0
+       fi
+
+       # ...but if not, we can fall back to brute force systemd invocations.
+       _systemd_service_unit_path "xfs_scrub@.service" "$*"
+}
+
+# Run the xfs_healer program on the scratch fs.  Callers pass options only;
+# the scratch mountpoint is appended as the final argument.
+_scratch_xfs_healer() {
+       _xfs_healer "$@" "$SCRATCH_MNT"
+}
+
+# Turn off the background xfs_healer service (if any) so that it doesn't fix
+# injected metadata errors; then start our own background copy of xfs_healer
+# so the test can capture its output.
+#
+# $1: mountpoint to monitor; $2: log file for the daemon's output; any
+# remaining arguments are passed to xfs_healer.  The daemon's pid is recorded
+# in the global XFS_HEALER_PID for _kill_xfs_healer.
+#
+# NOTE(review): returns normally even if "monitoring started" never shows up
+# in the log within the 30s window — confirm callers tolerate this.
+_invoke_xfs_healer() {
+       local mount="$1"
+       local logfile="$2"
+       shift; shift
+
+       if _systemd_is_running; then
+               local svc="$(_xfs_healer_svcname "$mount")"
+               _systemd_unit_stop "$svc" &>> $seqres.full
+       fi
+
+       $XFS_HEALER_PROG "$mount" "$@" &> "$logfile" &
+       XFS_HEALER_PID=$!
+
+       # Wait 30s for the healer program to really start up
+       for ((i = 0; i < 60; i++)); do
+               test -e "$logfile" && \
+                       grep -q 'monitoring started' "$logfile" && \
+                       break
+               sleep 0.5
+       done
+}
+
+# Run our own copy of xfs_healer against the scratch device.  Note that
+# unmounting the scratch fs causes the healer daemon to exit, so we don't need
+# to kill it explicitly from _cleanup.  $1 is the log file; remaining
+# arguments are passed to xfs_healer.
+_scratch_invoke_xfs_healer() {
+       _invoke_xfs_healer "$SCRATCH_MNT" "$@"
+}
+
+# Unmount the filesystem to kill the xfs_healer instance started by
+# _invoke_xfs_healer, and wait up to a certain amount of time for it to exit.
+#
+# $1: the unmount command to run; $2: optional timeout in seconds (default 30)
+# spent retrying the unmount twice per second.
+_kill_xfs_healer() {
+       local unmount="$1"
+       local timeout="${2:-30}"
+       local i
+
+       # Unmount fs to kill healer, then wait for it to finish
+       for ((i = 0; i < (timeout * 2); i++)); do
+               $unmount &>> $seqres.full && break
+               sleep 0.5
+       done
+
+       # If the daemon is still around, kill it directly, then reap any
+       # background children and forget the pid.
+       test -n "$XFS_HEALER_PID" && \
+               kill $XFS_HEALER_PID &>> $seqres.full
+       wait
+       unset XFS_HEALER_PID
+}
+
+# Unmount the scratch fs to kill a _scratch_invoke_xfs_healer instance.
+# $1: unmount command (defaults to _scratch_unmount); remaining arguments are
+# forwarded to _kill_xfs_healer.
+_scratch_kill_xfs_healer() {
+       local unmount="${1:-_scratch_unmount}"
+       # NOTE(review): bare "shift" fails (nonfatally) when no arguments were
+       # passed; "shift || :" would be cleaner — confirm callers always pass $1.
+       shift
+
+       _kill_xfs_healer "$unmount" "$@"
+}
+
+# Does this mounted filesystem support xfs_healer?  _notrun the test if the
+# xfs_healer --supported probe (run against the mountpoint in "$@") fails.
+_require_xfs_healer()
+{
+       _xfs_healer --supported "$@" &>/dev/null || \
+               _notrun "health monitoring not supported on this kernel"
+}
index fc4767acb66a55be0b193237350b88ea8ac1bef4..18312b15b645bdde11034ebf69d81bde11d62aaf 100755 (executable)
@@ -105,8 +105,8 @@ run_scrub_service() {
 }
 
 echo "Scrub Scratch FS"
-scratch_path=$(systemd-escape --path "$SCRATCH_MNT")
-run_scrub_service xfs_scrub@$scratch_path
+svc="$(_xfs_scrub_svcname "$SCRATCH_MNT")"
+run_scrub_service "$svc"
 find_scrub_trace "$SCRATCH_MNT"
 
 # Remove the xfs_scrub_all media scan stamp directory (if specified) because we