From: Darrick J. Wong
Date: Fri, 30 Dec 2022 22:12:54 +0000 (-0800)
Subject: fuzzy: abort scrub stress testing if the scratch fs went down
X-Git-Tag: v2023.01.15~11
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ae42d50a2acd193c7e11f2bb8ee1de7f4a124473;p=xfstests-dev.git

fuzzy: abort scrub stress testing if the scratch fs went down

There's no point in continuing a stress test of online fsck if the
filesystem goes down. We can't query that kind of state directly, so as
a proxy we try to stat the mountpoint and interpret any error return as
a sign that the fs is down.

Signed-off-by: Darrick J. Wong
Reviewed-by: Zorro Lang
Signed-off-by: Zorro Lang
---

diff --git a/common/fuzzy b/common/fuzzy
index 6519d5c1..f1bc2dc7 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -338,10 +338,17 @@ __stress_scrub_filter_output() {
 		-e '/No space left on device/d'
 }
 
+# Decide if the scratch filesystem is still alive.
+__stress_scrub_scratch_alive() {
+	# If we can't stat the scratch filesystem, there's a reasonably good
+	# chance that the fs shut down, which is not good.
+	stat "$SCRATCH_MNT" &>/dev/null
+}
+
 # Decide if we want to keep running stress tests. The first argument is the
 # stop time, and second argument is the path to the sentinel file.
 __stress_scrub_running() {
-	test -e "$2" && test "$(date +%s)" -lt "$1"
+	test -e "$2" && test "$(date +%s)" -lt "$1" && __stress_scrub_scratch_alive
 }
 
 # Run fs freeze and thaw in a tight loop.
@@ -486,6 +493,10 @@ _scratch_xfs_stress_scrub() {
 	done
 	_scratch_xfs_stress_scrub_cleanup
 
+	# Warn the user if we think the scratch filesystem went down.
+	__stress_scrub_scratch_alive || \
+		echo "Did the scratch filesystem die?"
+
 	echo "Loop finished at $(date)" >> $seqres.full
 }
 