From: Darrick J. Wong <djwong@kernel.org>
Date: Fri, 30 Dec 2022 22:12:54 +0000 (-0800)
Subject: fuzzy: abort scrub stress testing if the scratch fs went down
X-Git-Tag: v2023.01.15~11
X-Git-Url: http://git.apps.os.sepia.ceph.com/?a=commitdiff_plain;h=ae42d50a2acd193c7e11f2bb8ee1de7f4a124473;p=xfstests-dev.git

fuzzy: abort scrub stress testing if the scratch fs went down

There's no point in continuing a stress test of online fsck if the
filesystem goes down.  We can't query that kind of state directly, so as
a proxy we try to stat the mountpoint and interpret any error return as
a sign that the fs is down.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
---

diff --git a/common/fuzzy b/common/fuzzy
index 6519d5c1..f1bc2dc7 100644
--- a/common/fuzzy
+++ b/common/fuzzy
@@ -338,10 +338,17 @@ __stress_scrub_filter_output() {
 		    -e '/No space left on device/d'
 }
 
+# Decide if the scratch filesystem is still alive (zero status means alive).
+__stress_scrub_scratch_alive() {
+	# We can't query shutdown state directly, so as a proxy we treat any
+	# failure to stat the scratch mountpoint as a sign the fs went down.
+	stat "$SCRATCH_MNT" &>/dev/null
+}
+
 # Decide if we want to keep running stress tests.  The first argument is the
 # stop time, and second argument is the path to the sentinel file.
 __stress_scrub_running() {
-	test -e "$2" && test "$(date +%s)" -lt "$1"
+	test -e "$2" && test "$(date +%s)" -lt "$1" && __stress_scrub_scratch_alive
 }
 
 # Run fs freeze and thaw in a tight loop.
@@ -486,6 +493,10 @@ _scratch_xfs_stress_scrub() {
 	done
 	_scratch_xfs_stress_scrub_cleanup
 
+	# Warn the user if we think the scratch filesystem went down.
+	__stress_scrub_scratch_alive || \
+		echo "Did the scratch filesystem die?"
+
 	echo "Loop finished at $(date)" >> $seqres.full
 }