2 # SPDX-License-Identifier: GPL-2.0
3 # Copyright (c) 2021 Oracle, Inc. All Rights Reserved.
7 # Test nested log recovery with repeated (simulated) disk failures. We kick
8 # off fsstress on a loopback filesystem mounted on the scratch fs, then switch
9 # out the underlying scratch device with dm-error to see what happens when the
10 # disk goes down. Having taken down both fses in this manner, remount them and
11 # repeat. This test simulates VM hosts crashing to try to shake out CoW bugs
12 # in writeback on the host that cause VM guests to fail to recover.
15 _begin_fstest shutdown auto log metadata eio recoveryloop
# Teardown steps: force-kill any fsstress that is still running, then unmount
# the loop mount if a mount point was set (umount's stderr is discarded).
20 $KILLALL_PROG -9 fsstress > /dev/null 2>&1
22 if [ -n "$loopmnt" ]; then
23 $UMOUNT_PROG $loopmnt 2>/dev/null
31 # Import common functions.
35 # Modify as appropriate.
# Prerequisites used below: reflink copies of the loop image, the dm "error"
# target, and killall.
38 _require_scratch_reflink
40 _require_dm_target error
41 _require_command "$KILLALL_PROG" "killall"
44 echo "Silence is golden."
# Format the scratch device; mkfs output is appended to $seqres.full.
46 _scratch_mkfs >> $seqres.full 2>&1
47 _require_metadata_journaling $SCRATCH_DEV
51 # Create a fs image file sized at 1/3 of the scratch fs's available space
52 scratch_freesp_bytes=$(_get_available_space $SCRATCH_MNT)
53 loopimg_bytes=$((scratch_freesp_bytes / 3))
55 loopimg=$SCRATCH_MNT/testfs
56 truncate -s $loopimg_bytes $loopimg
# Flag files that coordinate the main loop with the background snapshot loop:
#   $scratch_aliveflag - exists while the snapshot loop should keep running;
#                        the main loop touches it before starting
#                        snap_loop_fs and removes it to request a stop.
#   $snap_aliveflag    - exists while the snapshot loop is running; the main
#                        loop polls for its removal.
62 scratch_aliveflag=$tmp.runsnap
63 snap_aliveflag=$tmp.snapping
# Snapshot loop steps (presumably the body of snap_loop_fs; its definition
# line is not visible here -- TODO confirm): mark the loop as running, keep
# copying the loop image to $loopimg.a with _cp_reflink for as long as the
# run flag exists, then clear the running mark.
66 touch "$snap_aliveflag"
67 while [ -e "$scratch_aliveflag" ]; do
69 _cp_reflink $loopimg $loopimg.a
72 rm -f "$snap_aliveflag"
# fsstress command line aimed at the loop mount, kept as an array so it can be
# expanded word-for-word when launched.
75 fsstress=($FSSTRESS_PROG $FSSTRESS_AVOID -d "$loopmnt" -n 999999 -p "$((LOAD_FACTOR * 4))")
# Main loop: 25 * TIME_FACTOR iterations of "run load, fail the disk, recover".
77 for i in $(seq 1 $((25 * TIME_FACTOR)) ); do
# Start the background snapshot loop for this iteration; its output is
# appended to $seqres.full.
78 touch $scratch_aliveflag
79 snap_loop_fs >> $seqres.full 2>&1 &
# Mount the loop image. On failure, ask the snapshot loop to stop, dump the
# image via _metadump_dev and fail the test.
81 if ! _mount $loopimg $loopmnt -o loop; then
82 rm -f $scratch_aliveflag
83 _metadump_dev $loopimg $seqres.loop.$i.md
84 _fail "iteration $i loopimg mount failed"
# Launch fsstress in the background from a subshell; its stdout is appended
# to $seqres.full.
88 ("${fsstress[@]}" >> $seqres.full &) > /dev/null 2>&1
90 # Purposely include 0 second sleeps to test shutdown immediately after the loop fs is mounted and fsstress is started.
92 sleep $((RANDOM % (3 * TIME_FACTOR) ))
# Ask the snapshot loop to stop.
93 rm -f $scratch_aliveflag
95 # This test aims to simulate sudden disk failure, which means that we
96 # do not want to quiesce the filesystem or otherwise give it a chance
97 # to flush its logs. Therefore we want to call dmsetup with the
98 # --nolockfs parameter; to make this happen we must call the load
99 # error table helper *without* 'lockfs'.
100 _dmerror_load_error_table
# Keep sending SIGKILL to fsstress until ps no longer lists any fsstress
# process; the loop condition tests the exit status of the grep pipeline.
102 ps -e | grep fsstress > /dev/null 2>&1
103 while [ $? -eq 0 ]; do
104 $KILLALL_PROG -9 fsstress > /dev/null 2>&1
105 wait > /dev/null 2>&1
106 ps -e | grep fsstress > /dev/null 2>&1
# Check up to 10 times whether the snapshot loop has removed its running flag;
# stop checking as soon as the flag is gone.
108 for ((j = 0; j < 10; j++)); do
109 test -e "$snap_aliveflag" || break
113 # Unmount everything, load the working table and mount again to replay the
114 # log, so that we have a consistent fs after the test.
115 $UMOUNT_PROG $loopmnt
117 # We must unmount dmerror here, or all later testing will crash.
118 # So retry the unmount up to 100 times before giving up.
119 for ((j = 0; j < 100; j++)); do
# Output of each unmount attempt is captured in $tmp.unmount.err.
121 _dmerror_unmount > $tmp.unmount.err 2>&1
# Fail the test if the scratch fs could not be unmounted.
# NOTE(review): $is_unmounted is assigned on lines not visible here -- confirm
# that a nonzero value means "still mounted".
127 if [ $is_unmounted -ne 0 ];then
129 _fail "iteration $i scratch unmount failed"
# Switch back to the working table and remount the scratch fs; if that
# fails, dump $DMERROR_DEV via _metadump_dev and fail the test.
131 _dmerror_load_working_table
132 if ! _dmerror_mount; then
133 _metadump_dev $DMERROR_DEV $seqres.scratch.$i.md
134 _fail "iteration $i scratch mount failed"
138 # Make sure the fs image file is ok
# If the image still exists as a regular file, do a trial loop mount and
# unmount it again on success.
139 if [ -f "$loopimg" ]; then
140 if _mount $loopimg $loopmnt -o loop; then
141 $UMOUNT_PROG $loopmnt &> /dev/null
# NOTE(review): the next two lines appear to be the failure path of the loop
# mount above (the branch keyword is not visible here), yet they dump
# $DMERROR_DEV and report a *scratch* mount failure -- confirm this is intended.
143 _metadump_dev $DMERROR_DEV $seqres.scratch.final.md
144 echo "final scratch mount failed"
# Check the loop image with _check_scratch_fs, with SCRATCH_RTDEV and
# SCRATCH_LOGDEV set to empty for this one command.
146 SCRATCH_RTDEV= SCRATCH_LOGDEV= _check_scratch_fs $loopimg
149 # success, all done; let the test harness check the scratch fs