generic: test MADV_POPULATE_READ with IO errors
[xfstests-dev.git] / tests / generic / 019
1 #! /bin/bash
2 # SPDX-License-Identifier: GPL-2.0
3
4 #
5 # FSQA Test No. generic/019
6 #
7 # Run fsstress and fio(dio/aio and mmap) and simulate disk failure
8 # check filesystem consistency at the end.
9 #
10 . ./common/preamble
11 _begin_fstest aio dangerous enospc rw stress recoveryloop
12
13 fio_config=$tmp.fio
14
15 # Import common functions.
16 . ./common/filter
17 _supported_fs generic
18 _require_scratch
19 _require_block_device $SCRATCH_DEV
20 _require_fail_make_request
21
22 SYSFS_BDEV=`_sysfs_dev $SCRATCH_DEV`
23
24 allow_fail_make_request()
25 {
26     echo "Allow global fail_make_request feature"
27     echo 100 > $DEBUGFS_MNT/fail_make_request/probability
28     echo 9999999 > $DEBUGFS_MNT/fail_make_request/times
29     echo 0 >  /sys/kernel/debug/fail_make_request/verbose
30 }
31
32 disallow_fail_make_request()
33 {
34     echo "Disallow global fail_make_request feature"
35     echo 0 > $DEBUGFS_MNT/fail_make_request/probability
36     echo 0 > $DEBUGFS_MNT/fail_make_request/times
37 }
38
39 start_fail_scratch_dev()
40 {
41     echo "Force SCRATCH_DEV device failure"
42     echo " echo 1 > $SYSFS_BDEV/make-it-fail" >> $seqres.full
43     echo 1 > $SYSFS_BDEV/make-it-fail
44 }
45
46 stop_fail_scratch_dev()
47 {
48     echo "Make SCRATCH_DEV device operable again"
49     echo " echo 0 > $SYSFS_BDEV/make-it-fail" >> $seqres.full
50     echo 0 > $SYSFS_BDEV/make-it-fail
51 }
52
53 # Override the default cleanup function.
54 _cleanup()
55 {
56         kill $fs_pid $fio_pid &> /dev/null
57         disallow_fail_make_request
58         cd /
59         rm -r -f $tmp.*
60 }
61
62 RUN_TIME=$((20+10*$TIME_FACTOR))
63 NUM_JOBS=$((4*LOAD_FACTOR))
64 BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
65 FILE_SIZE=$((BLK_DEV_SIZE * 512))
66
67 # Don't fail the test just because fio or fsstress dump cores
68 ulimit -c 0
69
70 cat >$fio_config <<EOF
71 ###########
72 # $seq test's fio activity
73 # Filenames derived from jobsname and jobid like follows:
74 # ${JOB_NAME}.${JOB_ID}.${ITERATION_ID}
75 [global]
76 ioengine=libaio
77 bs=4k
78 directory=${SCRATCH_MNT}
79 filesize=${FILE_SIZE}
80 size=9999T
81 continue_on_error=write
82 ignore_error=EIO,ENOSPC:EIO
83 error_dump=0
84
85 [stress_dio_aio_activity]
86 create_on_open=1
87 fallocate=none
88 iodepth=128*${LOAD_FACTOR}
89 direct=1
90 buffered=0
91 numjobs=${NUM_JOBS}
92 rw=randwrite
93 runtime=40+${RUN_TIME}
94 time_based
95
96 [stress_mmap_activity]
97 ioengine=mmap
98 create_on_open=0
99 fallocate=1
100 fdatasync=40960
101 filesize=8M
102 size=9999T
103 numjobs=${NUM_JOBS}
104 rw=randwrite
105 runtime=40+${RUN_TIME}
106 time_based
107
108 EOF
109
110 _require_fio $fio_config
111
112 # Disable all sync operations to get higher load
113 FSSTRESS_AVOID="$FSSTRESS_AVOID -ffsync=0 -fsync=0 -ffdatasync=0 -f setattr=1"
114
115 _workout()
116 {
117         out=$SCRATCH_MNT/fsstress.$$
118         args=`_scale_fsstress_args -p 1 -n999999999 -f setattr=0 $FSSTRESS_AVOID -d $out`
119         echo ""
120         echo "Start fsstress.."
121         echo ""
122         echo "fsstress $args" >> $seqres.full
123         $FSSTRESS_PROG $args > /dev/null 2>&1 &
124         fs_pid=$!
125         echo "Start fio.."
126         cat $fio_config >>  $seqres.full
127         $FIO_PROG $fio_config >> $seqres.full 2>&1 &
128         fio_pid=$!
129
130         # Let's it work for awhile, and force device failure
131         sleep $RUN_TIME
132         start_fail_scratch_dev
133         # After device turns in to failed state filesystem may yet not know about
134         # that so buffered write(2) may succeed, but any integrity operations
135         # such as (sync, fsync, fdatasync, direct-io) should fail.
136         dd if=/dev/zero of=$SCRATCH_MNT/touch_failed_filesystem count=1 bs=4k conv=fsync \
137             >> $seqres.full 2>&1 && \
138             _fail "failed: still able to perform integrity fsync on $SCRATCH_MNT"
139
140         kill $fs_pid &> /dev/null
141         wait $fs_pid
142         wait $fio_pid
143         unset fs_pid
144         unset fio_pid
145
146         # We expect that broken FS still can be umounted
147         run_check _scratch_unmount
148         # Once filesystem was umounted no one is able to write to block device
149         # It is now safe to bring device back to normal state
150         stop_fail_scratch_dev
151
152         # In order to check that filesystem is able to recover journal on mount(2)
153         # perform mount/umount, after that all errors should be fixed
154         _scratch_mount
155         run_check _scratch_unmount
156 }
157
158 # real QA test starts here
159
160 _scratch_mkfs >> $seqres.full 2>&1 || _fail "mkfs failed"
161 _scratch_mount
162 allow_fail_make_request
163 _workout
164 status=$?
165 exit