git.apps.os.sepia.ceph.com Git - xfstests-dev.git/commitdiff
xfs: race fsmap with readonly remounts to detect crash or livelock
author Darrick J. Wong <djwong@kernel.org>
Fri, 30 Dec 2022 22:12:58 +0000 (14:12 -0800)
committer Zorro Lang <zlang@kernel.org>
Sat, 14 Jan 2023 13:55:28 +0000 (21:55 +0800)
Add a new test that races the GETFSMAP ioctl with ro/rw remounting to
make sure we don't livelock on the empty transaction that fsmap uses to
avoid deadlocking on rmap btree cycles.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
common/fuzzy
ltp/fsstress.c
tests/xfs/560 [new file with mode: 0755]
tests/xfs/560.out [new file with mode: 0644]

index 58e299d34b5e41743de201a5b7f3d7209ab168b3..ee97aa4298912cb0b0d26634ebd38a3274030a73 100644 (file)
@@ -429,6 +429,7 @@ __stress_scrub_clean_scratch() {
 __stress_scrub_fsx_loop() {
        local end="$1"
        local runningfile="$2"
+       local remount_period="$3"
        local focus=(-q -X)     # quiet, validate file contents
 
        # As of November 2022, 2 million fsx ops should be enough to keep
@@ -440,6 +441,43 @@ __stress_scrub_fsx_loop() {
        local args="$FSX_AVOID ${focus[@]} ${SCRATCH_MNT}/fsx.$seq"
        echo "Running $here/ltp/fsx $args" >> $seqres.full
 
+       if [ -n "$remount_period" ]; then
+               local mode="rw"
+               local rw_arg=""
+               while __stress_scrub_running "$end" "$runningfile"; do
+                       # Need to recheck running conditions if we cleared
+                       # anything.
+                       test "$mode" = "rw" && __stress_scrub_clean_scratch && continue
+
+                       timeout -s TERM "$remount_period" $here/ltp/fsx \
+                                       $args $rw_arg >> $seqres.full
+                       res=$?
+                       echo "$mode fsx exits with $res at $(date)" >> $seqres.full
+                       if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then
+                               # Stop if fsx returns error.  Mask off
+                               # the magic code 124 because that is how the
+                               # timeout(1) program communicates that we ran
+                               # out of time.
+                               break;
+                       fi
+                       if [ "$mode" = "rw" ]; then
+                               mode="ro"
+                               rw_arg="-t 0 -w 0 -FHzCIJBE0"
+                       else
+                               mode="rw"
+                               rw_arg=""
+                       fi
+
+                       # Try remounting until we get the result we wanted
+                       while ! _scratch_remount "$mode" &>/dev/null && \
+                             __stress_scrub_running "$end" "$runningfile"; do
+                               sleep 0.2
+                       done
+               done
+               rm -f "$runningfile"
+               return 0
+       fi
+
        while __stress_scrub_running "$end" "$runningfile"; do
                # Need to recheck running conditions if we cleared anything
                __stress_scrub_clean_scratch && continue
@@ -453,12 +491,50 @@ __stress_scrub_fsx_loop() {
 __stress_scrub_fsstress_loop() {
        local end="$1"
        local runningfile="$2"
+       local remount_period="$3"
 
        # As of March 2022, 2 million fsstress ops should be enough to keep
        # any filesystem busy for a couple of hours.
        local args=$(_scale_fsstress_args -p 4 -d $SCRATCH_MNT -n 2000000 $FSSTRESS_AVOID)
        echo "Running $FSSTRESS_PROG $args" >> $seqres.full
 
+       if [ -n "$remount_period" ]; then
+               local mode="rw"
+               local rw_arg=""
+               while __stress_scrub_running "$end" "$runningfile"; do
+                       # Need to recheck running conditions if we cleared
+                       # anything.
+                       test "$mode" = "rw" && __stress_scrub_clean_scratch && continue
+
+                       timeout -s TERM "$remount_period" $FSSTRESS_PROG \
+                                       $args $rw_arg >> $seqres.full
+                       res=$?
+                       echo "$mode fsstress exits with $res at $(date)" >> $seqres.full
+                       if [ "$res" -ne 0 ] && [ "$res" -ne 124 ]; then
+                               # Stop if fsstress returns error.  Mask off
+                               # the magic code 124 because that is how the
+                               # timeout(1) program communicates that we ran
+                               # out of time.
+                               break;
+                       fi
+                       if [ "$mode" = "rw" ]; then
+                               mode="ro"
+                               rw_arg="-R"
+                       else
+                               mode="rw"
+                               rw_arg=""
+                       fi
+
+                       # Try remounting until we get the result we wanted
+                       while ! _scratch_remount "$mode" &>/dev/null && \
+                             __stress_scrub_running "$end" "$runningfile"; do
+                               sleep 0.2
+                       done
+               done
+               rm -f "$runningfile"
+               return 0
+       fi
+
        while __stress_scrub_running "$end" "$runningfile"; do
                # Need to recheck running conditions if we cleared anything
                __stress_scrub_clean_scratch && continue
@@ -526,6 +602,13 @@ _scratch_xfs_stress_scrub_cleanup() {
        echo "Waiting for children to exit at $(date)" >> $seqres.full
        wait
 
+       # Ensure the scratch fs is also writable before we exit.
+       if [ -n "$__SCRUB_STRESS_REMOUNT_LOOP" ]; then
+               echo "Remounting rw at $(date)" >> $seqres.full
+               _scratch_remount rw >> $seqres.full 2>&1
+               __SCRUB_STRESS_REMOUNT_LOOP=""
+       fi
+
        echo "Cleanup finished at $(date)" >> $seqres.full
 }
 
@@ -561,6 +644,9 @@ __stress_scrub_check_commands() {
 #      in a separate loop.  If zero -i options are specified, do not run.
 #      Callers must check each of these commands (via _require_xfs_io_command)
 #      before calling here.
+# -r   Run the exerciser for this long (a timeout(1) duration), then remount
+#      the fs ro or rw.  The default is to run continuously with no remount,
+#      unless XFS_SCRUB_STRESS_REMOUNT_PERIOD is set.
 # -s   Pass this command to xfs_io to test scrub.  If zero -s options are
 #      specified, xfs_io will not be run.
 # -t   Run online scrub against this file; $SCRATCH_MNT is the default.
@@ -577,16 +663,19 @@ _scratch_xfs_stress_scrub() {
        local scrub_delay="${XFS_SCRUB_STRESS_DELAY:--1}"
        local exerciser="fsstress"
        local io_args=()
+       local remount_period="${XFS_SCRUB_STRESS_REMOUNT_PERIOD}"
 
        __SCRUB_STRESS_FREEZE_PID=""
+       __SCRUB_STRESS_REMOUNT_LOOP=""
        rm -f "$runningfile"
        touch "$runningfile"
 
        OPTIND=1
-       while getopts "fi:s:t:w:X:" c; do
+       while getopts "fi:r:s:t:w:X:" c; do
                case "$c" in
                        f) freeze=yes;;
                        i) io_args+=("$OPTARG");;
+                       r) remount_period="$OPTARG";;
                        s) one_scrub_args+=("$OPTARG");;
                        t) scrub_tgt="$OPTARG";;
                        w) scrub_delay="$OPTARG";;
@@ -611,7 +700,12 @@ _scratch_xfs_stress_scrub() {
        echo "Loop started at $(date --date="@${start}")," \
                   "ending at $(date --date="@${end}")" >> $seqres.full
 
-       "__stress_scrub_${exerciser}_loop" "$end" "$runningfile" &
+       if [ -n "$remount_period" ]; then
+               __SCRUB_STRESS_REMOUNT_LOOP="1"
+       fi
+
+       "__stress_scrub_${exerciser}_loop" "$end" "$runningfile" \
+                       "$remount_period" &
 
        if [ -n "$freeze" ]; then
                __stress_scrub_freeze_loop "$end" "$runningfile" &
index b395bc4da24675954a07aa0aca66657ab9b8dc85..10608fb554c49d19c568c607f4d480accf58e999 100644 (file)
@@ -426,6 +426,7 @@ int symlink_path(const char *, pathname_t *);
 int    truncate64_path(pathname_t *, off64_t);
 int    unlink_path(pathname_t *);
 void   usage(void);
+void   read_freq(void);
 void   write_freq(void);
 void   zero_freq(void);
 void   non_btrfs_freq(const char *);
@@ -472,7 +473,7 @@ int main(int argc, char **argv)
        xfs_error_injection_t           err_inj;
        struct sigaction action;
        int             loops = 1;
-       const char      *allopts = "cd:e:f:i:l:m:M:n:o:p:rs:S:vVwx:X:zH";
+       const char      *allopts = "cd:e:f:i:l:m:M:n:o:p:rRs:S:vVwx:X:zH";
 
        errrange = errtag = 0;
        umask(0);
@@ -538,6 +539,9 @@ int main(int argc, char **argv)
                case 'r':
                        namerand = 1;
                        break;
+               case 'R':
+                       read_freq();
+                       break;
                case 's':
                        seed = strtoul(optarg, NULL, 0);
                        break;
@@ -1917,6 +1921,7 @@ usage(void)
        printf("   -o logfile       specifies logfile name\n");
        printf("   -p nproc         specifies the no. of processes (default 1)\n");
        printf("   -r               specifies random name padding\n");
+       printf("   -R               zeros frequencies of write operations\n");
        printf("   -s seed          specifies the seed for the random generator (default random)\n");
        printf("   -v               specifies verbose mode\n");
        printf("   -w               zeros frequencies of non-write operations\n");
@@ -1928,6 +1933,17 @@ usage(void)
        printf("   -H               prints usage and exits\n");
 }
 
+void
+read_freq(void)        /* backs the -R option: zero the frequency of write ops */
+{
+       opdesc_t        *p;     /* cursor over the ops table */
+
+       for (p = ops; p < ops_end; p++) {       /* visit every entry in [ops, ops_end) */
+               if (p->iswrite)         /* entry is flagged as a write operation */
+                       p->freq = 0;    /* clear its frequency; non-write entries are untouched */
+       }
+}
+
 void
 write_freq(void)
 {
diff --git a/tests/xfs/560 b/tests/xfs/560
new file mode 100755 (executable)
index 0000000..28b45d5
--- /dev/null
@@ -0,0 +1,38 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (c) 2022 Oracle.  All Rights Reserved.
+#
+# FS QA Test No. 560
+#
+# Race GETFSMAP and ro remount for a while to see if we crash or livelock.
+#
+. ./common/preamble
+_begin_fstest auto quick fsmap remount
+
+# Override the default cleanup function.
+_cleanup()
+{
+       cd /    # NOTE(review): presumably leaves any test directory before teardown - confirm
+       _scratch_xfs_stress_scrub_cleanup       # waits for child loops; remounts scratch rw if the remount loop ran
+       rm -rf $tmp.*   # remove everything matching $tmp.*
+}
+
+# Import common functions.
+. ./common/filter
+. ./common/fuzzy
+. ./common/xfs
+
+# real QA test starts here
+_supported_fs xfs
+_require_xfs_scratch_rmapbt
+_require_xfs_io_command "fsmap"
+_require_xfs_stress_scrub
+
+_scratch_mkfs > "$seqres.full" 2>&1
+_scratch_mount
+_scratch_xfs_stress_scrub -r 5 -i 'fsmap -v'
+
+# success, all done
+echo "Silence is golden"
+status=0
+exit
diff --git a/tests/xfs/560.out b/tests/xfs/560.out
new file mode 100644 (file)
index 0000000..841fd90
--- /dev/null
@@ -0,0 +1,2 @@
+QA output created by 560
+Silence is golden