btrfs: test for deadlock between snapshot delete and other read-write operations
author Qu Wenruo <wqu@suse.com>
Fri, 11 Jan 2019 05:01:51 +0000 (13:01 +0800)
committer Eryu Guan <guaneryu@gmail.com>
Sat, 12 Jan 2019 07:44:04 +0000 (15:44 +0800)
Commit fb235dc06fac ("btrfs: qgroup: Move half of the qgroup
accounting time out of commit trans") could cause ABBA deadlock
between backref lookup with write lock held (subvolume deletion) and
other read/write operations.

It's going to be fixed by "btrfs: qgroup: Don't trigger backref walk
at delayed ref insert time".

This test will generate pwrite background workload, along with
constant subvolume creation and deletion to trigger the bug.

It needs some time to generate enough files to bump the tree height
to trigger the bug.

In my test environment, with 'unsafe' cache mode for the VM, it
triggers the bug at around 70~90 seconds. So I set the default
runtime to 120s to make sure the bug will be triggered.

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: Eryu Guan <guaneryu@gmail.com>
tests/btrfs/179 [new file with mode: 0755]
tests/btrfs/179.out [new file with mode: 0644]
tests/btrfs/group

diff --git a/tests/btrfs/179 b/tests/btrfs/179
new file mode 100755 (executable)
index 0000000..4a24ea4
--- /dev/null
@@ -0,0 +1,116 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2019 SUSE Linux Products GmbH. All Rights Reserved.
+#
+# FS QA Test 179
+#
+# Test if btrfs will lockup at subvolume deletion when qgroups are enabled.
+#
+# This bug is going to be fixed by a patch for the kernel titled
+# "btrfs: qgroup: Don't trigger backref walk at delayed ref insert time".
+#
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1       # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+       cd /
+       rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+
+# Modify as appropriate.
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+# default sleep interval
+sleep_time=1
+
+# stress test runtime
+runtime=120
+
+_scratch_mkfs > /dev/null 2>&1
+_scratch_mount
+
+mkdir -p "$SCRATCH_MNT/snapshots"
+$BTRFS_UTIL_PROG subvolume create "$SCRATCH_MNT/src" > /dev/null
+$BTRFS_UTIL_PROG quota enable "$SCRATCH_MNT" > /dev/null
+$BTRFS_UTIL_PROG quota rescan -w "$SCRATCH_MNT" > /dev/null
+
+fill_workload()
+{
+       trap "wait; exit" SIGTERM
+       local i=0
+       while true; do
+               _pwrite_byte 0xcd 0 8K "$SCRATCH_MNT/src/large_$i" > /dev/null
+               _pwrite_byte 0xcd 0 2K "$SCRATCH_MNT/src/inline_$i" > /dev/null
+
+               # Randomly remove some files for every 5 loop
+               if [ $(( $i % 5 )) -eq 0 ]; then
+                       victim=$(ls "$SCRATCH_MNT/src" | sort -R | head -n1)
+                       rm "$SCRATCH_MNT/src/$victim"
+               fi
+               i=$((i + 1))
+       done
+}
+
+# Background snapshotter: loops forever, sleeping $sleep_time and then
+# snapshotting $SCRATCH_MNT/src into $SCRATCH_MNT/snapshots/<N>, with N
+# counting up from 0. Stopped via SIGTERM.
+snapshot_workload()
+{
+       # On SIGTERM, reap any children of this worker, then exit.
+       trap "wait; exit" SIGTERM
+       local snap_nr=0
+       while :; do
+               sleep $sleep_time
+               $BTRFS_UTIL_PROG subvolume snapshot "$SCRATCH_MNT/src" \
+                       "$SCRATCH_MNT/snapshots/$snap_nr" > /dev/null
+               snap_nr=$((snap_nr + 1))
+       done
+}
+
+# Background deleter: loops forever, sleeping 2 * $sleep_time and then
+# deleting one randomly chosen entry of $SCRATCH_MNT/snapshots as a
+# subvolume. Stopped via SIGTERM.
+delete_workload()
+{
+       # On SIGTERM, reap any children of this worker, then exit.
+       trap "wait; exit" SIGTERM
+       local victim
+       while true; do
+               sleep $((sleep_time * 2))
+               victim=$(ls "$SCRATCH_MNT/snapshots" | sort -R | head -n1)
+               # No snapshot to delete right now (the snapshot worker may
+               # lag behind): skip this round rather than run "subvolume
+               # delete" on the plain snapshots directory. Only stdout is
+               # redirected below, so an error message from that call
+               # would end up in the test output.
+               [ -n "$victim" ] || continue
+               $BTRFS_UTIL_PROG subvolume delete \
+                       "$SCRATCH_MNT/snapshots/$victim" > /dev/null
+       done
+}
+
+fill_workload &
+fill_pid=$!
+
+sleep $((sleep_time * 2))
+snapshot_workload &
+snapshot_pid=$!
+delete_workload &
+delete_pid=$!
+
+sleep $runtime
+kill $fill_pid
+wait $fill_pid
+kill $snapshot_pid
+wait $snapshot_pid
+kill $delete_pid
+wait $delete_pid
+
+# success, all done
+echo "Silence is golden"
+
+status=0
+exit
diff --git a/tests/btrfs/179.out b/tests/btrfs/179.out
new file mode 100644 (file)
index 0000000..cb9eba3
--- /dev/null
@@ -0,0 +1,2 @@
+QA output created by 179
+Silence is golden
index 04c0254aa4bfe53579ab9a70c61457272686b1e1..46dd3c9523c22804058f6da12d90ebba1aec94f0 100644 (file)
 176 auto quick swap volume
 177 auto quick swap balance
 178 auto quick send
+179 auto qgroup dangerous