From: Qu Wenruo <wqu@suse.com>
Date: Wed, 27 Jul 2022 05:41:48 +0000 (+0800)
Subject: btrfs: add test case to make sure btrfs can handle one corrupted device
X-Git-Tag: v2022.07.31~3
X-Git-Url: http://git-server-git.apps.pok.os.sepia.ceph.com/?a=commitdiff_plain;h=9347bb488cde33b54c963c00d4e5db8ab8271579;p=xfstests-dev.git

btrfs: add test case to make sure btrfs can handle one corrupted device

The new test case will verify that btrfs can handle one corrupted device
without affecting the consistency of the filesystem.

Unlike a missing device, one corrupted device can return garbage to the fs,
thus btrfs has to utilize its data/metadata checksum to verify which
data is correct.

The test case will:

- Create a small fs
  Mostly to speed up the test

- Fill the fs with a regular file

- Use fsstress to create some contents

- Save the fssum for later verification

- Corrupt one device with garbage but keep the primary superblock
  untouched

- Run fssum verification

- Run scrub to fix the fs

- Run scrub again to make sure the fs is fine

Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Boris Burkov <boris@bur.io>
Reviewed-by: Zorro Lang <zlang@redhat.com>
Signed-off-by: Zorro Lang <zlang@kernel.org>
---

diff --git a/tests/btrfs/261 b/tests/btrfs/261
new file mode 100755
index 00000000..21567052
--- /dev/null
+++ b/tests/btrfs/261
@@ -0,0 +1,87 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 SUSE Linux Products GmbH. All Rights Reserved.
+#
+# FS QA Test 261
+#
+# Make sure btrfs raid profiles can handle one corrupted device
+# without affecting the consistency of the fs.
+#
+. ./common/preamble
+_begin_fstest auto volume raid
+
+_supported_fs btrfs
+_require_scratch_dev_pool 4
+_btrfs_get_profile_configs replace-missing
+_require_fssum
+
+# Build a small multi-device fs with the given mkfs options ($1), populate
+# it, and record its fssum in $tmp.saved_fssum.  Leaves the fs unmounted.
+prepare_fs()
+{
+	local profile_opts=$1
+
+	# Keep the fs small (-b 1G) so populating it stays quick.  stderr is
+	# captured too, so the RAID56 warning cannot reach the golden output.
+	_scratch_pool_mkfs $profile_opts -b 1G >> $seqres.full 2>&1 || \
+		_fail "mkfs $profile_opts failed"
+
+	# Compressed read repair is known to have problems, so mount with
+	# compression turned off.
+	_scratch_mount -o compress=no
+
+	# Lay down one large regular file first
+	$XFS_IO_PROG -f -c "pwrite -S 0xfe 0 400M" $SCRATCH_MNT/garbage > /dev/null 2>&1
+
+	# Add some extra contents on top with fsstress.  setattr operations
+	# are disabled because they may set NODATACOW, and then the btrfs
+	# checksum could no longer be used to verify the content.
+	$FSSTRESS_PROG -f setattr=0 -d $SCRATCH_MNT -w -n 3000 > /dev/null 2>&1
+	sync
+
+	# Record the fssum of the populated fs for later verification
+	$FSSUM_PROG -A -f -w $tmp.saved_fssum $SCRATCH_MNT
+	_scratch_unmount
+}
+
+# Run one corrupt/verify/scrub cycle for the mkfs profile options in $1.
+workload()
+{
+	local mkfs_opts=$1
+
+	_scratch_dev_pool_get 4
+	echo "=== Testing profile $mkfs_opts ===" >> $seqres.full
+	rm -f -- $tmp.saved_fssum
+	prepare_fs "$mkfs_opts"
+
+	# $SCRATCH_DEV is always the first device of dev pool.
+	# Corrupt the disk but keep the primary superblock.
+	$XFS_IO_PROG -c "pwrite 1M 1023M" $SCRATCH_DEV > /dev/null 2>&1
+
+	_scratch_mount
+
+	# All content should be fine
+	$FSSUM_PROG -r $tmp.saved_fssum $SCRATCH_MNT > /dev/null
+
+	# Scrub to fix the fs; this is known to report correctable errors
+	$BTRFS_UTIL_PROG scrub start -B $SCRATCH_MNT >> $seqres.full 2>&1
+
+	# Make sure above scrub fixed the fs.  Capture stderr too, so scrub
+	# warnings land in $seqres.full instead of the golden output.
+	$BTRFS_UTIL_PROG scrub start -Br $SCRATCH_MNT >> $seqres.full 2>&1
+	if [ $? -ne 0 ]; then
+		echo "scrub failed to fix the fs for profile $mkfs_opts"
+	fi
+	_scratch_unmount
+	_scratch_dev_pool_put
+}
+
+for profile_cfg in "${_btrfs_profile_configs[@]}"; do
+	workload "$profile_cfg"
+done
+
+echo "Silence is golden"
+
+# Reaching this point means every profile was run; report success
+status=0
+exit
diff --git a/tests/btrfs/261.out b/tests/btrfs/261.out
new file mode 100644
index 00000000..679ddc0f
--- /dev/null
+++ b/tests/btrfs/261.out
@@ -0,0 +1,2 @@
+QA output created by 261
+Silence is golden