#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2014 SUSE Linux Products GmbH. All Rights Reserved.
#
# FSQA Test No. 038
#
# This test was motivated by btrfs issues, but it's generic enough as it
# doesn't use any btrfs specific features.
#
# Stress btrfs' block group allocation and deallocation while running fstrim in
# parallel. Part of the goal is also to get data block groups deallocated so
# that new metadata block groups, using the same physical device space ranges,
# get allocated while fstrim is running. This caused several issues ranging
# from invalid memory accesses, kernel crashes, metadata or data corruption,
# free space cache inconsistencies, free space leaks and memory leaks.
#
# These issues were fixed by the following btrfs linux kernel patches:
#
#   Btrfs: fix invalid block group rbtree access after bg is removed
#   Btrfs: fix crash caused by block group removal
#   Btrfs: fix freeing used extents after removing empty block group
#   Btrfs: fix race between fs trimming and block group remove/allocation
#   Btrfs: fix race between writing free space cache and trimming
#   Btrfs: make btrfs_abort_transaction consider existence of new block groups
#   Btrfs: fix memory leak after block remove + trimming
#   Btrfs: fix fs mapping extent map leak
#   Btrfs: fix unprotected deletion from pending_chunks list
#
# The issues were found on a qemu/kvm guest with 4 virtual CPUs, 4Gb of ram and
# scsi-hd devices with discard support enabled (that means hole punching in the
# disk's image file is performed by the host).
#
seq=$(basename "$0")
seqres=$RESULT_DIR/$seq
echo "QA output created by $seq"

tmp=/tmp/$$
status=1	# failure is the default!
trap "_cleanup; exit \$status" 0 1 2 3 15

# Runs from the trap above on exit and on HUP/INT/QUIT/TERM. Stops every
# background worker that is still recorded in the pid arrays, reaps it, and
# removes the temporary files.
_cleanup()
{
	# On the normal path each pid array is unset as soon as its workers have
	# been stopped, so whatever is still listed here belongs to a run that
	# was interrupted or bailed out early. Without this the workers would
	# keep using $SCRATCH_MNT after the test is gone.
	local pids=("${create_pids[@]}" "${fallocate_pids[@]}" "${trim_pids[@]}")

	if [ ${#pids[@]} -gt 0 ]; then
		# Some workers may have exited on their own already; do not
		# let "no such process" noise end up in the test output.
		kill "${pids[@]}" &> /dev/null
	fi
	# A signal trap ends with "exit", which fires the exit trap and calls
	# this function a second time; forget the pids so they are not
	# signalled again.
	unset create_pids fallocate_pids trim_pids
	wait

	rm -fr "$tmp"
}

# get standard environment, filters and checks
. ./common/rc
. ./common/filter

# real QA test starts here
_supported_fs generic
_supported_os Linux
_require_scratch
_require_xfs_io_command "falloc"

rm -f $seqres.full

# Keep allocating and deallocating 1G of data space with the goal of creating
# and deleting 1 block group constantly. The intention is to race with the
# fstrim loop below.
#
# $1 - name of the file, relative to $SCRATCH_MNT, to fallocate and truncate
fallocate_loop()
{
	# Wait for running subcommand before exiting so that
	# mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	local name=$1

	# $XFS_IO_PROG is intentionally left unquoted: it is a command line
	# that may be more than a single word.
	while true; do
		$XFS_IO_PROG -f -c "falloc -k 0 1G" \
			"$SCRATCH_MNT/$name" &> /dev/null
		sleep 3
		$XFS_IO_PROG -c "truncate 0" \
			"$SCRATCH_MNT/$name" &> /dev/null
		sleep 3
	done
}

# Run fstrim against the scratch mount point over and over until killed.
trim_loop()
{
	# Wait for running subcommand before exiting so that
	# mountpoint is not busy when we try to unmount it
	trap "wait; exit" SIGTERM

	while true; do
		$FSTRIM_PROG "$SCRATCH_MNT"
	done
}

# Create a bunch of small files that get their single extent inlined in the
# btree, so that we consume a lot of metadata space and get a chance of a
# data block group getting deleted and reused for metadata later. Sometimes
# the creation of all these files succeeds other times we get ENOSPC failures
# at some point - this depends on how fast the btrfs' cleaner kthread is
# notified about empty block groups, how fast it deletes them and how fast
# the fallocate calls happen. So we don't really care if they all succeed or
# not, the goal is just to keep metadata space usage growing while data block
# groups are deleted.
#
# Creating 200,000 files sequentially is really slow, so speed it up a bit
# by doing it concurrently with 4 threads in 4 separate directories.
nr_files=$((50000 * LOAD_FACTOR))

# $1 - file name prefix; files are named <prefix>_<i> inside directories
#      0..3 under $SCRATCH_MNT
create_files()
{
	local prefix=$1
	local n

	for ((n = 0; n < 4; n++)); do
		mkdir "$SCRATCH_MNT/$n"
		(
			trap "wait; exit" SIGTERM

			for ((i = 1; i <= nr_files; i++)); do
				# A failed write is only logged and ends this
				# worker; it does not fail the test.
				if ! $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 3900" \
					"$SCRATCH_MNT/$n/${prefix}_$i" &> /dev/null; then
					echo "Failed creating file $n/${prefix}_$i" >>$seqres.full
					break
				fi
			done
		) &
		create_pids[$n]=$!
	done

	wait "${create_pids[@]}"
	# All creators are gone, nothing left for _cleanup to kill.
	unset create_pids
}

_scratch_mkfs >>$seqres.full 2>&1
_scratch_mount
_require_fs_space $SCRATCH_MNT $((10 * 1024 * 1024))
_require_batched_discard $SCRATCH_MNT

for ((i = 0; i < $((4 * $LOAD_FACTOR)); i++)); do
	trim_loop &
	trim_pids[$i]=$!
done

for ((i = 0; i < $((1 * $LOAD_FACTOR)); i++)); do
	fallocate_loop "falloc_file_$i" &
	fallocate_pids[$i]=$!
done

create_files "foobar"

# Stop the background loops. Each array is unset right after its workers were
# signalled so that _cleanup, which runs from the exit trap, does not signal
# pids that are no longer ours.
kill "${fallocate_pids[@]}"
unset fallocate_pids
kill "${trim_pids[@]}"
unset trim_pids
wait

# The fstests framework will now check for fs consistency with fsck.
# The trimming was racy and caused some btree nodes to get full of zeroes on
# disk, which obviously caused fs metadata corruption. The race often led
# to missing free space entries in a block group's free space cache too.

echo "Silence is golden"
status=0
exit