tests/generic/038

   1 #! /bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 # Copyright (C) 2014 SUSE Linux Products GmbH. All Rights Reserved.
   4 #
   5 # FSQA Test No. 038
   6 #
   7 # This test was motivated by btrfs issues, but it's generic enough as it
   8 # doesn't use any btrfs specific features.
   9 #
  10 # Stress btrfs' block group allocation and deallocation while running fstrim in
  11 # parallel. Part of the goal is also to get data block groups deallocated so
  12 # that new metadata block groups, using the same physical device space ranges,
  13 # get allocated while fstrim is running. This caused several issues ranging
  14 # from invalid memory accesses, kernel crashes, metadata or data corruption,
  15 # free space cache inconsistencies, free space leaks and memory leaks.
  16 #
  17 # These issues were fixed by the following btrfs linux kernel patches:
  18 #
  19 #   Btrfs: fix invalid block group rbtree access after bg is removed
  20 #   Btrfs: fix crash caused by block group removal
  21 #   Btrfs: fix freeing used extents after removing empty block group
  22 #   Btrfs: fix race between fs trimming and block group remove/allocation
  23 #   Btrfs: fix race between writing free space cache and trimming
  24 #   Btrfs: make btrfs_abort_transaction consider existence of new block groups
  25 #   Btrfs: fix memory leak after block remove + trimming
  26 #   Btrfs: fix fs mapping extent map leak
  27 #   Btrfs: fix unprotected deletion from pending_chunks list
  28 #
  29 # The issues were found on a qemu/kvm guest with 4 virtual CPUs, 4Gb of ram and
  30 # scsi-hd devices with discard support enabled (that means hole punching in the
  31 # disk's image file is performed by the host).
  32 #
  33 seq=`basename $0`
  34 seqres=$RESULT_DIR/$seq
  35 echo "QA output created by $seq"
  36
  37 tmp=/tmp/$$
  38 status=1        # failure is the default!
  39 trap "_cleanup; exit \$status" 0 1 2 3 15
  40
  41 _cleanup()
  42 {
  43         rm -fr $tmp
  44 }
  45
  46 # get standard environment, filters and checks
  47 . ./common/rc
  48 . ./common/filter
  49
  50 # real QA test starts here
  51 _supported_fs generic
  52 _supported_os Linux
  53 _require_scratch
  54 _require_xfs_io_command "falloc"
  55
  56 rm -f $seqres.full
  57
  58 # Keep allocating and deallocating 1G of data space with the goal of creating
  59 # and deleting 1 block group constantly. The intention is to race with the
  60 # fstrim loop below.
  61 fallocate_loop()
  62 {
  63         # Wait for running subcommand before exitting so that
  64         # mountpoint is not busy when we try to unmount it
  65         trap "wait; exit" SIGTERM
  66
  67         local name=$1
  68         while true; do
  69                 $XFS_IO_PROG -f -c "falloc -k 0 1G" \
  70                         $SCRATCH_MNT/$name &> /dev/null
  71                 sleep 3
  72                 $XFS_IO_PROG -c "truncate 0" \
  73                         $SCRATCH_MNT/$name &> /dev/null
  74                 sleep 3
  75         done
  76 }
  77
  78 trim_loop()
  79 {
  80         # Wait for running subcommand before exitting so that
  81         # mountpoint is not busy when we try to unmount it
  82         trap "wait; exit" SIGTERM
  83
  84         while true; do
  85                 $FSTRIM_PROG $SCRATCH_MNT
  86         done
  87 }
  88
  89 # Create a bunch of small files that get their single extent inlined in the
  90 # btree, so that we consume a lot of metadata space and get a chance of a
  91 # data block group getting deleted and reused for metadata later. Sometimes
  92 # the creation of all these files succeeds other times we get ENOSPC failures
  93 # at some point - this depends on how fast the btrfs' cleaner kthread is
  94 # notified about empty block groups, how fast it deletes them and how fast
  95 # the fallocate calls happen. So we don't really care if they all succeed or
  96 # not, the goal is just to keep metadata space usage growing while data block
  97 # groups are deleted.
  98 #
  99 # Creating 200,000 files sequentially is really slow, so speed it up a bit
 100 # by doing it concurrently with 4 threads in 4 separate directories.
 101 nr_files=$((50000 * LOAD_FACTOR))
 102 create_files()
 103 {
 104         local prefix=$1
 105
 106         for ((n = 0; n < 4; n++)); do
 107                 mkdir $SCRATCH_MNT/$n
 108                 (
 109                 trap "wait; exit" SIGTERM
 110
 111                 for ((i = 1; i <= $nr_files; i++)); do
 112                         $XFS_IO_PROG -f -c "pwrite -S 0xaa 0 3900" \
 113                                 $SCRATCH_MNT/$n/"${prefix}_$i" &> /dev/null
 114                         if [ $? -ne 0 ]; then
 115                                 echo "Failed creating file $n/${prefix}_$i" >>$seqres.full
 116                                 break
 117                         fi
 118                 done
 119                 ) &
 120                 create_pids[$n]=$!
 121         done
 122
 123         wait ${create_pids[@]}
 124
 125 }
 126
 127 _scratch_mkfs >>$seqres.full 2>&1
 128 _scratch_mount
 129 _require_fs_space $SCRATCH_MNT $((10 * 1024 * 1024))
 130 _require_batched_discard $SCRATCH_MNT
 131
 132 for ((i = 0; i < $((4 * $LOAD_FACTOR)); i++)); do
 133         trim_loop &
 134         trim_pids[$i]=$!
 135 done
 136
 137 for ((i = 0; i < $((1 * $LOAD_FACTOR)); i++)); do
 138         fallocate_loop "falloc_file_$i" &
 139         fallocate_pids[$i]=$!
 140 done
 141
 142 create_files "foobar"
 143
 144 kill ${fallocate_pids[@]}
 145 kill ${trim_pids[@]}
 146 wait
 147
 148 # The fstests framework will now check for fs consistency with fsck.
 149 # The trimming was racy and caused some btree nodes to get full of zeroes on
 150 # disk, which obviously caused fs metadata corruption. The race often lead
 151 # to missing free space entries in a block group's free space cache too.
 152
 153 echo "Silence is golden"
 154 status=0
 155 exit