tests/btrfs/236

   1 #! /bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 # Copyright (C) 2021 SUSE Linux Products GmbH. All Rights Reserved.
   4 #
   5 # FSQA Test No. 236
   6 #
   7 # Test for fsync data loss after renaming a file or adding a hard link, with a
   8 # previous fsync of another file, as well as that mtime and ctime are correct.
   9 # Test both with COW and NOCOW writes.
  10 #
  11 seq=`basename $0`
  12 seqres=$RESULT_DIR/$seq
  13 echo "QA output created by $seq"
  14 tmp=/tmp/$$
  15 status=1        # failure is the default!
  16 trap "_cleanup; exit \$status" 0 1 2 3 15
  17
  18 _cleanup()
  19 {
  20         _cleanup_flakey
  21         cd /
  22         rm -f $tmp.*
  23 }
  24
  25 # get standard environment, filters and checks
  26 . ./common/rc
  27 . ./common/filter
  28 . ./common/dmflakey
  29
  30 # real QA test starts here
  31 _supported_fs btrfs
  32 _require_scratch
  33 _require_dm_target flakey
  34
  35 rm -f $seqres.full
  36
  37 # The comments inside the function mentioning specific inode numbers and IDs
  38 # (transactions, log commits, etc) are for the case where the function is run
  39 # on a freshly created filesystem, but the logic and reasoning still applies
  40 # for future invocations.
  41 test_fsync()
  42 {
  43         local suffix=$1
  44         local op_type=$2
  45         local foo="$SCRATCH_MNT/foo_$suffix"
  46         local bar="$SCRATCH_MNT/bar_$suffix"
  47         local baz="$SCRATCH_MNT/baz_$suffix"
  48         local digest_before
  49         local digest_after
  50         local mtime_before
  51         local ctime_before
  52         local mtime_after
  53         local ctime_after
  54
  55         if [ "$op_type" != "rename" ] && [ "$op_type" != "link" ]; then
  56                 _fail "Operation type, 2nd argument, must be 'rename' or 'link'"
  57         fi
  58
  59         # Create an extra empty file to be used later for syncing the log and
  60         # bumping the committed log transaction ID by +1.
  61         touch $bar
  62
  63         # This is the file we are interested in testing for data loss after an
  64         # fsync. Create it and fill it with initial data, then fsync it.
  65         #
  66         # Before the fsync the inode has the full sync flag set, the current
  67         # log transaction has an ID of 0 (first log transaction), the inode's
  68         # ->logged_trans is 0, ->last_sub_trans is 0 and ->last_log_commit is -1.
  69         #
  70         # After the fsync the full sync flag is not set anymore, ->logged_trans
  71         # is 6 (generation of the current transaction), ->last_log_commit is 0
  72         # (which is the value of ->last_sub_trans) and ->last_sub_trans remains
  73         # as 0.
  74         #
  75         # Also, after the fsync, the log transaction ID is bumped from 0 to 1.
  76         #
  77         $XFS_IO_PROG -f -c "pwrite -S 0xab 0 1M" -c "fsync" $baz >>$seqres.full
  78         # File bar is inode 257.
  79         # File baz is inode 258.
  80
  81         # During the rename or link operation below the following happens:
  82         #
  83         # We update inode 258 and that causes its ->last_sub_trans to be set to
  84         # 1 (the current log transaction ID), and its ->last_log_commit remains
  85         # with a value of 0 (meaning it was committed in the previous log
  86         # transaction).
  87         #
  88         # After updating inode 258, because we have previously fsynced it, we
  89         # log again the inode because it has a new name/dentry now. This results
  90         # in updating its ->last_log_commit from 0 to 1 (the current value of
  91         # its ->last_sub_trans).
  92         #
  93         if [ "$op_type" == "rename" ]; then
  94                 mv $baz $foo
  95         else
  96                 ln $baz $foo
  97         fi
  98         # File bar is inode 257.
  99         # File foo is inode 258.
 100
 101         # Before the next write, sleep for 1 second so that we can test if mtime
 102         # and ctime are preserved after the power failure.
 103         sleep 1
 104
 105         # This buffered write leaves ->last_sub_trans of inode 258 as 1, the ID
 106         # of the current log transaction (inode->root->log_transid).
 107         $XFS_IO_PROG -c "pwrite -S 0xcd 0 1M" $foo >>$seqres.full
 108
 109         # The fsync against inode 257, file bar, results in committing the log
 110         # transaction with ID 1, updating inode->root->last_log_commit to 1, and
 111         # bumping root->log_transid from 1 to 2.
 112         $XFS_IO_PROG -c "fsync" $bar
 113
 114         # Now on fsync of inode 258, file foo, delalloc is flushed and, because
 115         # the inode does not have the full sync flag set anymore, it only waits
 116         # for the writeback to finish, it does not wait for the ordered extent
 117         # to complete.
 118         #
 119         # If the ordered extent does not complete before btrfs_sync_file() calls
 120         # btrfs_inode_in_log(), then we would not update the inode the log and
 121         # sync the log, resulting in a guaranteed data loss after a power failure
 122         # for COW writes and with slim chances for data loss as well for NOCOW
 123         # writes, because the fsync would return success to user space without
 124         # issuing barriers (REQ_PREFLUSH not sent to the block layer).
 125         # That happened because btrfs_inode_in_log() would return true before the
 126         # ordered extent completes, as in that case the inode has ->last_sub_trans
 127         # set to 1, ->last_log_commit as 1 and root->last_log_commit is 1 as well.
 128         #
 129         # Also, for both COW and NOCOW writes, when the race happens we ended up
 130         # not logging the inode, resulting in an outdated mtime and ctime after
 131         # replaying the log.
 132         #
 133         $XFS_IO_PROG -c "fsync" $foo
 134
 135         # Simulate a power failure and then mount again the filesystem to replay
 136         # the log tree.
 137         # After the power failure We expect $foo, inode 258, to have 1M of data
 138         # full of bytes with a value of 0xcd, and not 0xab.
 139
 140         digest_before=$(_md5_checksum $foo)
 141         mtime_before=$(stat -c %Y $foo)
 142         ctime_before=$(stat -c %Z $foo)
 143
 144         _flakey_drop_and_remount
 145
 146         digest_after=$(_md5_checksum $foo)
 147         mtime_after=$(stat -c %Y $foo)
 148         ctime_after=$(stat -c %Z $foo)
 149
 150         if [ $digest_after != $digest_before ]; then
 151                 echo -n "Incorrect data after log replay, "
 152                 echo "expected digest: $digest_before got: $digest_after"
 153         fi
 154
 155         if [ $mtime_after != $mtime_before ]; then
 156                 echo "mtime not preserved, expected: $mtime_before got: $mtime_after"
 157         fi
 158
 159         if [ $ctime_after != $ctime_before ]; then
 160                 echo "ctime not preserved, expected: $ctime_before got: $ctime_after"
 161         fi
 162 }
 163
 164 # Test first with data cow (it's the default, but be explicit to make it clear).
 165 _scratch_mkfs >>$seqres.full 2>&1
 166 _require_metadata_journaling $SCRATCH_DEV
 167 _init_flakey
 168 MOUNT_OPTIONS="-o datacow"
 169 _mount_flakey
 170
 171 # Test a few times each scenario because this test was motivated by a race
 172 # condition.
 173 echo "Testing fsync after rename with COW writes"
 174 for ((i = 1; i <= 3; i++)); do
 175         test_fsync "rename_cow_$i" "rename"
 176 done
 177 echo "Testing fsync after link with COW writes"
 178 for ((i = 1; i <= 3; i++)); do
 179         test_fsync "link_cow_$i" "link"
 180 done
 181
 182 # Now lets test with nodatacow.
 183 _unmount_flakey
 184 MOUNT_OPTIONS="-o nodatacow"
 185 _mount_flakey
 186
 187 echo "Testing fsync after rename with NOCOW writes"
 188 for ((i = 1; i <= 3; i++)); do
 189         test_fsync "rename_nocow_$i" "rename"
 190 done
 191 echo "Testing fsync after link with NOCOW writes"
 192 for ((i = 1; i <= 3; i++)); do
 193         test_fsync "link_nocow_$i" "link"
 194 done
 195
 196 _unmount_flakey
 197
 198 status=0
 199 exit