tests/btrfs/240

   1 #! /bin/bash
   2 # SPDX-License-Identifier: GPL-2.0
   3 # Copyright (C) 2021 SUSE Linux Products GmbH. All Rights Reserved.
   4 #
   5 # FSQA Test No. 240
   6 #
   7 # Test a scenario where we do several partial writes into multiple preallocated
   8 # extents across two transactions and with several fsyncs in between. The goal
   9 # is to check that the fsyncs succeed. This scenario used to trigger an -EIO
  10 # failure on the last fsync and turn the filesystem to RO mode because of a
  11 # transaction abort.
  12 #
  13 . ./common/preamble
  14 _begin_fstest auto quick prealloc log
  15
  16 # Override the default cleanup function.
  17 _cleanup()
  18 {
  19         _cleanup_flakey
  20         cd /
  21         rm -f $tmp.*
  22 }
  23
  24 # Import common functions.
  25 . ./common/filter
  26 . ./common/dmflakey
  27
  28 # real QA test starts here
  29 _supported_fs btrfs
  30 _require_scratch
  31 _require_dm_target flakey
  32 _require_xfs_io_command "falloc"
  33
  34 _scratch_mkfs >>$seqres.full 2>&1
  35 _require_metadata_journaling $SCRATCH_DEV
  36 _init_flakey
  37 _mount_flakey
  38
  39 # Create our test file with 2 preallocated extents. Leave a 1M hole between them
  40 # to ensure that we get two file extent items that will never be merged into a
  41 # single one. The extents are contiguous on disk, which will later result in the
  42 # checksums for their data to be merged into a single checksum item in the csums
  43 # btree.
  44 #
  45 $XFS_IO_PROG -f \
  46              -c "falloc 0 1M" \
  47              -c "falloc 3M 3M" \
  48              $SCRATCH_MNT/foobar
  49
  50 # Now write to the second extent and leave only 1M of it as unwritten, which
  51 # corresponds to the file range [4M, 5M[.
  52 #
  53 # Then fsync the file to flush delalloc and to clear full sync flag from the
  54 # inode, so that a future fsync will use the fast code path.
  55 #
  56 # After the writeback triggered by the fsync we have 3 file extent items that
  57 # point to the second extent we previously allocated with fallocate():
  58 #
  59 # 1) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the file
  60 #    range [3M, 4M[
  61 #
  62 # 2) One file extent item of type BTRFS_FILE_EXTENT_PREALLOC that covers the
  63 #    file range [4M, 5M[
  64 #
  65 # 3) One file extent item of type BTRFS_FILE_EXTENT_REG that covers the file
  66 #    range [5M, 6M[
  67 #
  68 # All these file extent items have a generation of 6, which is the ID of the
  69 # transaction where they were created. The split of the original file extent
  70 # item is done at btrfs_mark_extent_written() when ordered extents complete for
  71 # the file ranges [3M, 4M[ and [5M, 6M[.
  72 #
  73 $XFS_IO_PROG -c "pwrite -S 0xab 3M 1M" \
  74              -c "pwrite -S 0xef 5M 1M" \
  75              -c "fsync" \
  76              $SCRATCH_MNT/foobar | _filter_xfs_io
  77
  78 # Commit the current transaction. This wipes out the log tree created by the
  79 # previous fsync.
  80 sync
  81
  82 # Now write to the unwritten range of the second extent we allocated,
  83 # corresponding to the file range [4M, 5M[, and fsync the file, which triggers
  84 # the fast fsync code path.
  85 #
  86 # The fast fsync code path sees that there is a new extent map covering the file
  87 # range [4M, 5M[ and therefore it will log a checksum item covering the range
  88 # [1M, 2M[ of the second extent we allocated.
  89 #
  90 # Also, after the fsync finishes we no longer have the 3 file extent items that
  91 # pointed to 3 sections of the second extent we allocated. Instead we end up
  92 # with a single file extent item pointing to the whole extent, with a type of
  93 # BTRFS_FILE_EXTENT_REG and a generation of 7 (the current transaction ID). This
  94 # is due to the file extent item merging we do when completing ordered extents
  95 # into ranges that point to unwritten (preallocated) extents. This merging is
  96 # done at btrfs_mark_extent_written().
  97 #
  98 $XFS_IO_PROG -c "pwrite -S 0xcd 4M 1M" \
  99              -c "fsync" \
 100              $SCRATCH_MNT/foobar | _filter_xfs_io
 101
 102 # Now do some write to our file outside the range of the second extent that we
 103 # allocated with fallocate() and truncate the file size from 6M down to 5M.
 104 #
 105 # The truncate operation sets the full sync runtime flag on the inode, forcing
 106 # the next fsync to use the slow code path. It also changes the length of the
 107 # second file extent item so that it represents the file range [3M, 5M[ and not
 108 # the range [3M, 6M[ anymore.
 109 #
 110 # Finally fsync the file. Since this is a fsync that triggers the slow code path,
 111 # it will remove all items associated to the inode from the log tree and then it
 112 # will scan for file extent items in the fs/subvolume tree that have a generation
 113 # matching the current transaction ID, which is 7. This means it will log 2 file
 114 # extent items:
 115 #
 116 # 1) One for the first extent we allocated, covering the file range [0, 1M[
 117 #
 118 # 2) Another for the first 2M of the second extent we allocated, covering the
 119 #    file range [3M, 5M[
 120 #
 121 # When logging the first file extent item we log a single checksum item that has
 122 # all the checksums for the entire extent.
 123 #
 124 # When logging the second file extent item, we also lookup for the checksums that
 125 # are associated with the range [0, 2M[ of the second extent we allocated (file
 126 # range [3M, 5M[), and then we log them with btrfs_csum_file_blocks(). However
 127 # that results in ending up with a log that has two checksum items with ranges
 128 # that overlap:
 129 #
 130 # 1) One for the range [1M, 2M[ of the second extent we allocated, corresponding
 131 #    to the file range [4M, 5M[, which we logged in the previous fsync that used
 132 #    the fast code path;
 133 #
 134 # 2) One for the ranges [0, 1M[ and [0, 2M[ of the first and second extents,
 135 #    respectively, corresponding to the files ranges [0, 1M[ and [3M, 5M[.
 136 #    This one was added during this last fsync that uses the slow code path
 137 #    and overlaps with the previous one logged by the previous fast fsync.
 138 #
 139 # This happens because when logging the checksums for the second extent, we
 140 # notice they start at an offset that matches the end of the checksums item that
 141 # we logged for the first extent, and because both extents are contiguous on
 142 # disk, btrfs_csum_file_blocks() decides to extend that existing checksums item
 143 # and append the checksums for the second extent to this item. The end result is
 144 # we end up with two checksum items in the log tree that have overlapping ranges,
 145 # as listed before, resulting in the fsync to fail with -EIO and aborting the
 146 # transaction, turning the filesystem into RO mode.
 147 #
 148 $XFS_IO_PROG -c "pwrite -S 0xff 0 1M" \
 149              -c "truncate 5M" \
 150              -c "fsync" \
 151              $SCRATCH_MNT/foobar | _filter_xfs_io
 152
 153 echo "File content before power failure:"
 154 od -A d -t x1 $SCRATCH_MNT/foobar
 155
 156 # Simulate a power failure and mount again the filesystem. The file content
 157 # must be the same that we had before.
 158 _flakey_drop_and_remount
 159
 160 echo "File content before after failure:"
 161 od -A d -t x1 $SCRATCH_MNT/foobar
 162
 163 _unmount_flakey
 164
 165 status=0
 166 exit