#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2015 SUSE Linux Products GmbH. All Rights Reserved.
#
# FSQA Test No. 098
#
# Test that if we fsync a file that got one extent partially cloned into a
# lower file offset, after a power failure our file has the same content it
# had before the power failure and after the extent cloning operation.
#
# Test bookkeeping:
#   seq    - base name of this script, used to label the output
#   seqres - path prefix for this test's result files (e.g. $seqres.full)
#   tmp    - per-process prefix for temporary files removed by _cleanup
#   status - exit code reported by the exit trap below
seq=$(basename $0)
seqres=$RESULT_DIR/$seq
echo "QA output created by $seq"

tmp=/tmp/$$
status=1	# stays at "failure" unless the test reaches its final status=0

# On exit (0) or on signals 1, 2, 3 and 15: run _cleanup, then exit with the
# value $status holds at that time (single quotes defer its expansion).
trap '_cleanup; exit $status' 0 1 2 3 15

# Cleanup handler invoked from the exit trap: tears down the flakey device
# via _cleanup_flakey and removes every temporary file whose name starts with
# "$tmp.".
_cleanup()
{
	_cleanup_flakey
	# Quote the prefix so it is never word-split; only the trailing ".*" is
	# left unquoted so it still globs. "--" ends option parsing.
	rm -f -- "$tmp".*
}

# get standard environment, filters and checks
# (paths are relative, so the script must be run from the directory that
# contains ./common)
. ./common/rc
. ./common/filter
. ./common/dmflakey

# real QA test starts here
# Requirement checks. NOTE(review): judging by their names these helpers
# restrict the test to btrfs and require a scratch device, the dm "flakey"
# target and the cloner program -- confirm against the sourced common files.
_supported_fs btrfs
_require_scratch
_require_dm_target flakey
_require_cloner

# Drop any stale full log from a previous run, since the redirection below
# appends to it.
rm -f $seqres.full

# Both stdout and stderr of mkfs are appended to the full log.
_scratch_mkfs >>$seqres.full 2>&1
_require_metadata_journaling $SCRATCH_DEV
# NOTE(review): presumably these set up the flakey device and mount it at
# $SCRATCH_MNT, which the rest of the test writes to -- see common/dmflakey.
_init_flakey
_mount_flakey

BLOCK_SIZE=$(_get_block_size $SCRATCH_MNT)

# Offsets and lengths used below, in bytes, expressed as multiples of the
# block size.
extent_off=$((200 * $BLOCK_SIZE))
extent_len=$((25 * $BLOCK_SIZE))
clone_src_off=$(($extent_off + (5 * $BLOCK_SIZE)))
clone_dst_off=$((100 * $BLOCK_SIZE))
clone_len=$((5 * $BLOCK_SIZE))

# Create our test file with a single 25 block extent starting at the file
# offset mapped by the 200th block. We fsync the file here so that the fsync
# log tree gets a single csum item covering the whole 25 block extent. That in
# turn causes the second fsync, done after the cloning operation below, to not
# leave in the log tree two csum items covering two block sub-ranges
# ([0, 5[ and [5, 25[) of our extent.
$XFS_IO_PROG -f \
	-c "pwrite -S 0xaa $extent_off $extent_len" \
	-c "fsync" \
	$SCRATCH_MNT/foo | _filter_xfs_io_blocks_modified

# Now clone part of our extent into the file offset mapped by the 100th block.
# This adds a file extent item to our inode's metadata that points to the
# 25 block extent created above, using a data offset of 5 blocks and a data
# length of 5 blocks, i.e. it refers to the block sub-range [5, 10[ of the
# original extent.
$CLONER_PROG -s $clone_src_off -d $clone_dst_off -l $clone_len \
	$SCRATCH_MNT/foo $SCRATCH_MNT/foo

# Now fsync our file to make sure the extent cloning is durably persisted.
# This fsync will not add a second csum item to the log tree with the
# checksums for the block sub-range [5, 10[ of our extent, because the first
# fsync above already added a csum item covering the whole extent.
$XFS_IO_PROG -c "fsync" $SCRATCH_MNT/foo

# Record the file contents as seen before the simulated power failure; the
# dump taken after log replay further below must show the same data.
echo "File contents before power failure:"
od -t x1 $SCRATCH_MNT/foo | _filter_od

# The fsync log replay first processes the file extent item corresponding to the
# file offset mapped by 100th block (the one which refers to the [5, 10[ block
# sub-range of our 25 block extent) and then processes the file extent item for
# file offset mapped by 200th block. It used to happen that when processing the
# latter, it erroneously left in the csum tree 2 csum items that overlapped each
# other, 1 for the block sub-range [5, 10[ and 1 for the whole range of our
# extent. This introduced a problem where subsequent lookups for the checksums
# of blocks within the block range [10, 25[ of our extent would not find
# anything because lookups in the csum tree ended up looking only at the smaller
# csum item, the one covering the block subrange [5, 10[. This made read
# requests assume an expected checksum with a value of 0 for those blocks, which
# caused checksum verification failure when the read operations finished.
# However those checksum failures did not result in read requests returning an
# error to user space (like -EIO for e.g.) because the expected checksum value
# had the special value 0, and in that case btrfs set all bytes of the
# corresponding pages with the value 0x01 and produced the following warning in
# dmesg/syslog:
#
#  "BTRFS warning (device dm-0): csum failed ino 257 off 917504 csum 1322675045\
#    expected csum 0"
#
# NOTE(review): the helper below is what simulates the power failure and
# triggers log replay on remount, per its name -- see common/dmflakey.
_flakey_drop_and_remount

echo "File contents after log replay:"
# Must match the file contents we had after cloning the extent and before
# the power failure happened.
od -t x1 $SCRATCH_MNT/foo | _filter_od

_unmount_flakey

# Every step ran: have the exit trap report success.
status=0
exit