new: don't allow new tests in group 'other' master
authorDarrick J. Wong <djwong@kernel.org>
Fri, 17 Sep 2021 00:40:06 +0000 (17:40 -0700)
committerEryu Guan <guaneryu@gmail.com>
Sun, 26 Sep 2021 13:36:20 +0000 (21:36 +0800)
The 'other' group is vaguely defined at best -- other than what?  It's
not clear what tests belong in this group, and it has become a dumping
ground for random stuff that are classified in other groups.  Don't let
people create new other group tests.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Amir Goldstein <amir73il@gmail.com>
Signed-off-by: Eryu Guan <guaneryu@gmail.com>
140 files changed:
.gitignore
README
common/attr
common/btrfs
common/config
common/dmerror
common/dmflakey
common/module
common/preamble
common/rc
common/renameat2
common/scsi_debug
common/xfs
configure.ac
doc/group-names.txt [new file with mode: 0644]
ltp/fsstress.c
new
src/Makefile
src/dirstress.c
src/dmerror
src/fill2fs
src/idmapped-mounts/idmapped-mounts.c
src/idmapped-mounts/mount-idmapped.c
src/idmapped-mounts/utils.c
src/idmapped-mounts/utils.h
src/mmap-rw-fault.c [new file with mode: 0644]
tests/btrfs/003
tests/btrfs/011
tests/btrfs/012
tests/btrfs/023
tests/btrfs/049
tests/btrfs/057
tests/btrfs/116
tests/btrfs/124
tests/btrfs/131
tests/btrfs/136
tests/btrfs/140
tests/btrfs/141
tests/btrfs/142
tests/btrfs/143
tests/btrfs/146
tests/btrfs/150
tests/btrfs/151
tests/btrfs/156
tests/btrfs/157
tests/btrfs/158
tests/btrfs/175
tests/btrfs/177
tests/btrfs/177.out
tests/btrfs/179
tests/btrfs/194
tests/btrfs/195
tests/btrfs/197
tests/btrfs/198
tests/btrfs/215
tests/btrfs/220
tests/btrfs/233
tests/btrfs/236
tests/btrfs/244 [new file with mode: 0755]
tests/btrfs/244.out [new file with mode: 0644]
tests/btrfs/245 [new file with mode: 0755]
tests/btrfs/245.out [new file with mode: 0644]
tests/btrfs/246 [new file with mode: 0755]
tests/btrfs/246.out [new file with mode: 0644]
tests/btrfs/247 [new file with mode: 0755]
tests/btrfs/247.out [new file with mode: 0644]
tests/ceph/001
tests/ceph/002
tests/ceph/003
tests/ext4/006
tests/ext4/051 [new file with mode: 0755]
tests/ext4/051.out [new file with mode: 0644]
tests/ext4/052 [new file with mode: 0755]
tests/ext4/052.out [new file with mode: 0644]
tests/generic/081
tests/generic/108
tests/generic/128
tests/generic/361
tests/generic/452
tests/generic/457
tests/generic/459
tests/generic/471
tests/generic/570
tests/generic/628
tests/generic/629
tests/generic/631
tests/generic/633
tests/generic/642 [new file with mode: 0755]
tests/generic/642.out [new file with mode: 0644]
tests/generic/643 [new file with mode: 0755]
tests/generic/643.out [new file with mode: 0644]
tests/generic/644 [new file with mode: 0755]
tests/generic/644.out [new file with mode: 0644]
tests/generic/645 [new file with mode: 0755]
tests/generic/645.out [new file with mode: 0644]
tests/generic/646 [new file with mode: 0755]
tests/generic/646.out [new file with mode: 0644]
tests/generic/647 [new file with mode: 0755]
tests/generic/647.out [new file with mode: 0644]
tests/generic/648 [new file with mode: 0755]
tests/generic/648.out [new file with mode: 0644]
tests/generic/649 [new file with mode: 0755]
tests/generic/649.out [new file with mode: 0644]
tests/generic/650 [new file with mode: 0755]
tests/generic/650.out [new file with mode: 0644]
tests/overlay/077
tests/overlay/078
tests/shared/032
tests/xfs/108
tests/xfs/175 [new file with mode: 0755]
tests/xfs/175.out [new file with mode: 0644]
tests/xfs/176 [new file with mode: 0755]
tests/xfs/176.out [new file with mode: 0644]
tests/xfs/177 [new file with mode: 0755]
tests/xfs/177.out [new file with mode: 0644]
tests/xfs/185 [new file with mode: 0755]
tests/xfs/185.out [new file with mode: 0644]
tests/xfs/187
tests/xfs/187.out
tests/xfs/293
tests/xfs/419 [new file with mode: 0755]
tests/xfs/419.out [new file with mode: 0644]
tests/xfs/449
tests/xfs/491
tests/xfs/492
tests/xfs/493
tests/xfs/505
tests/xfs/514
tests/xfs/515
tests/xfs/519
tests/xfs/520
tests/xfs/535
tests/xfs/536
tests/xfs/540 [new file with mode: 0755]
tests/xfs/540.out [new file with mode: 0644]
tests/xfs/541 [new file with mode: 0755]
tests/xfs/541.out [new file with mode: 0644]
tools/mkgroupfile
tools/mvtest
tools/nextid

index 2d72b06..9e6d2fd 100644 (file)
@@ -105,6 +105,7 @@ tags
 /src/metaperf
 /src/mkswap
 /src/mmapcat
+/src/mmap-rw-fault
 /src/mmap-write-concurrent
 /src/multi_open_unlink
 /src/nametest
diff --git a/README b/README
index 18f7a6b..63f0641 100644 (file)
--- a/README
+++ b/README
@@ -117,6 +117,18 @@ Preparing system for tests:
                name of a file to compress; and it must accept '-d -f -k' and
                the name of a file to decompress.  In other words, it must
                emulate gzip.
+            - Set MIN_FSSIZE to specify the minimal size (bytes) of a
+               filesystem we can create. Setting this parameter will
+               skip the tests creating a filesystem less than
+               MIN_FSSIZE.
+            - Set MODPROBE_PATIENT_RM_TIMEOUT_SECONDS to specify the amount of
+              time we should try a patient module remove. The default is 50
+              seconds. Set this to "forever" and we'll wait forever until the
+              module is gone.
+             - Set FORCE_XFS_CHECK_PROG=yes to have _check_xfs_filesystem run
+               xfs_check to check the filesystem.  As of August 2021,
+               xfs_repair finds all filesystem corruptions found by xfs_check,
+               and more, which means that xfs_check is no longer run by default.
 
         - or add a case to the switch in common/config assigning
           these variables based on the hostname of your test
index d390234..35682d7 100644 (file)
@@ -256,6 +256,45 @@ case "$FSTYP" in
 xfs|udf|pvfs2|9p|ceph|nfs)
        MAX_ATTRS=1000
        ;;
+ext2|ext3|ext4)
+       # For 4k blocksizes, most of the attributes have an attr_name of
+       # "attribute_NN" which is 12, and "value_NN" which is 8.
+       # But for larger block sizes, we start having extended attributes of the
+       # form "attribute_NNN" or "attribute_NNNN", and "value_NNN" and
+       # "value_NNNN", which causes the round_up(len(..), 4) to jump up by 4
+       # bytes.  So round_up(len(attr_name), 4) becomes 16 instead of 12, and
+       # round_up(len(value), 4) becomes 12 instead of 8.
+       #
+       # For 64K blocksize the calculation becomes
+       #       max_attrs = (block_size - 32) / (16 + 12 + 16)
+       # or
+       #       max_attrs = (block_size - 32) / 44
+       #
+       # For 4K blocksize:-
+       #       max_attrs = (block_size - 32) / (16 + 8 + 12)
+       # or
+       #       max_attrs = (block_size - 32) / 36
+       #
+       # Note (for 4K bs) above are exact calculations for attrs of type
+       # attribute_NN with values of type value_NN.
+       # With above calculations, for 4k blocksize max_attrs becomes 112.
+       # This means we can have few attrs of type attribute_NNN with values of
+       # type value_NNN. To avoid/handle this we need to add extra 4 bytes of
+       # headroom.
+       #
+       # So for 4K, the calculations becomes:-
+       #       max_attrs = (block_size - 32) / (16 + 8 + 12 + 4)
+       # or
+       #       max_attrs = (block_size - 32) / 40
+       #
+       # Assume max ~1 block of attrs
+       BLOCK_SIZE=`_get_block_size $TEST_DIR`
+       if [ $BLOCK_SIZE -le 4096 ]; then
+               let MAX_ATTRS=$((($BLOCK_SIZE - 32) / (16 + 8 + 12 + 4)))
+       else
+               let MAX_ATTRS=$((($BLOCK_SIZE - 32) / (16 + 12 + 16 )))
+       fi
+       ;;
 *)
        # Assume max ~1 block of attrs
        BLOCK_SIZE=`_get_block_size $TEST_DIR`
index ebe6ce2..ac880bd 100644 (file)
@@ -96,6 +96,11 @@ _require_btrfs_fs_feature()
        modprobe btrfs > /dev/null 2>&1
        [ -e /sys/fs/btrfs/features/$feat ] || \
                _notrun "Feature $feat not supported by the available btrfs version"
+
+       if [ $feat = "raid56" ]; then
+               # Zoned btrfs only supports SINGLE profile
+               _require_non_zoned_device "${SCRATCH_DEV}"
+       fi
 }
 
 _require_btrfs_fs_sysfs()
@@ -222,6 +227,21 @@ _btrfs_get_profile_configs()
                else
                        local unsupported=()
                fi
+
+               if _scratch_btrfs_is_zoned; then
+                       # Zoned btrfs only supports SINGLE profile
+                       unsupported+=(
+                               "dup"
+                               "raid0"
+                               "raid1"
+                               "raid1c3"
+                               "raid1c4"
+                               "raid10"
+                               "raid5"
+                               "raid6"
+                       )
+               fi
+
                for unsupp in "${unsupported[@]}"; do
                        if [ "${profiles[0]}" == "$unsupp" -o "${profiles[1]}" == "$unsupp" ]; then
                             if [ -z "$BTRFS_PROFILE_CONFIGS" ]; then
@@ -419,3 +439,9 @@ _btrfs_rescan_devices()
 {
        $BTRFS_UTIL_PROG device scan &> /dev/null
 }
+
+_scratch_btrfs_is_zoned()
+{
+       [ `_zone_type ${SCRATCH_DEV}` != "none" ] && return 0
+       return 1
+}
index 005fd50..164381b 100644 (file)
@@ -252,6 +252,37 @@ if [[ "$UDEV_SETTLE_PROG" == "" || ! -d /proc/net ]]; then
 fi
 export UDEV_SETTLE_PROG
 
+# Set MODPROBE_PATIENT_RM_TIMEOUT_SECONDS to "forever" if you want the patient
+# modprobe removal to run forever trying to remove a module.
+MODPROBE_REMOVE_PATIENT=""
+modprobe --help | grep -q -1 "remove-patiently"
+if [[ $? -ne 0 ]]; then
+       if [[ -z "$MODPROBE_PATIENT_RM_TIMEOUT_SECONDS" ]]; then
+               # We will open code our own implementation of patient module
+               # remover in fstests. Use a 50 second default.
+               export MODPROBE_PATIENT_RM_TIMEOUT_SECONDS="50"
+       fi
+else
+       MODPROBE_RM_PATIENT_TIMEOUT_ARGS=""
+       if [[ ! -z "$MODPROBE_PATIENT_RM_TIMEOUT_SECONDS" ]]; then
+               if [[ "$MODPROBE_PATIENT_RM_TIMEOUT_SECONDS" != "forever" ]]; then
+                       MODPROBE_PATIENT_RM_TIMEOUT_MS="$((MODPROBE_PATIENT_RM_TIMEOUT_SECONDS * 1000))"
+                       MODPROBE_RM_PATIENT_TIMEOUT_ARGS="-t $MODPROBE_PATIENT_RM_TIMEOUT_MS"
+               fi
+       else
+               # We export MODPROBE_PATIENT_RM_TIMEOUT_SECONDS here for parity
+               # with environments without support for modprobe -p, but we
+               # only really need it exported right now for environments which
+               # don't have support for modprobe -p to implement our own
+               # patient module removal support within fstests.
+               export MODPROBE_PATIENT_RM_TIMEOUT_SECONDS="50"
+               MODPROBE_PATIENT_RM_TIMEOUT_MS="$((MODPROBE_PATIENT_RM_TIMEOUT_SECONDS * 1000))"
+               MODPROBE_RM_PATIENT_TIMEOUT_ARGS="-t $MODPROBE_PATIENT_RM_TIMEOUT_MS"
+       fi
+       MODPROBE_REMOVE_PATIENT="modprobe -p $MODPROBE_RM_PATIENT_TIMEOUT_ARGS"
+fi
+export MODPROBE_REMOVE_PATIENT
+
 export MKFS_XFS_PROG=$(type -P mkfs.xfs)
 export MKFS_EXT4_PROG=$(type -P mkfs.ext4)
 export MKFS_UDF_PROG=$(type -P mkudffs)
index 7d12e0a..01a4c8b 100644 (file)
@@ -10,11 +10,11 @@ _dmerror_setup()
 
        local blk_dev_size=`blockdev --getsz $dm_backing_dev`
 
-       DMERROR_DEV='/dev/mapper/error-test'
+       export DMERROR_DEV='/dev/mapper/error-test'
 
-       DMLINEAR_TABLE="0 $blk_dev_size linear $dm_backing_dev 0"
+       export DMLINEAR_TABLE="0 $blk_dev_size linear $dm_backing_dev 0"
 
-       DMERROR_TABLE="0 $blk_dev_size error $dm_backing_dev 0"
+       export DMERROR_TABLE="0 $blk_dev_size error $dm_backing_dev 0"
 }
 
 _dmerror_init()
@@ -42,6 +42,8 @@ _dmerror_cleanup()
        $DMSETUP_PROG resume error-test > /dev/null 2>&1
        $UMOUNT_PROG $SCRATCH_MNT > /dev/null 2>&1
        _dmsetup_remove error-test
+
+       unset DMERROR_DEV DMLINEAR_TABLE DMERROR_TABLE
 }
 
 _dmerror_load_error_table()
index b4e11ae..af4371a 100644 (file)
@@ -10,6 +10,7 @@ FLAKEY_ERROR_WRITES=2
 
 _init_flakey()
 {
+       # Scratch device
        local BLK_DEV_SIZE=`blockdev --getsz $SCRATCH_DEV`
        FLAKEY_DEV=/dev/mapper/flakey-test
        FLAKEY_TABLE="0 $BLK_DEV_SIZE flakey $SCRATCH_DEV 0 180 0"
@@ -17,11 +18,50 @@ _init_flakey()
        FLAKEY_TABLE_ERROR="0 $BLK_DEV_SIZE flakey $SCRATCH_DEV 0 0 180 1 error_writes"
        _dmsetup_create flakey-test --table "$FLAKEY_TABLE" || \
                _fatal "failed to create flakey device"
+
+       # Realtime device
+       if [ -n "$SCRATCH_RTDEV" ]; then
+               if [ -z "$NON_FLAKEY_RTDEV" ]; then
+                       # Set up the device switch
+                       local backing_dev="$SCRATCH_RTDEV"
+                       export NON_FLAKEY_RTDEV="$SCRATCH_RTDEV"
+                       SCRATCH_RTDEV=/dev/mapper/flakey-rttest
+               else
+                       # Already set up; recreate tables
+                       local backing_dev="$NON_FLAKEY_RTDEV"
+               fi
+               local BLK_DEV_SIZE=`blockdev --getsz $backing_dev`
+               FLAKEY_RTTABLE="0 $BLK_DEV_SIZE flakey $backing_dev 0 180 0"
+               FLAKEY_RTTABLE_DROP="0 $BLK_DEV_SIZE flakey $backing_dev 0 0 180 1 drop_writes"
+               FLAKEY_RTTABLE_ERROR="0 $BLK_DEV_SIZE flakey $backing_dev 0 0 180 1 error_writes"
+               _dmsetup_create flakey-rttest --table "$FLAKEY_RTTABLE" || \
+                       _fatal "failed to create flakey rt device"
+       fi
+
+       # External log device
+       if [ -n "$SCRATCH_LOGDEV" ]; then
+               if [ -z "$NON_FLAKEY_LOGDEV" ]; then
+                       # Set up the device switch
+                       local backing_dev="$SCRATCH_LOGDEV"
+                       export NON_FLAKEY_LOGDEV="$SCRATCH_LOGDEV"
+                       SCRATCH_LOGDEV=/dev/mapper/flakey-logtest
+               else
+                       # Already set up; recreate tables
+                       local backing_dev="$NON_FLAKEY_LOGDEV"
+               fi
+               local BLK_DEV_SIZE=`blockdev --getsz $backing_dev`
+               FLAKEY_LOGTABLE="0 $BLK_DEV_SIZE flakey $backing_dev 0 180 0"
+               FLAKEY_LOGTABLE_DROP="0 $BLK_DEV_SIZE flakey $backing_dev 0 0 180 1 drop_writes"
+               FLAKEY_LOGTABLE_ERROR="0 $BLK_DEV_SIZE flakey $backing_dev 0 0 180 1 error_writes"
+               _dmsetup_create flakey-logtest --table "$FLAKEY_LOGTABLE" || \
+                       _fatal "failed to create flakey log device"
+       fi
 }
 
 _mount_flakey()
 {
        _scratch_options mount
+
        mount -t $FSTYP $SCRATCH_OPTIONS $MOUNT_OPTIONS $FLAKEY_DEV $SCRATCH_MNT
 }
 
@@ -34,9 +74,21 @@ _cleanup_flakey()
 {
        # If dmsetup load fails then we need to make sure to do resume here
        # otherwise the umount will hang
+       test -n "$NON_FLAKEY_LOGDEV" && $DMSETUP_PROG resume flakey-logtest &> /dev/null
+       test -n "$NON_FLAKEY_RTDEV" && $DMSETUP_PROG resume flakey-rttest &> /dev/null
        $DMSETUP_PROG resume flakey-test > /dev/null 2>&1
+
        $UMOUNT_PROG $SCRATCH_MNT > /dev/null 2>&1
+
        _dmsetup_remove flakey-test
+       test -n "$NON_FLAKEY_LOGDEV" && _dmsetup_remove flakey-logtest
+       test -n "$NON_FLAKEY_RTDEV" && _dmsetup_remove flakey-rttest
+
+       SCRATCH_LOGDEV="$NON_FLAKEY_LOGDEV"
+       unset NON_FLAKEY_LOGDEV
+
+       SCRATCH_RTDEV="$NON_FLAKEY_RTDEV"
+       unset NON_FLAKEY_RTDEV
 }
 
 # _load_flakey_table <table> [lockfs]
@@ -45,22 +97,70 @@ _cleanup_flakey()
 # table, so it simulates power failure.
 _load_flakey_table()
 {
-       table="$FLAKEY_TABLE"
-       [ $1 -eq $FLAKEY_DROP_WRITES ] && table="$FLAKEY_TABLE_DROP"
-       [ $1 -eq $FLAKEY_ERROR_WRITES ] && table="$FLAKEY_TABLE_ERROR"
+       case "$1" in
+       "$FLAKEY_DROP_WRITES")
+               table="$FLAKEY_TABLE_DROP"
+               logtable="$FLAKEY_LOGTABLE_DROP"
+               rttable="$FLAKEY_RTTABLE_DROP"
+               ;;
+       "$FLAKEY_ERROR_WRITES")
+               table="$FLAKEY_TABLE_ERROR"
+               logtable="$FLAKEY_LOGTABLE_ERROR"
+               rttable="$FLAKEY_RTTABLE_ERROR"
+               ;;
+       *)
+               table="$FLAKEY_TABLE"
+               logtable="$FLAKEY_LOGTABLE"
+               rttable="$FLAKEY_RTTABLE"
+               ;;
+       esac
 
        suspend_opt="--nolockfs"
        [ $# -gt 1 ] && [ $2 -eq 1 ] && suspend_opt=""
 
+       # Suspend the scratch device before the log and realtime devices so
+       # that the kernel can freeze and flush the filesystem if the caller
+       # wanted a freeze.
        $DMSETUP_PROG suspend $suspend_opt flakey-test
        [ $? -ne 0 ] && _fatal "failed to suspend flakey-test"
 
+       if [ -n "$NON_FLAKEY_RTDEV" ]; then
+               $DMSETUP_PROG suspend $suspend_opt flakey-rttest
+               [ $? -ne 0 ] && _fatal "failed to suspend flakey-rttest"
+       fi
+
+       if [ -n "$NON_FLAKEY_LOGDEV" ]; then
+               $DMSETUP_PROG suspend $suspend_opt flakey-logtest
+               [ $? -ne 0 ] && _fatal "failed to suspend flakey-logtest"
+       fi
+
        # There may be multiple dm targets in the table, and these dm targets
        # will be joined by the newline ("\n"). Option --table can not cope with
        # the multiple-targets case, so get them by reading from standard input.
        echo -e "$table" | $DMSETUP_PROG load flakey-test
        [ $? -ne 0 ] && _fatal "failed to load table into flakey-test"
 
+       if [ -n "$NON_FLAKEY_RTDEV" ]; then
+               echo -e "$rttable" | $DMSETUP_PROG load flakey-rttest
+               [ $? -ne 0 ] && _fatal "failed to load table into flakey-rttest"
+       fi
+
+       if [ -n "$NON_FLAKEY_LOGDEV" ]; then
+               echo -e "$logtable" | $DMSETUP_PROG load flakey-logtest
+               [ $? -ne 0 ] && _fatal "failed to load table into flakey-logtest"
+       fi
+
+       # Resume devices in the opposite order that we suspended them.
+       if [ -n "$NON_FLAKEY_LOGDEV" ]; then
+               $DMSETUP_PROG resume flakey-logtest
+               [ $? -ne 0 ] && _fatal  "failed to resume flakey-logtest"
+       fi
+
+       if [ -n "$NON_FLAKEY_RTDEV" ]; then
+               $DMSETUP_PROG resume flakey-rttest
+               [ $? -ne 0 ] && _fatal  "failed to resume flakey-rttest"
+       fi
+
        $DMSETUP_PROG resume flakey-test
        [ $? -ne 0 ] && _fatal  "failed to resume flakey-test"
 }
index 39e4e79..ead0f88 100644 (file)
@@ -16,7 +16,7 @@ _reload_module()
 {
        local module="$1"
 
-       modprobe -r "${module}" || _fail "${module} unload failed"
+       _patient_rmmod "${module}" || _fail "${module} unload failed"
        modprobe "${module}" || _fail "${module} load failed"
 }
 
@@ -44,7 +44,7 @@ _require_loadable_module()
        local module="$1"
 
        modinfo "${module}" > /dev/null 2>&1 || _notrun "${module}: must be a module."
-       modprobe -r "${module}" || _notrun "Require ${module} to be unloadable"
+       _patient_rmmod "${module}" || _notrun "Require ${module} to be unloadable"
        modprobe "${module}" || _notrun "${module} load failed"
 }
 
@@ -64,7 +64,7 @@ _require_loadable_fs_module()
        test -n "${had_scratchfs}" && _scratch_unmount
        local unload_ok=""
        local load_ok=""
-       modprobe -r "${module}" || unload_ok=0
+       _patient_rmmod "${module}" || unload_ok=0
        modprobe "${module}" || load_ok=0
        test -n "${had_scratchfs}" && _scratch_mount 2> /dev/null
        test -n "${had_testfs}" && _test_mount 2> /dev/null
@@ -81,3 +81,111 @@ _get_fs_module_param()
 {
        cat /sys/module/${FSTYP}/parameters/${1} 2>/dev/null
 }
+
+# checks the refcount and returns 0 if we can safely remove the module. rmmod
+# does this check for us, but we can use this to also iterate checking for this
+# refcount before we even try to remove the module. This is useful when using
+# debug test modules which take a while to quiesce.
+_patient_rmmod_check_refcnt()
+{
+       local module=$1
+       local refcnt=0
+
+       if [[ -f /sys/module/$module/refcnt ]]; then
+               refcnt=$(cat /sys/module/$module/refcnt 2>/dev/null)
+               if [[ $? -ne 0 || $refcnt -eq 0 ]]; then
+                       return 0
+               fi
+               return 1
+       fi
+       return 0
+}
+
+# Patiently tries to wait to remove a module by ensuring first
+# the refcnt is 0 and then trying to persistently remove the module within
+# the time allowed. The timeout is configurable per test, just set
+# MODPROBE_PATIENT_RM_TIMEOUT_SECONDS prior to including this file.
+# If you want this to try forever just set MODPROBE_PATIENT_RM_TIMEOUT_SECONDS
+# to the special value of "forever". This applies to both cases where kmod
+# supports the patient module remover (modprobe -p) and where it does not.
+#
+# If your version of kmod supports modprobe -p, we use that
+# instead. Otherwise we have to implement a patient module remover
+# ourselves.
+_patient_rmmod()
+{
+       local module=$1
+       local max_tries_max=$MODPROBE_PATIENT_RM_TIMEOUT_SECONDS
+       local max_tries=0
+       local mod_ret=0
+       local refcnt_is_zero=0
+
+       if [[ ! -z $MODPROBE_REMOVE_PATIENT ]]; then
+               $MODPROBE_REMOVE_PATIENT $module
+               mod_ret=$?
+               if [[ $mod_ret -ne 0 ]]; then
+                       echo "kmod patient module removal for $module timed out waiting for refcnt to become 0 using timeout of $max_tries_max returned $mod_ret"
+               fi
+               return $mod_ret
+       fi
+
+       max_tries=$max_tries_max
+
+       # We must use a string check as otherwise if max_tries is set to
+       # "forever" and we don't use a string check we can end up skipping
+       # entering this loop.
+       while [[ "$max_tries" != "0" ]]; do
+               _patient_rmmod_check_refcnt $module
+               if [[ $? -eq 0 ]]; then
+                       refcnt_is_zero=1
+                       break
+               fi
+               sleep 1
+               if [[ "$max_tries" == "forever" ]]; then
+                       continue
+               fi
+               let max_tries=$max_tries-1
+       done
+
+       if [[ $refcnt_is_zero -ne 1 ]]; then
+               echo "custom patient module removal for $module timed out waiting for refcnt to become 0 using timeout of $max_tries_max"
+               return -1
+       fi
+
+       # If we ran out of time but our refcnt check confirms we had
+       # a refcnt of 0, just try to remove the module once.
+       if [[ "$max_tries" == "0" ]]; then
+               modprobe -r $module
+               return $?
+       fi
+
+       # If we have extra time left. Use the time left to now try to
+       # persistently remove the module. We do this because although through
+       # the above we found refcnt to be 0, removal can still fail since
+       # userspace can always race to bump the refcnt. An example is any
+       # blkdev_open() calls against a block device. These issues have been
+       # tracked and documented in the following bug reports, which justifies
+       # our need to do this in userspace:
+       # https://bugzilla.kernel.org/show_bug.cgi?id=212337
+       # https://bugzilla.kernel.org/show_bug.cgi?id=214015
+       # Stop as soon as /sys/module/$module is gone; with the check inside
+       # the loop body an absent module made this loop spin forever.
+       while [[ $max_tries != 0 ]] && [[ -d /sys/module/$module ]]; do
+               modprobe -r $module 2> /dev/null
+               mod_ret=$?
+               if [[ $mod_ret == 0 ]]; then
+                       break;
+               fi
+               sleep 1
+               if [[ "$max_tries" == "forever" ]]; then
+                       continue
+               fi
+               let max_tries=$max_tries-1
+       done
+
+       if [[ $mod_ret -ne 0 ]]; then
+               echo "custom patient module removal for $module timed out trying to remove $module using timeout of $max_tries_max last try returned $mod_ret"
+       fi
+
+       return $mod_ret
+}
index 66b0ed0..64d7938 100644 (file)
@@ -23,6 +23,26 @@ _register_cleanup()
        trap "${cleanup}exit \$status" EXIT HUP INT QUIT TERM $*
 }
 
+# Make sure each group in "$@" is listed in $GROUPNAME_DOC_FILE; return 1 if not.
+_check_groups() {
+       test -n "$GROUPNAME_DOC_FILE" || return 0
+
+       local testname="$(echo "$0" | sed -e 's/^.*tests\///g')"
+       declare -a missing=()
+
+       for group in "$@"; do
+               if ! grep -q "^${group}[[:space:]]" "$GROUPNAME_DOC_FILE"; then
+                       missing+=("\"${group}\"")
+               fi
+       done
+       test "${#missing[@]}" -eq 0 && return 0  # element count, not strlen of [0]
+
+       local suffix=
+       test "${#missing[@]}" -gt 1 && suffix="s"
+       echo "$testname: group$suffix ${missing[@]} not mentioned in documentation." 1>&2
+       return 1
+}
+
 # Prepare to run a fstest by initializing the required global variables to
 # their defaults, sourcing common functions, registering a cleanup function,
 # and removing the $seqres.full file.
@@ -42,6 +62,7 @@ _begin_fstest()
        # If we're only running the test to generate a group.list file,
        # spit out the group data and exit.
        if [ -n "$GENERATE_GROUPS" ]; then
+               _check_groups "$@" || exit 1
                echo "$seq $@"
                exit 0
        fi
index d4b1f21..a174b69 100644 (file)
--- a/common/rc
+++ b/common/rc
@@ -4,7 +4,7 @@
 
 . common/config
 
-BC=$(which bc 2> /dev/null) || BC=
+BC="$(type -P bc)" || BC=
 
 _require_math()
 {
@@ -339,7 +339,7 @@ _try_scratch_mount()
 # mount scratch device with given options and _fail if mount fails
 _scratch_mount()
 {
-       _try_scratch_mount $* || _fail "mount failed"
+       _try_scratch_mount $* || _fail "mount $(_scratch_mount_options $*) failed"
 }
 
 _scratch_mount_idmapped()
@@ -631,6 +631,30 @@ _ext4_metadump()
                $DUMP_COMPRESSOR -f "$dumpfile" &>> "$seqres.full"
 }
 
+# Capture the metadata of a filesystem in a dump file for offline analysis.
+# This is not supported by all filesystem types, so this function should only
+# be used after a test has already failed.
+_metadump_dev() {
+       local device="$1"
+       local dumpfile="$2"
+       local compressopt="$3"
+
+       test "$DUMP_CORRUPT_FS" = 1 || return 0
+
+       case "$FSTYP" in
+       ext*)
+               _ext4_metadump $device $dumpfile $compressopt
+               ;;
+       xfs)
+               _xfs_metadump $dumpfile $device none $compressopt
+               ;;
+       *)
+               echo "Don't know how to metadump $FSTYP"
+               return 1
+               ;;
+       esac
+}
+
 _test_mkfs()
 {
     case $FSTYP in
@@ -956,6 +980,16 @@ _available_memory_bytes()
        fi
 }
 
+_check_minimal_fs_size()
+{
+       local fssize=$1
+
+       if [ -n "$MIN_FSSIZE" ]; then
+               [ $MIN_FSSIZE -gt "$fssize" ] &&
+                       _notrun "specified filesystem size is too small"
+       fi
+}
+
 # Create fs of certain size on scratch device
 # _scratch_mkfs_sized <size in bytes> [optional blocksize]
 _scratch_mkfs_sized()
@@ -968,7 +1002,10 @@ _scratch_mkfs_sized()
        xfs)
                def_blksz=`echo $MKFS_OPTIONS | sed -rn 's/.*-b ?size= ?+([0-9]+).*/\1/p'`
                ;;
-       ext2|ext3|ext4|ext4dev|udf|btrfs|reiser4|ocfs2|reiserfs)
+       btrfs)
+               def_blksz=`echo $MKFS_OPTIONS | sed -rn 's/.*-s ?+([0-9]+).*/\1/p'`
+               ;;
+       ext2|ext3|ext4|ext4dev|udf|reiser4|ocfs2|reiserfs)
                def_blksz=`echo $MKFS_OPTIONS | sed -rn 's/.*-b ?+([0-9]+).*/\1/p'`
                ;;
        jfs)
@@ -989,6 +1026,8 @@ _scratch_mkfs_sized()
 
        local blocks=`expr $fssize / $blocksize`
 
+       _check_minimal_fs_size $fssize
+
        if [ -b "$SCRATCH_DEV" ]; then
                local devsize=`blockdev --getsize64 $SCRATCH_DEV`
                [ "$fssize" -gt "$devsize" ] && _notrun "Scratch device too small"
@@ -1822,6 +1861,9 @@ _require_loop()
     else
        _notrun "This test requires loopback device support"
     fi
+
+    # loop device does not handle zone information
+    _require_non_zoned_device ${TEST_DEV}
 }
 
 # this test requires kernel support for a secondary filesystem
@@ -1922,6 +1964,20 @@ _require_sane_bdev_flush()
        fi
 }
 
+# Decide if the scratch filesystem is likely to be mounted in fsdax mode.
+# If there's a dax clause in the mount options we assume the test runner
+# wants us to test DAX; or if the scratch device itself advertises dax mode
+# in sysfs.
+__detect_scratch_fsdax()
+{
+       _normalize_mount_options | egrep -q "dax(=always| |$)" && return 0
+
+       local sysfs="/sys/block/$(_short_dev $SCRATCH_DEV)"
+       test -e "${sysfs}/dax" && return 0
+       test "$(cat "${sysfs}/queue/dax" 2>/dev/null)" = "1" && return 0
+       return 1
+}
+
 # this test requires a specific device mapper target
 _require_dm_target()
 {
@@ -1933,9 +1989,7 @@ _require_dm_target()
        _require_sane_bdev_flush $SCRATCH_DEV
        _require_command "$DMSETUP_PROG" dmsetup
 
-       _normalize_mount_options | egrep -q "dax(=always| |$)" || \
-                       test -e "/sys/block/$(_short_dev $SCRATCH_DEV)/dax"
-       if [ $? -eq 0 ]; then
+       if __detect_scratch_fsdax; then
                case $target in
                stripe|linear|log-writes)
                        ;;
@@ -1951,6 +2005,16 @@ _require_dm_target()
        if [ $? -ne 0 ]; then
                _notrun "This test requires dm $target support"
        fi
+
+       # dm-error cannot handle the zone information
+       #
+       # dm-snapshot and dm-thin-pool cannot ensure sequential writes on
+       # the backing device
+       case $target in
+       error|snapshot|thin-pool)
+               _require_non_zoned_device ${SCRATCH_DEV}
+               ;;
+       esac
 }
 
 _zone_type()
@@ -2522,14 +2586,15 @@ _format_swapfile() {
        $CHATTR_PROG +C "$fname" > /dev/null 2>&1
        _pwrite_byte 0x61 0 "$sz" "$fname" >> $seqres.full
        # Ignore permission complaints on filesystems that don't support perms
-       $MKSWAP_PROG "$fname" 2> >(grep -v 'insecure permission' >&2) >> $seqres.full
+       $MKSWAP_PROG "$fname" 2>&1 >> $seqres.full | \
+               grep -v "insecure permission"
 }
 
 _swapon_file() {
        local fname="$1"
 
        # Ignore permission complaints on filesystems that don't support perms
-       swapon "$fname" 2> >(grep -v "insecure permissions" >&2)
+       $(swapon "$fname" 2> >(grep -v "insecure permissions" >&2))
 }
 
 # Check that the filesystem supports swapfiles
@@ -2655,10 +2720,10 @@ _fstyp_has_non_default_seek_data_hole()
                return 0
                ;;
        nfs*)
-               # NFSv2 and NFSv3 only support default behavior of SEEK_HOLE,
-               # while NFSv4 supports non-default behavior
-               local nfsvers=`_df_device $TEST_DEV | $AWK_PROG '{ print $2 }'`
-               [ "$nfsvers" = "nfs4" ]
+               # NFSv2, NFSv3, and NFSv4.0/4.1 only support default behavior of SEEK_HOLE,
+               # while NFSv4.2 supports non-default behavior
+               local nfsvers=`_mount | grep $TEST_DEV | sed -n 's/^.*vers=\([0-9.]*\).*$/\1/p'`
+               [ "$nfsvers" = "4.2" ]
                return $?
                ;;
        overlay)
@@ -3784,6 +3849,14 @@ _create_loop_device()
 {
        local file=$1 dev
        dev=`losetup -f --show $file` || _fail "Cannot assign $file to a loop device"
+
+       # Try to enable asynchronous directio mode on the loopback device so
+       # that writeback started by a filesystem mounted on the loop device
+       # won't be throttled by buffered writes to the lower filesystem.  This
+       # is a performance optimization for tests that want to write a lot of
+       # data, so it isn't required to work.
+       test -b "$dev" && losetup --direct-io=on $dev 2> /dev/null
+
        echo $dev
 }
 
index 4b64eea..4d25d7c 100644 (file)
@@ -61,6 +61,7 @@ _rename_tests_source_dest()
        local source=$1
        local dest=$2
        local options=$3
+       local flags=$4
 
        for stype in none regu symb dire tree; do
                for dtype in none regu symb dire tree; do
@@ -90,11 +91,11 @@ _rename_tests()
        local flags=$2
 
        #same directory renames
-       _rename_tests_source_dest $testdir/src $testdir/dst     "samedir "
+       _rename_tests_source_dest $testdir/src $testdir/dst     "samedir " $flags
 
        #cross directory renames
        mkdir $testdir/x $testdir/y
-       _rename_tests_source_dest $testdir/x/src $testdir/y/dst "crossdir"
+       _rename_tests_source_dest $testdir/x/src $testdir/y/dst "crossdir" $flags
        rmdir $testdir/x $testdir/y
 }
 
index e798846..1e0ca25 100644 (file)
@@ -4,11 +4,32 @@
 #
 # Functions useful for tests on unique block devices
 
+. common/module
+
 _require_scsi_debug()
 {
-       # make sure we have the module and it's not already used
+       local mod_present=0
+
+       # make sure we have the module
        modinfo scsi_debug 2>&1 > /dev/null || _notrun "scsi_debug module not found"
-       lsmod | grep -wq scsi_debug && (rmmod scsi_debug || _notrun "scsi_debug module in use")
+
+       lsmod | grep -wq scsi_debug
+       if [[ $? -eq 0 ]]; then
+               mod_present=1
+       fi
+
+       if [[ $mod_present -eq 1 ]]; then
+               # We try to remove the module only once if MODPROBE_PATIENT_RM_TIMEOUT_SECONDS
+               # is set to forever because fstests does not leave modules
+               # lingering around. If you do have a module lingering around
+               # and its being used, it wasn't us who started it, so you
+               # likely would not want to wait forever for it really.
+               if [[ "$MODPROBE_PATIENT_RM_TIMEOUT_SECONDS" == "forever" ]]; then
+                       rmmod scsi_debug || _notrun "scsi_debug module in use and MODPROBE_PATIENT_RM_TIMEOUT_SECONDS set to forever, removing once failed"
+               else
+                       _patient_rmmod scsi_debug || _notrun "scsi_debug module in use"
+               fi
+       fi
        # make sure it has the features we need
        # logical/physical sectors plus unmap support all went in together
        modinfo scsi_debug | grep -wq sector_size || _notrun "scsi_debug too old"
@@ -44,14 +65,6 @@ _get_scsi_debug_dev()
 _put_scsi_debug_dev()
 {
        lsmod | grep -wq scsi_debug || return
-
-       n=2
-       # use redirection not -q option of modprobe here, because -q of old
-       # modprobe is only quiet when the module is not found, not when the
-       # module is in use.
-       while [ $n -ge 0 ] && ! modprobe -nr scsi_debug >/dev/null 2>&1; do
-               $UDEV_SETTLE_PROG
-               n=$((n-1))
-       done
-       rmmod scsi_debug || _fail "Could not remove scsi_debug module"
+       $UDEV_SETTLE_PROG
+       _patient_rmmod scsi_debug || _fail "Could not remove scsi_debug module"
 }
index c5e3942..bfb1bf1 100644 (file)
@@ -595,10 +595,14 @@ _check_xfs_filesystem()
                ok=0
        fi
 
-       # xfs_check runs out of memory on large files, so even providing the test
-       # option (-t) to avoid indexing the free space trees doesn't make it pass on
-       # large filesystems. Avoid it.
-       if [ "$LARGE_SCRATCH_DEV" != yes ]; then
+       # xfs_check runs out of memory on large files, so even providing the
+       # test option (-t) to avoid indexing the free space trees doesn't make
+       # it pass on large filesystems. Avoid it.
+       #
+       # As of August 2021, xfs_repair completely supersedes xfs_check's
+       # ability to find corruptions, so we no longer run xfs_check unless
+       # forced to run it.
+       if [ "$LARGE_SCRATCH_DEV" != yes ] && [ "$FORCE_XFS_CHECK_PROG" = "yes" ]; then
                _xfs_check $extra_log_options $device 2>&1 > $tmp.fs_check
        fi
        if [ -s $tmp.fs_check ]; then
index d6fc294..6e5ab39 100644 (file)
@@ -67,9 +67,42 @@ AC_PACKAGE_WANT_LINUX_FS_H
 AC_PACKAGE_WANT_LIBBTRFSUTIL
 
 AC_HAVE_COPY_FILE_RANGE
-
 AC_CHECK_FUNCS([renameat2])
+AC_CHECK_FUNCS([reallocarray])
 AC_CHECK_TYPES([struct mount_attr], [], [], [[#include <linux/mount.h>]])
+AC_CHECK_TYPES([struct btrfs_qgroup_limit], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_qgroup_inherit], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_ioctl_vol_args], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_ioctl_vol_args_v2], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_ioctl_ino_lookup_args], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_ioctl_ino_lookup_user_args], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_TYPES([struct btrfs_ioctl_get_subvol_rootref_args], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
+AC_CHECK_HEADERS([linux/btrfs.h linux/btrfs_tree.h])
+AC_CHECK_MEMBERS([struct btrfs_ioctl_vol_args_v2.subvolid], [], [], [[
+#include <stddef.h>
+#include <linux/btrfs.h>
+]])
 
 AC_CONFIG_HEADER(include/config.h)
 AC_CONFIG_FILES([include/builddefs])
diff --git a/doc/group-names.txt b/doc/group-names.txt
new file mode 100644 (file)
index 0000000..e8e3477
--- /dev/null
@@ -0,0 +1,135 @@
+======================= =======================================================
+Group Name:            Description:
+======================= =======================================================
+all                    All known tests, automatically generated by ./check at
+                       runtime
+auto                   Tests that should be run automatically.  These should
+                       not require more than ~5 minutes to run.
+quick                  Tests that should run in under 30 seconds.
+deprecated             Old tests that should not be run.
+
+acl                    Access Control Lists
+admin                  xfs_admin functionality
+aio                    general libaio async io tests
+atime                  file access time
+attr                   extended attributes
+attr2                  xfs v2 extended attributes
+balance                        btrfs tree rebalance
+bigtime                        timestamps beyond the year 2038
+blockdev               block device functionality
+broken                 broken tests
+cap                    Linux capabilities
+casefold               directory name casefolding
+ci                     ASCII case-insensitive directory name lookups
+clone                  FICLONE/FICLONERANGE ioctls
+clone_stress           stress testing FICLONE/FICLONERANGE
+collapse               fallocate FALLOC_FL_COLLAPSE_RANGE
+compress               file compression
+convert                        btrfs ext[34] conversion tool
+copy                   xfs_copy functionality
+copy_range             copy_file_range syscall
+copyup                 overlayfs copyup
+dangerous              dangerous test that can crash the system
+dangerous_bothrepair   fuzzers to evaluate xfs_scrub + xfs_repair repair
+dangerous_fuzzers      fuzzers that can crash your computer
+dangerous_norepair     fuzzers to evaluate kernel metadata verifiers
+dangerous_online_repair        fuzzers to evaluate xfs_scrub online repair
+dangerous_repair       fuzzers to evaluate xfs_repair offline repair
+dangerous_scrub                fuzzers to evaluate xfs_scrub checking
+data                   data loss checkers
+dax                    direct access mode for persistent memory files
+db                     xfs_db functional tests
+dedupe                 FIDEDUPERANGE ioctl
+defrag                 filesystem defragmenters
+dir                    directory test functions
+dump                   dump and restore utilities
+eio                    IO error reporting
+encrypt                        encrypted file contents
+enospc                 ENOSPC error reporting
+exportfs               file handles
+filestreams            XFS filestreams allocator
+freeze                 filesystem freeze tests
+fsck                   general fsck tests
+fsmap                  FS_IOC_GETFSMAP ioctl
+fsr                    XFS free space reorganizer
+fuzzers                        filesystem fuzz tests
+growfs                 increasing the size of a filesystem
+hardlink               hardlinks
+health                 XFS health reporting
+idmapped               idmapped mount functionality
+inobtcount             XFS inode btree count tests
+insert                 fallocate FALLOC_FL_INSERT_RANGE
+ioctl                  general ioctl tests
+io_uring               general io_uring async io tests
+label                  filesystem labelling
+limit                  resource limits
+locks                  file locking
+log                    metadata logging
+logprint               xfs_logprint functional tests
+long_rw                        long-soak read write IO path exercisers
+metacopy               overlayfs metadata-only copy-up
+metadata               filesystem metadata update exercisers
+metadump               xfs_metadump/xfs_mdrestore functionality
+mkfs                   filesystem formatting tools
+mount                  mount option and functionality checks
+nested                 nested overlayfs instances
+nfs4_acl               NFSv4 access control lists
+nonsamefs              overlayfs layers on different filesystems
+online_repair          online repair functionality tests
+other                  dumping ground, do not add more tests to this group
+pattern                        specific IO pattern tests
+perms                  access control and permission checking
+pipe                   pipe functionality
+pnfs                   PNFS
+posix                  POSIX behavior conformance
+prealloc               fallocate for preallocating unwritten space
+preallocrw             fallocate, then read and write
+punch                  fallocate FALLOC_FL_PUNCH_HOLE
+qgroup                 btrfs qgroup feature
+quota                  filesystem usage quotas
+raid                   btrfs RAID
+realtime               XFS realtime volumes
+recoveryloop           crash recovery loops
+redirect               overlayfs redirect_dir feature
+remote                 dump and restore with a remote tape
+remount                        remounting filesystems
+rename                 rename system call
+repair                 xfs_repair functional tests
+replace                        btrfs device replace
+replay                 dm-logwrites replays
+resize                 resize2fs functionality tests
+richacl                        rich ACL feature
+rmap                   XFS reverse mapping exercisers
+rotate                 overlayfs upper layer rotate tests from the unionmount
+                       test suite
+rw                     read/write IO tests
+samefs                 overlayfs when all layers are on the same fs
+scrub                  filesystem metadata scrubbers
+seed                   btrfs seeded filesystems
+seek                   llseek functionality
+send                   btrfs send/receive
+shrinkfs               decreasing the size of a filesystem
+shutdown               FS_IOC_SHUTDOWN ioctl
+snapshot               btrfs snapshots
+soak                   long running soak tests of any kind
+spaceman               xfs_spaceman functional tests
+splice                 splice system call
+stress                 fsstress filesystem exerciser
+subvol                 btrfs subvolumes
+swap                   swap files
+symlink                        symbolic links
+tape                   dump and restore with a tape
+thin                   thin provisioning
+trim                   FITRIM ioctl
+udf                    UDF functionality tests
+union                  tests from the unionmount test suite
+unlink                 O_TMPFILE unlinked files
+unshare                        fallocate FALLOC_FL_UNSHARE_RANGE
+v2log                  XFS v2 log format tests
+verity                 fsverity
+volume                 btrfs volume management
+whiteout               overlayfs whiteout functionality
+xino                   overlayfs xino feature
+zero                   fallocate FALLOC_FL_ZERO_RANGE
+zone                   zoned (SMR) device support
+======================= =======================================================
index b4ddf5e..90ae432 100644 (file)
@@ -4504,9 +4504,9 @@ do_renameat2(int opno, long r, int mode)
        flist_t         *flp;
        int             id;
        pathname_t      newf;
-       int             oldid;
+       int             oldid = 0;
        int             parid;
-       int             oldparid;
+       int             oldparid = 0;
        int             which;
        int             v;
        int             v1;
diff --git a/new b/new
index 2097a88..9651e0e 100755 (executable)
--- a/new
+++ b/new
@@ -83,51 +83,57 @@ then
     exit 1
 fi
 
-if [ $# -eq 0 ]
-then
+# Extract group names from the documentation.
+group_names() {
+       awk '/^[[:lower:][:digit:]_]/ {
+               if ($1 != "" && $1 != "Group" && $2 != "Name:" && $1 != "all")
+                       printf("%s\n", $1);
+       }' doc/group-names.txt
+}
 
-    while true
-    do
-       echo -n "Add to group(s) [other] (separate by space, ? for list): "
-       read ans
-       [ -z "$ans" ] && ans=other
-       if [ "X$ans" = "X?" ]
-       then
-           for d in $SRC_GROUPS; do
-               (cd "tests/$d/" ; ../../tools/mkgroupfile "$tmpfile")
-               l=$(sed -n < "$tmpfile" \
-                   -e 's/#.*//' \
-                   -e 's/$/ /' \
-                   -e 's;\(^[0-9][0-9][0-9]\)\(.*$\);\2;p')
-               grpl="$grpl $l"
-           done
-           lst=`for word in $grpl; do echo $word; done | sort| uniq `
-           echo $lst
-       else
-           # only allow lower cases, spaces, digits and underscore in group
-           inval=`echo $ans | tr -d '[:lower:][:space:][:digit:]_'`
-           if [ "$inval" != "" ]; then
-               echo "Invalid characters in group(s): $inval"
-               echo "Only lower cases, digits and underscore are allowed in groups, separated by space"
-               continue
-           else
-               # remove redundant spaces/tabs
-               ans=`echo "$ans" | sed 's/\s\+/ /g'`
-               break
-           fi
-       fi
-    done
+# Make sure that the new test's groups fit the correct format and are listed
+# in the group documentation file.
+check_groups() {
+       for g in "$@"; do
+               local inval="$(echo "${g}" | tr -d '[:lower:][:space:][:digit:]_')"
+               if [ -n "${inval}" ]; then
+                       echo "Invalid characters in group(s): ${inval}"
+                       echo "Only lower cases, digits and underscore are allowed in groups, separated by space"
+                       return 1
+               elif [ "${g}" = "other" ]; then
+                       echo "Do not add more tests to group \"other\""
+                       return 1
+               elif ! group_names | grep -q -w "${g}"; then
+                       echo "Warning: group \"${g}\" not defined in documentation"
+                       return 1
+               fi
+       done
+
+       return 0
+}
+
+if [ $# -eq 0 ]; then
+       # interactive mode
+       prompt="Add to group(s) [auto] (separate by space, ? for list): "
+       while true; do
+               read -p "${prompt}" -a new_groups || exit 1
+               case "${#new_groups[@]}" in
+               0)
+                       new_groups=("auto")
+                       ;;
+               1)
+                       if [ "${new_groups[0]}" = "?" ]; then
+                               echo $(group_names | grep -v -w 'other')
+                               continue
+                       fi
+                       ;;
+               esac
+               check_groups "${new_groups[@]}" && break
+       done
 else
-    # expert mode, groups are on the command line
-    #
-    (cd "$tdir" ; ../../tools/mkgroupfile "$tmpfile")
-    for g in $*
-    do
-       if ! grep -q "[[:space:]]$g" "$tmpfile"; then
-           echo "Warning: group \"$g\" not defined in $tdir tests"
-       fi
-    done
-    ans="$*"
+       # expert mode, groups are on the command line
+       new_groups=("$@")
+       check_groups "${new_groups[@]}" || exit 1
 fi
 
 echo -n "Creating skeletal script for you to edit ..."
@@ -144,7 +150,7 @@ cat <<End-of-File >$tdir/$id
 # what am I here for?
 #
 . ./common/preamble
-_begin_fstest $ans
+_begin_fstest ${new_groups[@]}
 
 # Override the default cleanup function.
 # _cleanup()
index 884bd86..25ab061 100644 (file)
@@ -18,7 +18,7 @@ TARGETS = dirstress fill fill2 getpagesize holes lstat64 \
        t_ext4_dax_journal_corruption t_ext4_dax_inline_corruption \
        t_ofd_locks t_mmap_collision mmap-write-concurrent \
        t_get_file_time t_create_short_dirs t_create_long_dirs t_enospc \
-       t_mmap_writev_overlap checkpoint_journal
+       t_mmap_writev_overlap checkpoint_journal mmap-rw-fault
 
 LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \
        preallo_rw_pattern_writer ftrunc trunc fs_perms testx looptest \
index 615cb6e..ec28d64 100644 (file)
@@ -16,6 +16,7 @@ int verbose;
 int pid;
 
 int checkflag=0;
+int create_only=0;
 
 #define MKNOD_DEV 0
 
@@ -51,7 +52,7 @@ main(
        nprocs_per_dir = 1;
        keep = 0;
         verbose = 0;
-       while ((c = getopt(argc, argv, "d:p:f:s:n:kvc")) != EOF) {
+       while ((c = getopt(argc, argv, "d:p:f:s:n:kvcC")) != EOF) {
                switch(c) {
                        case 'p':
                                nprocs = atoi(optarg);
@@ -80,6 +81,9 @@ main(
                        case 'c':
                                 checkflag++;
                                 break;
+                       case 'C':
+                               create_only++;
+                               break;
                }
        }
        if (errflg || (dirname == NULL)) {
@@ -170,6 +174,7 @@ dirstress(
        if (create_entries(nfiles)) {
             printf("!! [%d] create failed\n", pid);
         } else {
+           if (create_only) return 0;
             if (verbose) fprintf(stderr,"** [%d] scramble entries\n", pid);
            if (scramble_entries(nfiles)) {
                 printf("!! [%d] scramble failed\n", pid);
index c34d1a9..cde2b42 100755 (executable)
@@ -5,15 +5,12 @@
 . ./common/config
 . ./common/dmerror
 
-_dmerror_setup
+if [ -z "$DMERROR_DEV" ]; then
+       echo "Caller should have run _dmerror_init."
+       exit 1
+fi
 
 case $1 in
-cleanup)
-       _dmerror_cleanup
-       ;;
-init)
-       _dmerror_init
-       ;;
 load_error_table)
        _dmerror_load_error_table
        ;;
@@ -21,7 +18,7 @@ load_working_table)
        _dmerror_load_working_table
        ;;
 *)
-       echo "Usage: $0 {init|cleanup|load_error_table|load_working_table}"
+       echo "Usage: $0 {load_error_table|load_working_table}"
        exit 1
        ;;
 esac
index 1c3383a..d5e5436 100755 (executable)
@@ -111,7 +111,7 @@ sub normal {
 #
 
 chomp($cwd = `pwd`);
-chomp($_ = `which fill2 2>&1 | head -1`);
+chomp($_ = `type -P fill2 | head -1`);
 if (-x $_) {
   # look in the path
   $fill2 = fill2;
index 2c21213..83b7c89 100644 (file)
 #include <sys/xattr.h>
 #include <unistd.h>
 
+#ifdef HAVE_LINUX_BTRFS_H
+# ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID
+#  define btrfs_ioctl_vol_args_v2 override_btrfs_ioctl_vol_args_v2
+# endif
+#include <linux/btrfs.h>
+# undef btrfs_ioctl_vol_args_v2
+#endif
+
+#ifdef HAVE_LINUX_BTRFS_TREE_H
+#include <linux/btrfs_tree.h>
+#endif
+
 #ifdef HAVE_SYS_CAPABILITY_H
 #include <sys/capability.h>
 #endif
@@ -91,12 +103,21 @@ const char *t_fstype;
 /* path of the test device */
 const char *t_device;
 
+/* path of the test scratch device */
+const char *t_device_scratch;
+
 /* mountpoint of the test device */
 const char *t_mountpoint;
 
+/* mountpoint of the test scratch device */
+const char *t_mountpoint_scratch;
+
 /* fd for @t_mountpoint */
 int t_mnt_fd;
 
+/* fd for @t_mountpoint_scratch */
+int t_mnt_scratch_fd;
+
 /* fd for @T_DIR1 */
 int t_dir1_fd;
 
@@ -388,20 +409,6 @@ static inline bool switch_fsids(uid_t fsuid, gid_t fsgid)
        return true;
 }
 
-static inline bool switch_ids(uid_t uid, gid_t gid)
-{
-       if (setgroups(0, NULL))
-               return log_errno(false, "failure: setgroups");
-
-       if (setresgid(gid, gid, gid))
-               return log_errno(false, "failure: setresgid");
-
-       if (setresuid(uid, uid, uid))
-               return log_errno(false, "failure: setresuid");
-
-       return true;
-}
-
 static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
 {
        if (setns(fd, CLONE_NEWUSER))
@@ -3199,6 +3206,121 @@ out:
        return fret;
 }
 
+static int fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns(void)
+{
+       int fret = -1;
+       int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
+       if (file1_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /* Skip if vfs caps are unsupported. */
+       if (set_dummy_vfs_caps(file1_fd, 0, 1000))
+               return 0;
+
+       if (fremovexattr(file1_fd, "security.capability")) {
+               log_stderr("failure: fremovexattr");
+               goto out;
+       }
+       if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
+               log_stderr("failure: expected_dummy_vfs_caps_uid");
+               goto out;
+       }
+       if (errno != ENODATA) {
+               log_stderr("failure: errno");
+               goto out;
+       }
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
+       if (file1_fd2 < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Verify we can set a v3 fscap for real root; this regressed at
+        * some point. Make sure this doesn't happen again!
+        */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
+                       die("failure: expected_dummy_vfs_caps_uid");
+               if (errno != ENODATA)
+                       die("failure: errno");
+
+               if (set_dummy_vfs_caps(file1_fd2, 0, 0))
+                       die("failure: set_dummy_vfs_caps");
+
+               if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
+                       die("failure: expected_dummy_vfs_caps_uid");
+
+               if (!expected_dummy_vfs_caps_uid(file1_fd, 0) && errno != EOVERFLOW)
+                       die("failure: expected_dummy_vfs_caps_uid");
+
+               exit(EXIT_SUCCESS);
+       }
+
+       if (wait_for_pid(pid))
+               goto out;
+
+       if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
+               log_stderr("failure: expected_dummy_vfs_caps_uid");
+               goto out;
+       }
+
+       if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
+               log_stderr("failure: expected_dummy_vfs_caps_uid");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(file1_fd);
+       safe_close(file1_fd2);
+       safe_close(open_tree_fd);
+
+       return fret;
+}
+
 static int fscaps_idmapped_mounts_in_userns_separate_userns(void)
 {
        int fret = -1;
@@ -8711,124 +8833,4663 @@ out:
        return fret;
 }
 
-static void usage(void)
+static int nested_userns(void)
 {
-       fprintf(stderr, "Description:\n");
-       fprintf(stderr, "    Run idmapped mount tests\n\n");
+       int fret = -1;
+       int ret;
+       pid_t pid;
+       unsigned int id;
+       struct list *it, *next;
+       struct userns_hierarchy hierarchy[] = {
+               { .level = 1, .fd_userns = -EBADF, },
+               { .level = 2, .fd_userns = -EBADF, },
+               { .level = 3, .fd_userns = -EBADF, },
+               { .level = 4, .fd_userns = -EBADF, },
+               /* Dummy entry that marks the end. */
+               { .level = MAX_USERNS_LEVEL, .fd_userns = -EBADF, },
+       };
+       struct mount_attr attr_level1 = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = -EBADF,
+       };
+       struct mount_attr attr_level2 = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = -EBADF,
+       };
+       struct mount_attr attr_level3 = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = -EBADF,
+       };
+       struct mount_attr attr_level4 = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = -EBADF,
+       };
+       int fd_dir1 = -EBADF,
+           fd_open_tree_level1 = -EBADF,
+           fd_open_tree_level2 = -EBADF,
+           fd_open_tree_level3 = -EBADF,
+           fd_open_tree_level4 = -EBADF;
+       const unsigned int id_file_range = 10000;
+
+       list_init(&hierarchy[0].id_map);
+       list_init(&hierarchy[1].id_map);
+       list_init(&hierarchy[2].id_map);
+       list_init(&hierarchy[3].id_map);
 
-       fprintf(stderr, "Arguments:\n");
-       fprintf(stderr, "-d --device        Device used in the tests\n");
-       fprintf(stderr, "-m --mountpoint    Mountpoint of device\n");
+       /*
+        * Give a large map to the outermost user namespace so we can create
+        * comfortable nested maps.
+        */
+       ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_UID);
+       if (ret) {
+               log_stderr("failure: adding uidmap for userns at level 1");
+               goto out;
+       }
 
-       _exit(EXIT_SUCCESS);
-}
+       ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_GID);
+       if (ret) {
+               log_stderr("failure: adding gidmap for userns at level 1");
+               goto out;
+       }
 
-static const struct option longopts[] = {
-       {"device",      required_argument,      0,      'd'},
-       {"fstype",      required_argument,      0,      'f'},
-       {"mountpoint",  required_argument,      0,      'm'},
-       {"supported",   no_argument,            0,      's'},
-       {"help",        no_argument,            0,      'h'},
-       {NULL,          0,                      0,      0  },
-};
+       /* This is uid:0->2000000:100000000 in init userns. */
+       ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_UID);
+       if (ret) {
+               log_stderr("failure: adding uidmap for userns at level 2");
+               goto out;
+       }
 
-struct t_idmapped_mounts {
-       int (*test)(void);
-       const char *description;
-} basic_suite[] = {
-       { acls,                                                         "posix acls on regular mounts",                                                                 },
-       { create_in_userns,                                             "create operations in user namespace",                                                          },
-       { device_node_in_userns,                                        "device node in user namespace",                                                                },
-       { expected_uid_gid_idmapped_mounts,                             "expected ownership on idmapped mounts",                                                        },
-       { fscaps,                                                       "fscaps on regular mounts",                                                                     },
-       { fscaps_idmapped_mounts,                                       "fscaps on idmapped mounts",                                                                    },
-       { fscaps_idmapped_mounts_in_userns,                             "fscaps on idmapped mounts in user namespace",                                                  },
-       { fscaps_idmapped_mounts_in_userns_separate_userns,             "fscaps on idmapped mounts in user namespace with different id mappings ",                      },
-       { fsids_mapped,                                                 "mapped fsids",                                                                                 },
-       { fsids_unmapped,                                               "unmapped fsids",                                                                               },
-       { hardlink_crossing_mounts,                                     "cross mount hardlink",                                                                         },
-       { hardlink_crossing_idmapped_mounts,                            "cross idmapped mount hardlink",                                                                },
-       { hardlink_from_idmapped_mount,                                 "hardlinks from idmapped mounts",                                                               },
-       { hardlink_from_idmapped_mount_in_userns,                       "hardlinks from idmapped mounts in user namespace",                                             },
-#ifdef HAVE_LIBURING_H
-       { io_uring,                                                     "io_uring",                                                                                     },
-       { io_uring_userns,                                              "io_uring in user namespace",                                                                   },
-       { io_uring_idmapped,                                            "io_uring from idmapped mounts",                                                                },
-       { io_uring_idmapped_userns,                                     "io_uring from idmapped mounts in user namespace",                                              },
-       { io_uring_idmapped_unmapped,                                   "io_uring from idmapped mounts with unmapped ids",                                              },
-       { io_uring_idmapped_unmapped_userns,                            "io_uring from idmapped mounts with unmapped ids in user namespace",                            },
-#endif
-       { protected_symlinks,                                           "following protected symlinks on regular mounts",                                               },
-       { protected_symlinks_idmapped_mounts,                           "following protected symlinks on idmapped mounts",                                              },
-       { protected_symlinks_idmapped_mounts_in_userns,                 "following protected symlinks on idmapped mounts in user namespace",                            },
-       { rename_crossing_mounts,                                       "cross mount rename",                                                                           },
-       { rename_crossing_idmapped_mounts,                              "cross idmapped mount rename",                                                                  },
-       { rename_from_idmapped_mount,                                   "rename from idmapped mounts",                                                                  },
-       { rename_from_idmapped_mount_in_userns,                         "rename from idmapped mounts in user namespace",                                                },
-       { setattr_truncate,                                             "setattr truncate",                                                                             },
-       { setattr_truncate_idmapped,                                    "setattr truncate on idmapped mounts",                                                          },
-       { setattr_truncate_idmapped_in_userns,                          "setattr truncate on idmapped mounts in user namespace",                                        },
-       { setgid_create,                                                "create operations in directories with setgid bit set",                                         },
-       { setgid_create_idmapped,                                       "create operations in directories with setgid bit set on idmapped mounts",                      },
-       { setgid_create_idmapped_in_userns,                             "create operations in directories with setgid bit set on idmapped mounts in user namespace",    },
-       { setid_binaries,                                               "setid binaries on regular mounts",                                                             },
-       { setid_binaries_idmapped_mounts,                               "setid binaries on idmapped mounts",                                                            },
-       { setid_binaries_idmapped_mounts_in_userns,                     "setid binaries on idmapped mounts in user namespace",                                          },
-       { setid_binaries_idmapped_mounts_in_userns_separate_userns,     "setid binaries on idmapped mounts in user namespace with different id mappings",               },
-       { sticky_bit_unlink,                                            "sticky bit unlink operations on regular mounts",                                               },
-       { sticky_bit_unlink_idmapped_mounts,                            "sticky bit unlink operations on idmapped mounts",                                              },
-       { sticky_bit_unlink_idmapped_mounts_in_userns,                  "sticky bit unlink operations on idmapped mounts in user namespace",                            },
-       { sticky_bit_rename,                                            "sticky bit rename operations on regular mounts",                                               },
-       { sticky_bit_rename_idmapped_mounts,                            "sticky bit rename operations on idmapped mounts",                                              },
-       { sticky_bit_rename_idmapped_mounts_in_userns,                  "sticky bit rename operations on idmapped mounts in user namespace",                            },
-       { symlink_regular_mounts,                                       "symlink from regular mounts",                                                                  },
-       { symlink_idmapped_mounts,                                      "symlink from idmapped mounts",                                                                 },
-       { symlink_idmapped_mounts_in_userns,                            "symlink from idmapped mounts in user namespace",                                               },
-       { threaded_idmapped_mount_interactions,                         "threaded operations on idmapped mounts",                                                       },
-};
+       /* This is gid:0->2000000:100000000 in init userns. */
+       ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_GID);
+       if (ret) {
+               log_stderr("failure: adding gidmap for userns at level 2");
+               goto out;
+       }
 
-static bool run_test(struct t_idmapped_mounts suite[], size_t suite_size)
-{
-       int i;
+       /* This is uid:0->3000000:999 in init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_UID);
+       if (ret) {
+               log_stderr("failure: adding uidmap for userns at level 3");
+               goto out;
+       }
 
-       for (i = 0; i < suite_size; i++) {
-               struct t_idmapped_mounts *t = &suite[i];
-               int ret;
-               pid_t pid;
+       /* This is gid:0->3000000:999 in the init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_GID);
+       if (ret) {
+               log_stderr("failure: adding gidmap for userns at level 3");
+               goto out;
+       }
 
-               test_setup();
+       /* id 999 will remain unmapped. */
 
-               pid = fork();
-               if (pid < 0)
-                       return false;
+       /* This is uid:1000->2001000:1 in init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_UID);
+       if (ret) {
+               log_stderr("failure: adding uidmap for userns at level 3");
+               goto out;
+       }
 
-               if (pid == 0) {
-                       ret = t->test();
-                       if (ret) {
-                               fprintf(stderr, "failure: %s\n", t->description);
-                               exit(EXIT_FAILURE);
-                       }
+       /* This is gid:1000->2001000:1 in init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_GID);
+       if (ret) {
+               log_stderr("failure: adding gidmap for userns at level 3");
+               goto out;
+       }
 
-                       exit(EXIT_SUCCESS);
-               }
+       /* This is uid:1001->3001001:10000000 in init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_UID);
+       if (ret) {
+               log_stderr("failure: adding uidmap for userns at level 3");
+               goto out;
+       }
 
-               ret = wait_for_pid(pid);
-               test_cleanup();
+       /* This is gid:1001->3001001:10000000 in init userns. */
+       ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_GID);
+       if (ret) {
+               log_stderr("failure: adding gidmap for userns at level 3");
+               goto out;
+       }
 
-               if (ret)
-                       return false;
+       /* Don't write a mapping in the 4th userns. */
+       list_empty(&hierarchy[4].id_map);
+
+       /* Create the actual userns hierarchy. */
+       ret = create_userns_hierarchy(hierarchy);
+       if (ret) {
+               log_stderr("failure: create userns hierarchy");
+               goto out;
        }
 
-       return true;
-}
+       attr_level1.userns_fd = hierarchy[0].fd_userns;
+       attr_level2.userns_fd = hierarchy[1].fd_userns;
+       attr_level3.userns_fd = hierarchy[2].fd_userns;
+       attr_level4.userns_fd = hierarchy[3].fd_userns;
 
-int main(int argc, char *argv[])
-{
-       int fret, ret;
+       /*
+        * Create one directory where we create files for each uid/gid within
+        * the first userns.
+        */
+       if (mkdirat(t_dir1_fd, DIR1, 0777)) {
+               log_stderr("failure: mkdirat");
+               goto out;
+       }
+
+       fd_dir1 = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
+       if (fd_dir1 < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       for (id = 0; id <= id_file_range; id++) {
+               char file[256];
+
+               snprintf(file, sizeof(file), DIR1 "/" FILE1 "_%u", id);
+
+               if (mknodat(t_dir1_fd, file, S_IFREG | 0644, 0)) {
+                       log_stderr("failure: create %s", file);
+                       goto out;
+               }
+
+               if (fchownat(t_dir1_fd, file, id, id, AT_SYMLINK_NOFOLLOW)) {
+                       log_stderr("failure: fchownat %s", file);
+                       goto out;
+               }
+
+               if (!expected_uid_gid(t_dir1_fd, file, 0, id, id)) {
+                       log_stderr("failure: check ownership %s", file);
+                       goto out;
+               }
+       }
+
+       /* Create detached mounts for all the user namespaces. */
+       fd_open_tree_level1 = sys_open_tree(t_dir1_fd, DIR1,
+                                           AT_NO_AUTOMOUNT |
+                                           AT_SYMLINK_NOFOLLOW |
+                                           OPEN_TREE_CLOEXEC |
+                                           OPEN_TREE_CLONE);
+       if (fd_open_tree_level1 < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       fd_open_tree_level2 = sys_open_tree(t_dir1_fd, DIR1,
+                                           AT_NO_AUTOMOUNT |
+                                           AT_SYMLINK_NOFOLLOW |
+                                           OPEN_TREE_CLOEXEC |
+                                           OPEN_TREE_CLONE);
+       if (fd_open_tree_level2 < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       fd_open_tree_level3 = sys_open_tree(t_dir1_fd, DIR1,
+                                           AT_NO_AUTOMOUNT |
+                                           AT_SYMLINK_NOFOLLOW |
+                                           OPEN_TREE_CLOEXEC |
+                                           OPEN_TREE_CLONE);
+       if (fd_open_tree_level3 < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       fd_open_tree_level4 = sys_open_tree(t_dir1_fd, DIR1,
+                                           AT_NO_AUTOMOUNT |
+                                           AT_SYMLINK_NOFOLLOW |
+                                           OPEN_TREE_CLOEXEC |
+                                           OPEN_TREE_CLONE);
+       if (fd_open_tree_level4 < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       /* Turn detached mounts into detached idmapped mounts. */
+       if (sys_mount_setattr(fd_open_tree_level1, "", AT_EMPTY_PATH,
+                             &attr_level1, sizeof(attr_level1))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       if (sys_mount_setattr(fd_open_tree_level2, "", AT_EMPTY_PATH,
+                             &attr_level2, sizeof(attr_level2))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       if (sys_mount_setattr(fd_open_tree_level3, "", AT_EMPTY_PATH,
+                             &attr_level3, sizeof(attr_level3))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       if (sys_mount_setattr(fd_open_tree_level4, "", AT_EMPTY_PATH,
+                             &attr_level4, sizeof(attr_level4))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /* Verify that ownership looks correct for callers in the init userns. */
+       for (id = 0; id <= id_file_range; id++) {
+               bool bret;
+               unsigned int id_level1, id_level2, id_level3;
+               char file[256];
+
+               snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+               id_level1 = id + 1000000;
+               if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1)) {
+                       log_stderr("failure: check ownership %s", file);
+                       goto out;
+               }
+
+               id_level2 = id + 2000000;
+               if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2)) {
+                       log_stderr("failure: check ownership %s", file);
+                       goto out;
+               }
+
+               if (id == 999) {
+                       /* This id is unmapped. */
+                       bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+               } else if (id == 1000) {
+                       id_level3 = id + 2000000; /* We punched a hole in the map at 1000. */
+                       bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+               } else {
+                       id_level3 = id + 3000000; /* Rest is business as usual. */
+                       bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+               }
+               if (!bret) {
+                       log_stderr("failure: check ownership %s", file);
+                       goto out;
+               }
+
+               if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid)) {
+                       log_stderr("failure: check ownership %s", file);
+                       goto out;
+               }
+       }
+
+       /* Verify that ownership looks correct for callers in the first userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       bool bret;
+                       unsigned int id_level1, id_level2, id_level3;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       id_level1 = id;
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
+                               die("failure: check ownership %s", file);
+
+                       id_level2 = id + 1000000;
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
+                               die("failure: check ownership %s", file);
+
+                       if (id == 999) {
+                               /* This id is unmapped. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+                       } else if (id == 1000) {
+                               id_level3 = id + 1000000; /* We punched a hole in the map at 1000. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       } else {
+                               id_level3 = id + 2000000; /* Rest is business as usual. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that ownership looks correct for callers in the second userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       bool bret;
+                       unsigned int id_level2, id_level3;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       id_level2 = id;
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
+                               die("failure: check ownership %s", file);
+
+                       if (id == 999) {
+                               /* This id is unmapped. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+                       } else if (id == 1000) {
+                               id_level3 = id; /* We punched a hole in the map at 1000. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       } else {
+                               id_level3 = id + 1000000; /* Rest is business as usual. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that ownership looks correct for callers in the third userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       bool bret;
+                       unsigned int id_level2, id_level3;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (id == 1000) {
+                               /*
+                                * The idmapping of the third userns has a hole
+                                * at uid/gid 1000. That means:
+                                * - 1000->userns_0(2000000) // init userns
+                                * - 1000->userns_1(2000000) // level 1
+                                * - 1000->userns_2(1000000) // level 2
+                                * - 1000->userns_3(1000)    // level 3 (because level 3 has a hole)
+                                */
+                               id_level2 = id;
+                               bret = expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2);
+                       } else {
+                               bret = expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+
+                       if (id == 999) {
+                               /* This id is unmapped. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+                       } else {
+                               id_level3 = id; /* Rest is business as usual. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that ownership looks correct for callers in the fourth userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that chown works correctly for callers in the first userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       bool bret;
+                       unsigned int id_level1, id_level2, id_level3, id_new;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       id_new = id + 1;
+                       if (fchownat(fd_open_tree_level1, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
+                               die("failure: fchownat %s", file);
+
+                       id_level1 = id_new;
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
+                               die("failure: check ownership %s", file);
+
+                       id_level2 = id_new + 1000000;
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
+                               die("failure: check ownership %s", file);
+
+                       if (id_new == 999) {
+                               /* This id is unmapped. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+                       } else if (id_new == 1000) {
+                               id_level3 = id_new + 1000000; /* We punched a hole in the map at 1000. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       } else {
+                               id_level3 = id_new + 2000000; /* Rest is business as usual. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       /* Revert ownership. */
+                       if (fchownat(fd_open_tree_level1, file, id, id, AT_SYMLINK_NOFOLLOW))
+                               die("failure: fchownat %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that chown works correctly for callers in the second userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       bool bret;
+                       unsigned int id_level2, id_level3, id_new;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       id_new = id + 1;
+                       if (fchownat(fd_open_tree_level2, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
+                               die("failure: fchownat %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       id_level2 = id_new;
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
+                               die("failure: check ownership %s", file);
+
+                       if (id_new == 999) {
+                               /* This id is unmapped. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
+                       } else if (id_new == 1000) {
+                               id_level3 = id_new; /* We punched a hole in the map at 1000. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       } else {
+                               id_level3 = id_new + 1000000; /* Rest is business as usual. */
+                               bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
+                       }
+                       if (!bret)
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       /* Revert ownership. */
+                       if (fchownat(fd_open_tree_level2, file, id, id, AT_SYMLINK_NOFOLLOW))
+                               die("failure: fchownat %s", file);
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that chown works correctly for callers in the third userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       unsigned int id_new;
+                       char file[256];
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       id_new = id + 1;
+                       if (id_new == 999 || id_new == 1000) {
+                               /*
+                                * We can't change ownership as we can't
+                                * chown from or to an unmapped id.
+                                */
+                               if (!fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
+                                       die("failure: fchownat %s", file);
+                       } else {
+                               if (fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
+                                       die("failure: fchownat %s", file);
+                       }
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       /* There's no id 1000 anymore as we changed ownership for id 1000 to 1001 above. */
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (id_new == 999) {
+                               /*
+                                * We did not change ownership as we can't
+                                * chown to an unmapped id.
+                                */
+                               if (!expected_uid_gid(fd_open_tree_level3, file, 0, id, id))
+                                       die("failure: check ownership %s", file);
+                       } else if (id_new == 1000) {
+                               /*
+                                * We did not change ownership as we can't
+                                * chown from an unmapped id.
+                                */
+                               if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
+                                       die("failure: check ownership %s", file);
+                       } else {
+                               if (!expected_uid_gid(fd_open_tree_level3, file, 0, id_new, id_new))
+                                       die("failure: check ownership %s", file);
+                       }
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       /* Revert ownership. */
+                       if (id_new != 999 && id_new != 1000) {
+                               if (fchownat(fd_open_tree_level3, file, id, id, AT_SYMLINK_NOFOLLOW))
+                                       die("failure: fchownat %s", file);
+                       }
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* Verify that chown works correctly for callers in the fourth userns. */
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
+                       die("failure: switch_userns");
+
+               for (id = 0; id <= id_file_range; id++) {
+                       char file[256];
+                       unsigned long id_new;
+
+                       snprintf(file, sizeof(file), FILE1 "_%u", id);
+
+                       id_new = id + 1;
+                       if (!fchownat(fd_open_tree_level4, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
+                               die("failure: fchownat %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+                       if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
+                               die("failure: check ownership %s", file);
+
+               }
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+
+out:
+       list_for_each_safe(it, &hierarchy[0].id_map, next) {
+               list_del(it);
+               free(it->elem);
+               free(it);
+       }
+
+       list_for_each_safe(it, &hierarchy[1].id_map, next) {
+               list_del(it);
+               free(it->elem);
+               free(it);
+       }
+
+       list_for_each_safe(it, &hierarchy[2].id_map, next) {
+               list_del(it);
+               free(it->elem);
+               free(it);
+       }
+
+       safe_close(hierarchy[0].fd_userns);
+       safe_close(hierarchy[1].fd_userns);
+       safe_close(hierarchy[2].fd_userns);
+       safe_close(fd_dir1);
+       safe_close(fd_open_tree_level1);
+       safe_close(fd_open_tree_level2);
+       safe_close(fd_open_tree_level3);
+       safe_close(fd_open_tree_level4);
+       return fret;
+}
+
+#ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+
+#ifndef BTRFS_PATH_NAME_MAX
+#define BTRFS_PATH_NAME_MAX 4087
+#endif
+
+struct btrfs_ioctl_vol_args { /* argument block passed to BTRFS_IOC_SNAP_DESTROY */
+       __s64 fd;
+       char name[BTRFS_PATH_NAME_MAX + 1]; /* NUL-terminated name: up to BTRFS_PATH_NAME_MAX characters plus the terminator */
+};
+#endif
+
+#ifndef HAVE_STRUCT_BTRFS_QGROUP_LIMIT /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+struct btrfs_qgroup_limit { /* embedded as 'lim' in struct btrfs_qgroup_inherit; presumably mirrors the kernel UAPI layout - TODO confirm */
+       __u64 flags;
+       __u64 max_rfer;
+       __u64 max_excl;
+       __u64 rsv_rfer;
+       __u64 rsv_excl;
+};
+#endif
+
+#ifndef HAVE_STRUCT_BTRFS_QGROUP_INHERIT /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+struct btrfs_qgroup_inherit { /* only referenced through the qgroup_inherit pointer in struct btrfs_ioctl_vol_args_v2 */
+       __u64 flags;
+       __u64 num_qgroups;
+       __u64 num_ref_copies;
+       __u64 num_excl_copies;
+       struct btrfs_qgroup_limit lim;
+       __u64 qgroups[0]; /* zero-length trailing array; presumably num_qgroups gives its element count - TODO confirm */
+};
+#endif
+
+#if !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2) || !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID) /* fallback is used when the struct, or its subvolid member, was not detected */
+
+#ifndef BTRFS_SUBVOL_NAME_MAX
+#define BTRFS_SUBVOL_NAME_MAX 4039
+#endif
+
+struct btrfs_ioctl_vol_args_v2 { /* argument block of the *_V2 subvolume/snapshot create and destroy ioctls */
+       __s64 fd; /* btrfs_create_snapshot() stores the fd of the subvolume to snapshot here */
+       __u64 transid;
+       __u64 flags; /* BTRFS_SUBVOL_* bits, e.g. BTRFS_SUBVOL_RDONLY or BTRFS_SUBVOL_SPEC_BY_ID */
+       union {
+               struct {
+                       __u64 size;
+                       struct btrfs_qgroup_inherit *qgroup_inherit;
+               };
+               __u64 unused[4];
+       };
+       union { /* the three members below share storage; the helpers in this file set exactly one of name or subvolid */
+               char name[BTRFS_SUBVOL_NAME_MAX + 1]; /* NUL-terminated subvolume/snapshot name */
+               __u64 devid;
+               __u64 subvolid; /* set together with BTRFS_SUBVOL_SPEC_BY_ID by btrfs_delete_subvolume_id() */
+       };
+};
+#endif
+
+#ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+
+#ifndef BTRFS_INO_LOOKUP_PATH_MAX
+#define BTRFS_INO_LOOKUP_PATH_MAX 4080
+#endif
+struct btrfs_ioctl_ino_lookup_args { /* argument block of BTRFS_IOC_INO_LOOKUP */
+       __u64 treeid; /* btrfs_get_subvolume_id() passes 0 and reads the subvolume id back from this field */
+       __u64 objectid; /* btrfs_get_subvolume_id() passes BTRFS_FIRST_FREE_OBJECTID */
+       char name[BTRFS_INO_LOOKUP_PATH_MAX];
+};
+#endif
+
+#ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+
+#ifndef BTRFS_VOL_NAME_MAX
+#define BTRFS_VOL_NAME_MAX 255
+#endif
+
+#ifndef BTRFS_INO_LOOKUP_USER_PATH_MAX
+#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
+#endif
+
+struct btrfs_ioctl_ino_lookup_user_args { /* argument block of BTRFS_IOC_INO_LOOKUP_USER */
+       __u64 dirid; /* input, filled in by get_subvolume_path() */
+       __u64 treeid; /* input, filled in by get_subvolume_path() */
+       char name[BTRFS_VOL_NAME_MAX + 1]; /* read back as a NUL-terminated string by get_subvolume_path() */
+       char path[BTRFS_INO_LOOKUP_USER_PATH_MAX]; /* read back as a NUL-terminated directory prefix by get_subvolume_path() */
+};
+#endif
+
+#ifndef HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS /* local fallback definition, compiled only when this HAVE_ macro is not defined */
+
+#ifndef BTRFS_MAX_ROOTREF_BUFFER_NUM
+#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
+#endif
+
+struct btrfs_ioctl_get_subvol_rootref_args { /* argument block of BTRFS_IOC_GET_SUBVOL_ROOTREF */
+       __u64 min_treeid; /* presumably a cursor advanced by the kernel between successive calls - TODO confirm */
+       struct {
+               __u64 treeid;
+               __u64 dirid;
+       } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM]; /* (treeid, dirid) pairs consumed one by one by btrfs_iterator_next() */
+       __u8 num_items; /* btrfs_iterator_next() treats this as the number of valid rootref[] entries */
+       __u8 align[7];
+};
+#endif
+
+#ifndef BTRFS_IOCTL_MAGIC /* ioctl type value shared by every btrfs ioctl number defined below */
+#define BTRFS_IOCTL_MAGIC 0x94
+#endif
+
+#ifndef BTRFS_IOC_SNAP_DESTROY /* used by btrfs_delete_subvolume(): delete by name */
+#define BTRFS_IOC_SNAP_DESTROY \
+       _IOW(BTRFS_IOCTL_MAGIC, 15, struct btrfs_ioctl_vol_args)
+#endif
+
+#ifndef BTRFS_IOC_SNAP_DESTROY_V2 /* used by btrfs_delete_subvolume_id(): delete by subvolume id */
+#define BTRFS_IOC_SNAP_DESTROY_V2 \
+       _IOW(BTRFS_IOCTL_MAGIC, 63, struct btrfs_ioctl_vol_args_v2)
+#endif
+
+#ifndef BTRFS_IOC_SNAP_CREATE_V2 /* used by btrfs_create_snapshot() */
+#define BTRFS_IOC_SNAP_CREATE_V2 \
+       _IOW(BTRFS_IOCTL_MAGIC, 23, struct btrfs_ioctl_vol_args_v2)
+#endif
+
+#ifndef BTRFS_IOC_SUBVOL_CREATE_V2 /* used by btrfs_create_subvolume() */
+#define BTRFS_IOC_SUBVOL_CREATE_V2 \
+       _IOW(BTRFS_IOCTL_MAGIC, 24, struct btrfs_ioctl_vol_args_v2)
+#endif
+
+#ifndef BTRFS_IOC_SUBVOL_GETFLAGS /* reads the subvolume flag word; see btrfs_get_subvolume_ro() */
+#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
+#endif
+
+#ifndef BTRFS_IOC_SUBVOL_SETFLAGS /* writes the subvolume flag word; see btrfs_set_subvolume_ro() */
+#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
+#endif
+
+#ifndef BTRFS_IOC_INO_LOOKUP /* used by btrfs_get_subvolume_id() */
+#define BTRFS_IOC_INO_LOOKUP \
+       _IOWR(BTRFS_IOCTL_MAGIC, 18, struct btrfs_ioctl_ino_lookup_args)
+#endif
+
+#ifndef BTRFS_IOC_INO_LOOKUP_USER /* used by get_subvolume_path() */
+#define BTRFS_IOC_INO_LOOKUP_USER \
+       _IOWR(BTRFS_IOCTL_MAGIC, 62, struct btrfs_ioctl_ino_lookup_user_args)
+#endif
+
+#ifndef BTRFS_IOC_GET_SUBVOL_ROOTREF /* used by btrfs_iterator_next() to list child subvolume references */
+#define BTRFS_IOC_GET_SUBVOL_ROOTREF \
+       _IOWR(BTRFS_IOCTL_MAGIC, 61, struct btrfs_ioctl_get_subvol_rootref_args)
+#endif
+
+#ifndef BTRFS_SUBVOL_RDONLY /* flag bit tested and set by the read-only helpers and btrfs_create_snapshot() */
+#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
+#endif
+
+#ifndef BTRFS_SUBVOL_SPEC_BY_ID /* flag bit set by btrfs_delete_subvolume_id() alongside args.subvolid */
+#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
+#endif
+
+#ifndef BTRFS_FIRST_FREE_OBJECTID /* objectid passed to BTRFS_IOC_INO_LOOKUP by btrfs_get_subvolume_id() */
+#define BTRFS_FIRST_FREE_OBJECTID 256ULL
+#endif
+
+/*
+ * Issue BTRFS_IOC_SNAP_DESTROY on @parent_fd for the subvolume called @name.
+ *
+ * Returns 0 on success, -ENAMETOOLONG when @name (plus its terminator) does
+ * not fit into the ioctl argument, and -1 when the ioctl fails; in the latter
+ * case errno is whatever ioctl() left behind.
+ */
+static int btrfs_delete_subvolume(int parent_fd, const char *name)
+{
+       struct btrfs_ioctl_vol_args destroy_args = {};
+       size_t name_len = strlen(name);
+
+       /* One byte must remain for the terminating NUL. */
+       if (name_len >= sizeof(destroy_args.name))
+               return -ENAMETOOLONG;
+
+       memcpy(destroy_args.name, name, name_len);
+       destroy_args.name[name_len] = '\0';
+
+       return ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY, &destroy_args) < 0 ? -1 : 0;
+}
+
+/*
+ * Issue BTRFS_IOC_SNAP_DESTROY_V2 on @parent_fd, selecting the victim by
+ * subvolume id (@subvolid) instead of by name.
+ *
+ * Returns 0 on success and -1 when the ioctl fails (errno set by ioctl()).
+ */
+static int btrfs_delete_subvolume_id(int parent_fd, uint64_t subvolid)
+{
+       struct btrfs_ioctl_vol_args_v2 destroy_args = {};
+
+       /* BTRFS_SUBVOL_SPEC_BY_ID goes together with the subvolid member. */
+       destroy_args.subvolid = subvolid;
+       destroy_args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
+
+       if (ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY_V2, &destroy_args) < 0)
+               return -1;
+       return 0;
+}
+
+/*
+ * Issue BTRFS_IOC_SUBVOL_CREATE_V2 on @parent_fd to create a subvolume
+ * called @name.
+ *
+ * Returns 0 on success, -ENAMETOOLONG when @name (plus its terminator) does
+ * not fit into the ioctl argument, and -1 when the ioctl fails (errno set by
+ * ioctl()).
+ */
+static int btrfs_create_subvolume(int parent_fd, const char *name)
+{
+       struct btrfs_ioctl_vol_args_v2 create_args = {};
+       size_t name_len = strlen(name);
+
+       /* One byte must remain for the terminating NUL. */
+       if (name_len >= sizeof(create_args.name))
+               return -ENAMETOOLONG;
+
+       memcpy(create_args.name, name, name_len);
+       create_args.name[name_len] = '\0';
+
+       return ioctl(parent_fd, BTRFS_IOC_SUBVOL_CREATE_V2, &create_args) < 0 ? -1 : 0;
+}
+
+/*
+ * Issue BTRFS_IOC_SNAP_CREATE_V2 on @parent_fd to snapshot the subvolume
+ * open at @fd under the name @name.
+ *
+ * @flags may only contain BTRFS_SUBVOL_RDONLY; any other bit yields -EINVAL.
+ * Returns 0 on success, -ENAMETOOLONG when @name does not fit into the ioctl
+ * argument, and -1 when the ioctl fails (errno set by ioctl()).
+ */
+static int btrfs_create_snapshot(int fd, int parent_fd, const char *name,
+                                int flags)
+{
+       struct btrfs_ioctl_vol_args_v2 snap_args = {
+               .fd = fd,
+       };
+       size_t name_len;
+
+       /* Reject every flag other than the read-only bit. */
+       if (flags & ~BTRFS_SUBVOL_RDONLY)
+               return -EINVAL;
+
+       name_len = strlen(name);
+       if (name_len >= sizeof(snap_args.name))
+               return -ENAMETOOLONG;
+
+       memcpy(snap_args.name, name, name_len);
+       snap_args.name[name_len] = '\0';
+
+       if (flags & BTRFS_SUBVOL_RDONLY)
+               snap_args.flags |= BTRFS_SUBVOL_RDONLY;
+
+       return ioctl(parent_fd, BTRFS_IOC_SNAP_CREATE_V2, &snap_args) < 0 ? -1 : 0;
+}
+
+/*
+ * Query the flag word of the subvolume open at @fd and report through
+ * @read_only_ret whether BTRFS_SUBVOL_RDONLY is set.
+ *
+ * Returns 0 on success; returns -1 and leaves *@read_only_ret untouched when
+ * BTRFS_IOC_SUBVOL_GETFLAGS fails.
+ */
+static int btrfs_get_subvolume_ro(int fd, bool *read_only_ret)
+{
+       uint64_t subvol_flags;
+
+       if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags) < 0)
+               return -1;
+
+       *read_only_ret = (subvol_flags & BTRFS_SUBVOL_RDONLY) != 0;
+       return 0;
+}
+
+/*
+ * Set (@read_only true) or clear (@read_only false) BTRFS_SUBVOL_RDONLY on
+ * the subvolume open at @fd. The current flag word is read first so that all
+ * other bits are written back unchanged.
+ *
+ * Returns 0 on success and -1 when either ioctl fails.
+ */
+static int btrfs_set_subvolume_ro(int fd, bool read_only)
+{
+       uint64_t subvol_flags;
+
+       if (ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &subvol_flags) < 0)
+               return -1;
+
+       if (!read_only)
+               subvol_flags &= ~BTRFS_SUBVOL_RDONLY;
+       else
+               subvol_flags |= BTRFS_SUBVOL_RDONLY;
+
+       if (ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &subvol_flags) < 0)
+               return -1;
+       return 0;
+}
+
+/*
+ * Look up the id of the subvolume that @fd belongs to and store it in
+ * @id_ret. This issues BTRFS_IOC_INO_LOOKUP with treeid 0 and objectid
+ * BTRFS_FIRST_FREE_OBJECTID and takes the treeid the ioctl hands back.
+ *
+ * Returns 0 on success; returns -1 and leaves *@id_ret untouched on failure.
+ */
+static int btrfs_get_subvolume_id(int fd, uint64_t *id_ret)
+{
+       struct btrfs_ioctl_ino_lookup_args lookup = {
+               .objectid = BTRFS_FIRST_FREE_OBJECTID,
+               .treeid = 0,
+       };
+
+       if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &lookup) < 0)
+               return -1;
+
+       *id_ret = lookup.treeid;
+       return 0;
+}
+
+/*
+ * The following helpers are adapted from the btrfsutils library. We can't use
+ * the library directly since we need full control over how the subvolume
+ * iteration happens. We need to be able to check whether unprivileged
+ * subvolume iteration is possible, i.e. whether BTRFS_IOC_INO_LOOKUP_USER is
+ * available and also ensure that it is actually used when looking up paths.
+ */
+struct btrfs_stack { /* one entry of btrfs_iter.search_stack: a subvolume whose child references are being walked */
+       uint64_t tree_id; /* id handed to append_stack(); reported through id_ret by btrfs_iterator_next() */
+       struct btrfs_ioctl_get_subvol_rootref_args rootref_args; /* buffer refilled with BTRFS_IOC_GET_SUBVOL_ROOTREF */
+       size_t items_pos; /* index of the next unconsumed rootref_args.rootref[] element */
+       size_t path_len; /* number of bytes of btrfs_iter.cur_path that make up this entry's path */
+};
+
+struct btrfs_iter { /* state of one subvolume iteration, created by btrfs_iterator_start() */
+       int fd; /* fd handed to btrfs_iterator_start(); closed by btrfs_iterator_end() */
+       int cur_fd; /* fd the lookup ioctls are issued on; equals fd until a child subvolume is opened */
+
+       struct btrfs_stack *search_stack; /* heap array, grown by append_stack() */
+       size_t stack_len; /* number of entries in use */
+       size_t stack_capacity; /* number of entries allocated */
+
+       char *cur_path; /* heap buffer holding the path of the top stack entry */
+       size_t cur_path_capacity; /* allocated size of cur_path in bytes */
+};
+
+/*
+ * Return the most recently pushed search stack entry. The caller must make
+ * sure the stack is not empty.
+ */
+static struct btrfs_stack *top_stack_entry(struct btrfs_iter *iter)
+{
+       size_t last = iter->stack_len - 1;
+
+       return iter->search_stack + last;
+}
+
+/*
+ * Drop the top search stack entry and move iter->cur_fd back up to the
+ * directory of the entry below it.
+ *
+ * One ".." is opened at offset 0 and at every '/' found in the part of
+ * cur_path that belonged only to the popped entry. Removing the last
+ * remaining entry needs no directory change at all.
+ *
+ * Returns 0 on success and -1 when opening ".." fails.
+ */
+static int pop_stack(struct btrfs_iter *iter)
+{
+       struct btrfs_stack *popped, *below;
+       size_t pos;
+       int walk_fd;
+
+       if (iter->stack_len == 1) {
+               iter->stack_len--;
+               return 0;
+       }
+
+       popped = top_stack_entry(iter);
+       iter->stack_len--;
+       below = top_stack_entry(iter);
+
+       walk_fd = iter->cur_fd;
+       for (pos = below->path_len; pos < popped->path_len; pos++) {
+               int up_fd;
+
+               /* Only component boundaries require a step upwards. */
+               if (pos != 0 && iter->cur_path[pos] != '/')
+                       continue;
+
+               up_fd = openat(walk_fd, "..", O_RDONLY);
+               /* Intermediate fds are ours to close; cur_fd is dealt with below. */
+               if (walk_fd != iter->cur_fd)
+                       close(walk_fd);
+               if (up_fd == -1)
+                       return -1;
+               walk_fd = up_fd;
+       }
+
+       /* iter->fd stays open until btrfs_iterator_end(). */
+       if (iter->cur_fd != iter->fd)
+               close(iter->cur_fd);
+       iter->cur_fd = walk_fd;
+
+       return 0;
+}
+
+static int append_stack(struct btrfs_iter *iter, uint64_t tree_id, size_t path_len)
+{
+       struct btrfs_stack *entry;
+
+       if (iter->stack_len >= iter->stack_capacity) {
+               size_t new_capacity = iter->stack_capacity * 2;
+               struct btrfs_stack *new_search_stack;
+#ifdef HAVE_REALLOCARRAY
+               new_search_stack = reallocarray(iter->search_stack, new_capacity,
+                                               sizeof(*iter->search_stack));
+#else
+               new_search_stack = realloc(iter->search_stack, new_capacity * sizeof(*iter->search_stack));
+#endif
+               if (!new_search_stack)
+                       return -ENOMEM;
+
+               iter->stack_capacity = new_capacity;
+               iter->search_stack = new_search_stack;
+       }
+
+       entry = &iter->search_stack[iter->stack_len];
+
+       memset(entry, 0, sizeof(*entry));
+       entry->path_len = path_len;
+       entry->tree_id = tree_id;
+
+       if (iter->stack_len) {
+               struct btrfs_stack *top;
+               char *path;
+               int fd;
+
+               top = top_stack_entry(iter);
+               path = &iter->cur_path[top->path_len];
+               if (*path == '/')
+                       path++;
+               fd = openat(iter->cur_fd, path, O_RDONLY);
+               if (fd == -1)
+                       return -errno;
+
+               close(iter->cur_fd);
+               iter->cur_fd = fd;
+       }
+
+       iter->stack_len++;
+
+       return 0;
+}
+
+/*
+ * Allocate a subvolume iterator rooted at @fd and push the initial search
+ * stack entry for subvolume id @top.
+ *
+ * On success the iterator is stored in *@ret and 0 is returned; the iterator
+ * then owns @fd and closes it in btrfs_iterator_end(). On failure a negative
+ * value is returned, everything allocated here is freed, and *@ret is left
+ * untouched.
+ */
+static int btrfs_iterator_start(int fd, uint64_t top, struct btrfs_iter **ret)
+{
+       struct btrfs_iter *new_iter;
+       int rc;
+
+       new_iter = malloc(sizeof(*new_iter));
+       if (!new_iter)
+               return -ENOMEM;
+
+       new_iter->fd = fd;
+       new_iter->cur_fd = fd;
+       new_iter->stack_len = 0;
+       new_iter->stack_capacity = 4;
+       new_iter->cur_path_capacity = 256;
+
+       new_iter->search_stack = malloc(new_iter->stack_capacity *
+                                       sizeof(*new_iter->search_stack));
+       if (!new_iter->search_stack) {
+               rc = -ENOMEM;
+               goto free_iter;
+       }
+
+       new_iter->cur_path = malloc(new_iter->cur_path_capacity);
+       if (!new_iter->cur_path) {
+               rc = -ENOMEM;
+               goto free_stack;
+       }
+
+       /* The first entry has an empty path and opens no directory. */
+       rc = append_stack(new_iter, top, 0);
+       if (rc)
+               goto free_path;
+
+       *ret = new_iter;
+       return 0;
+
+free_path:
+       free(new_iter->cur_path);
+free_stack:
+       free(new_iter->search_stack);
+free_iter:
+       free(new_iter);
+       return rc;
+}
+
+/*
+ * Release an iterator created by btrfs_iterator_start(): free its buffers,
+ * close the fd it is currently positioned on (when that is a separate
+ * descriptor) and the fd it was started with. A NULL @iter is ignored.
+ */
+static void btrfs_iterator_end(struct btrfs_iter *iter)
+{
+       if (!iter)
+               return;
+
+       free(iter->cur_path);
+       free(iter->search_stack);
+       if (iter->cur_fd != iter->fd)
+               close(iter->cur_fd);
+       close(iter->fd);
+       free(iter);
+}
+
+/*
+ * Extend iter->cur_path, starting right after the top stack entry's path,
+ * with @dir (@dir_len bytes) followed by @name (@name_len bytes), and
+ * NUL-terminate the result. The buffer is grown when it is too small.
+ *
+ * A '/' is inserted before @dir when both the existing path and @dir are
+ * non-empty. A '/' is inserted before @name only when the existing path is
+ * non-empty and there is no @dir, because the lookup ioctl already ends the
+ * directory part with a slash.
+ *
+ * The resulting length (without the terminator) is stored in @path_len_ret.
+ * Returns 0 on success and -ENOMEM when the buffer cannot be grown.
+ */
+static int __append_path(struct btrfs_iter *iter, const char *name,
+                        size_t name_len, const char *dir, size_t dir_len,
+                        size_t *path_len_ret)
+{
+       struct btrfs_stack *top = top_stack_entry(iter);
+       const size_t base_len = top->path_len;
+       const bool slash_before_dir = base_len && dir_len;
+       const bool slash_before_name = base_len && !dir_len && name_len;
+       const size_t total_len = base_len + slash_before_dir + dir_len +
+                                slash_before_name + name_len;
+       char *out;
+
+       /* Room is needed for the path plus its NUL terminator. */
+       if (total_len + 1 > iter->cur_path_capacity) {
+               char *grown = realloc(iter->cur_path, total_len + 1);
+
+               if (!grown)
+                       return -ENOMEM;
+               iter->cur_path_capacity = total_len + 1;
+               iter->cur_path = grown;
+       }
+
+       out = iter->cur_path + base_len;
+       if (slash_before_dir)
+               *out++ = '/';
+       memcpy(out, dir, dir_len);
+       out += dir_len;
+       if (slash_before_name)
+               *out++ = '/';
+       memcpy(out, name, name_len);
+       out[name_len] = '\0';
+
+       *path_len_ret = total_len;
+
+       return 0;
+}
+
+/*
+ * Resolve the path of the subvolume reference (@treeid, @dirid) with
+ * BTRFS_IOC_INO_LOOKUP_USER issued on iter->cur_fd, and append the returned
+ * directory prefix and name to iter->cur_path.
+ *
+ * Returns -1 when the ioctl fails (errno set by ioctl()); otherwise returns
+ * whatever __append_path() returns, with the new path length stored in
+ * @path_len_ret.
+ */
+static int get_subvolume_path(struct btrfs_iter *iter, uint64_t treeid,
+                             uint64_t dirid, size_t *path_len_ret)
+{
+       struct btrfs_ioctl_ino_lookup_user_args lookup = {
+               .dirid = dirid,
+               .treeid = treeid,
+       };
+
+       if (ioctl(iter->cur_fd, BTRFS_IOC_INO_LOOKUP_USER, &lookup) == -1)
+               return -1;
+
+       return __append_path(iter, lookup.name, strlen(lookup.name),
+                            lookup.path, strlen(lookup.path), path_len_ret);
+}
+
+/*
+ * Advance the iterator to the next subvolume.
+ *
+ * Returns 0 when a subvolume was found, 1 when the search stack is empty
+ * (iteration finished), and a negative value on error. On success, when
+ * @path_ret is non-NULL it receives a malloc()ed copy of the subvolume path
+ * that the caller must free, and when @id_ret is non-NULL it receives the
+ * subvolume's tree id.
+ *
+ * Child subvolumes whose path lookup fails with EACCES, or whose directory
+ * cannot be opened because of ENOENT or EACCES, are skipped.
+ */
+static int btrfs_iterator_next(struct btrfs_iter *iter, char **path_ret,
+                              uint64_t *id_ret)
+{
+       struct btrfs_stack *cur;
+       size_t new_path_len;
+       int rc;
+
+       for (;;) {
+               uint64_t child_treeid, child_dirid;
+
+               /*
+                * Refill the top entry's reference buffer, or unwind the
+                * stack, until the top entry has an unconsumed reference.
+                */
+               for (;;) {
+                       if (!iter->stack_len)
+                               return 1;
+
+                       cur = top_stack_entry(iter);
+                       if (cur->items_pos < cur->rootref_args.num_items)
+                               break;
+
+                       /* EOVERFLOW is not treated as an error here. */
+                       if (ioctl(iter->cur_fd, BTRFS_IOC_GET_SUBVOL_ROOTREF,
+                                 &cur->rootref_args) == -1 &&
+                           errno != EOVERFLOW)
+                               return -1;
+                       cur->items_pos = 0;
+
+                       if (cur->rootref_args.num_items != 0)
+                               continue;
+
+                       /* Nothing left below this subvolume. */
+                       rc = pop_stack(iter);
+                       if (rc)
+                               return rc;
+               }
+
+               child_treeid = cur->rootref_args.rootref[cur->items_pos].treeid;
+               child_dirid = cur->rootref_args.rootref[cur->items_pos].dirid;
+               cur->items_pos++;
+
+               rc = get_subvolume_path(iter, child_treeid, child_dirid,
+                                       &new_path_len);
+               if (rc) {
+                       /* Skip the subvolume if we can't access it. */
+                       if (errno == EACCES)
+                               continue;
+                       return rc;
+               }
+
+               rc = append_stack(iter, child_treeid, new_path_len);
+               if (rc) {
+                       /*
+                        * Skip the subvolume if it does not exist (which can
+                        * happen if there is another filesystem mounted over
+                        * a parent directory) or we don't have permission to
+                        * access it.
+                        */
+                       if (errno == ENOENT || errno == EACCES)
+                               continue;
+                       return rc;
+               }
+
+               /* The entry just pushed is the subvolume to report. */
+               cur = top_stack_entry(iter);
+               break;
+       }
+
+       if (path_ret) {
+               char *copy = malloc(cur->path_len + 1);
+
+               *path_ret = copy;
+               if (!copy)
+                       return -ENOMEM;
+               memcpy(copy, iter->cur_path, cur->path_len);
+               copy[cur->path_len] = '\0';
+       }
+       if (id_ret)
+               *id_ret = cur->tree_id;
+       return 0;
+}
+
+#define BTRFS_SUBVOLUME1 "subvol1" /* name of the subvolume the btrfs tests create and delete */
+#define BTRFS_SUBVOLUME1_SNAPSHOT1 "subvol1_snapshot1" /* presumably a snapshot of BTRFS_SUBVOLUME1 - TODO confirm at the use site */
+#define BTRFS_SUBVOLUME1_SNAPSHOT1_RO "subvol1_snapshot1_ro" /* presumably a read-only snapshot - TODO confirm at the use site */
+#define BTRFS_SUBVOLUME1_RENAME "subvol1_rename" /* presumably a rename target - TODO confirm at the use site */
+#define BTRFS_SUBVOLUME2 "subvol2"
+
+/*
+ * Test subvolume creation and deletion through an idmapped mount by a caller
+ * whose fsids have a mapping in that mount.
+ *
+ * A detached clone of t_dir1_fd is idmapped with the user namespace obtained
+ * from get_userns_fd(0, 10000, 10000). A child process switches its fsids to
+ * 10000 and then:
+ *   - with capabilities raised, creates, deletes and re-creates
+ *     BTRFS_SUBVOLUME1, checking that it is owned by 10000:10000;
+ *   - with capabilities lowered, verifies that deleting the subvolume fails
+ *     with EPERM.
+ * The parent re-checks the ownership and finally removes the subvolume.
+ *
+ * Returns 0 on success or when caps_supported() reports no capability
+ * support (the test is then skipped), and -1 on failure.
+ */
+static int btrfs_subvolumes_fsids_mapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_up())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: check ownership");
+
+               /* remove subvolume */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: check ownership");
+
+               if (!caps_down())
+                       die("failure: lower caps");
+
+               /*
+                * The filesystem is not mounted with user_subvol_rm_allowed so
+                * subvolume deletion must fail.
+                */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /*
+        * We are in the parent here: report the failure and fall through to
+        * the common cleanup like every other parent-side check, instead of
+        * terminating the process with die().
+        */
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: check ownership");
+               goto out;
+       }
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Test subvolume creation through an idmapped mount by a caller that has
+ * entered the user namespace the mount is idmapped with.
+ *
+ * A detached clone of t_dir1_fd is idmapped with the user namespace obtained
+ * from get_userns_fd(0, 10000, 10000). A child process switches into that
+ * namespace, creates BTRFS_SUBVOLUME1, checks that it appears owned by 0:0,
+ * and verifies that deleting it from inside the namespace fails. The parent
+ * then removes the subvolume.
+ *
+ * Returns 0 on success or when caps_supported() reports no capability
+ * support (the test is then skipped), and -1 on failure.
+ */
+static int btrfs_subvolumes_fsids_mapped_userns(void)
+{
+       struct mount_attr idmap_attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       int detached_fd = -EBADF, dir_fd = -EBADF;
+       int result = -1;
+       pid_t child;
+
+       if (!caps_supported())
+               return 0;
+
+       /* The mount properties are changed on a detached mount. */
+       idmap_attr.userns_fd = get_userns_fd(0, 10000, 10000);
+       if (idmap_attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto cleanup;
+       }
+
+       detached_fd = sys_open_tree(t_dir1_fd, "",
+                                   AT_EMPTY_PATH |
+                                   AT_NO_AUTOMOUNT |
+                                   AT_SYMLINK_NOFOLLOW |
+                                   OPEN_TREE_CLOEXEC |
+                                   OPEN_TREE_CLONE);
+       if (detached_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto cleanup;
+       }
+
+       if (sys_mount_setattr(detached_fd, "", AT_EMPTY_PATH, &idmap_attr,
+                             sizeof(idmap_attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto cleanup;
+       }
+
+       /*
+        * open_tree() hands back an O_PATH descriptor, which cannot be used
+        * with ioctl(), so reopen the directory as a regular descriptor.
+        */
+       dir_fd = openat(detached_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (dir_fd < 0) {
+               log_stderr("failure: openat");
+               goto cleanup;
+       }
+
+       child = fork();
+       if (child < 0) {
+               log_stderr("failure: fork");
+               goto cleanup;
+       }
+       if (!child) {
+               if (!switch_userns(idmap_attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /*
+                * The caller's fsids have mappings in the idmapped mount, so
+                * creating the subvolume must succeed.
+                */
+               if (btrfs_create_subvolume(dir_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(dir_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: check ownership");
+
+               /* Deleting the subvolume from inside the namespace must fail. */
+               if (!btrfs_delete_subvolume(dir_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(child))
+               goto cleanup;
+
+       /* The parent removes the subvolume the child left behind. */
+       if (btrfs_delete_subvolume(dir_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto cleanup;
+       }
+
+       result = 0;
+       log_debug("Ran test");
+cleanup:
+       safe_close(idmap_attr.userns_fd);
+       safe_close(detached_fd);
+       safe_close(dir_fd);
+
+       return result;
+}
+
+/*
+ * Check btrfs subvolume operations through an idmapped mount when the
+ * caller's fsids have no mapping in that mount.
+ *
+ * BTRFS_SUBVOLUME1 is created below t_dir1_fd and chowned to 0:0. A detached
+ * clone of t_dir1_fd is then idmapped with the userns returned by
+ * get_userns_fd(0, 10000, 10000). After switch_fsids(0, 0) the test expects:
+ *   - creating BTRFS_SUBVOLUME2 through the mount to fail with EOVERFLOW,
+ *   - renaming BTRFS_SUBVOLUME1 through the mount to fail with EOVERFLOW,
+ *   - deleting BTRFS_SUBVOLUME1 through the mount to succeed.
+ *
+ * There is no fork() here, so switch_fsids() acts on the calling process.
+ *
+ * Returns 0 if every expectation held, -1 otherwise.
+ */
+static int btrfs_subvolumes_fsids_unmapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       /* create subvolume for rename test */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       /* change ownership of the subvolume to uid 0 / gid 0 */
+       if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: fchownat");
+               goto out;
+       }
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /* OPEN_TREE_CLONE gives us a detached copy of the t_dir1_fd mount. */
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       /* Attach the idmapping (attr.userns_fd) to the detached mount. */
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       if (!switch_fsids(0, 0)) {
+               log_stderr("failure: switch_fsids");
+               goto out;
+       }
+
+       /*
+        * The caller's fsids don't have mappings in the idmapped mount so
+        * any file creation must fail.
+        */
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /* create subvolume: success here is the failure case */
+       if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+       /* errno must be read right after the failed call above */
+       if (errno != EOVERFLOW) {
+               log_stderr("failure: errno");
+               goto out;
+       }
+
+       /* try to rename a subvolume: success here is the failure case */
+       if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
+                      BTRFS_SUBVOLUME1_RENAME)) {
+               log_stderr("failure: renameat");
+               goto out;
+       }
+       if (errno != EOVERFLOW) {
+               log_stderr("failure: errno");
+               goto out;
+       }
+
+       /* The caller is privileged over the inode so file deletion must work. */
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       /* Descriptors start out as -EBADF, so this is reached safely from any failure point. */
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Check btrfs subvolume operations through an idmapped mount from inside a
+ * user namespace that is different from the one the mount is idmapped with.
+ *
+ * BTRFS_SUBVOLUME1 is created below t_dir1_fd and chowned to 0:0. The mount
+ * is idmapped with get_userns_fd(0, 10000, 10000); the forked child switches
+ * into a second userns obtained from get_userns_fd(0, 30000, 10000). In the
+ * child the test expects:
+ *   - BTRFS_SUBVOLUME1 to show up as t_overflowuid/t_overflowgid both via
+ *     t_dir1_fd and via the idmapped mount,
+ *   - creating BTRFS_SUBVOLUME2 and renaming BTRFS_SUBVOLUME1 to fail with
+ *     EOVERFLOW,
+ *   - deleting BTRFS_SUBVOLUME1 to fail.
+ * The parent then deletes BTRFS_SUBVOLUME1 itself.
+ *
+ * Returns 0 if every expectation held, -1 otherwise.
+ */
+static int btrfs_subvolumes_fsids_unmapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       /* create subvolume for rename test */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       /* change ownership of the subvolume to uid 0 / gid 0 */
+       if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: fchownat");
+               goto out;
+       }
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /*
+        * Second userns, requested with different arguments than the one used
+        * for the mount; the child switches into this one below.
+        */
+       userns_fd = get_userns_fd(0, 30000, 10000);
+       if (userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               /* Child: errors are reported with die(), never via "out". */
+               if (!switch_userns(userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped mount so
+                * any file creation must fail.
+                */
+
+               /* create subvolume: success here is the failure case */
+               if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
+                       die("failure: btrfs_create_subvolume");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /* try to rename a subvolume: success here is the failure case */
+               if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
+                                       BTRFS_SUBVOLUME1_RENAME))
+                       die("failure: renameat");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /*
+                * The caller is not privileged over the inode so subvolume
+                * deletion must fail.
+                */
+
+               /* remove subvolume (only failure is checked, not the errno value) */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume from the parent, where it is expected to succeed */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+       safe_close(userns_fd);
+
+       return fret;
+}
+
+/*
+ * Check btrfs subvolume and snapshot creation through an idmapped mount when
+ * the caller's fsids are mapped in that mount.
+ *
+ * A detached clone of t_dir1_fd is idmapped with the userns returned by
+ * get_userns_fd(0, 10000, 10000). The forked child switches its fsids to
+ * 10000:10000, raises its capabilities and then, through the mount:
+ *   - creates BTRFS_SUBVOLUME1 plus a read-write and a read-only snapshot of
+ *     it, expecting each to be reported as owned by 10000:10000,
+ *   - deletes all three,
+ *   - creates all three again with the same ownership checks.
+ * The parent deletes the second set after the child has exited.
+ *
+ * Returns 0 right away if caps_supported() is false, 0 if every expectation
+ * held, and -1 otherwise.
+ */
+static int btrfs_snapshots_fsids_mapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               /* Child: errors are reported with die(), never via "out". */
+               int subvolume_fd = -EBADF;
+
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_up())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount
+                * so any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               /* fd of the snapshot source, passed to btrfs_create_snapshot() */
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               /* create read-only snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                         BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               safe_close(subvolume_fd);
+
+               /* remove subvolume */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               /* remove read-write snapshot */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
+                       die("failure: btrfs_delete_subvolume");
+
+               /* remove read-only snapshot */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
+                       die("failure: btrfs_delete_subvolume");
+
+               /*
+                * Second round: recreate the subvolume and both snapshots and
+                * leave them in place for the parent to delete.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               /* create read-only snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                         BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove read-write snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove read-only snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Check btrfs subvolume and snapshot creation through an idmapped mount from
+ * inside the user namespace the mount is idmapped with.
+ *
+ * A detached clone of t_dir1_fd is idmapped with the userns returned by
+ * get_userns_fd(0, 10000, 10000). The forked child switches into that same
+ * userns and creates BTRFS_SUBVOLUME1 plus a read-write and a read-only
+ * snapshot of it, expecting each to be reported as owned by 0:0 there. After
+ * the child exits, the parent expects the same three objects to be reported
+ * as owned by 10000:10000 through the mount, and deletes them.
+ *
+ * Returns 0 right away if caps_supported() is false, 0 if every expectation
+ * held, and -1 otherwise.
+ */
+static int btrfs_snapshots_fsids_mapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               /* Child: errors are reported with die(), never via "out". */
+               int subvolume_fd = -EBADF;
+
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /* fd of the snapshot source, passed to btrfs_create_snapshot() */
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /* create read-only snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                         BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /*
+        * Back in the parent. Failures here are reported with log_stderr()
+        * and unwound through "out" like every other parent-side check in
+        * this function, so the descriptors are closed and -1 is returned to
+        * the caller; die() is kept for the forked child only.
+        */
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       /* remove read-write snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       /* remove read-only snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Check btrfs subvolume and snapshot operations through an idmapped mount
+ * when the caller's fsids have no mapping in that mount.
+ *
+ * BTRFS_SUBVOLUME1 is created below t_dir1_fd and chowned to 0:0. A detached
+ * clone of t_dir1_fd is idmapped with the userns returned by
+ * get_userns_fd(0, 10000, 10000). The forked child switches its fsids to 0:0
+ * and, through the mount, expects:
+ *   - subvolume creation, both snapshot creations and rename to fail with
+ *     EOVERFLOW,
+ *   - after caps_down(): both snapshot creations and subvolume deletion to
+ *     fail with EPERM,
+ *   - after caps_up(): subvolume deletion to succeed.
+ *
+ * Returns 0 right away if caps_supported() is false, 0 if every expectation
+ * held, and -1 otherwise.
+ */
+static int btrfs_snapshots_fsids_unmapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* create subvolume for rename test */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       /* change ownership of the subvolume to uid 0 / gid 0 */
+       if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: fchownat");
+               goto out;
+       }
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+                             sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+
+               /*
+                * We are in the child: a failure must end the child with
+                * die(). Jumping to "out" would make the child return into
+                * the caller and keep running as a second copy of the test
+                * process.
+                */
+               if (!switch_fsids(0, 0))
+                       die("failure: switch_fsids");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped
+                * mount so any file creation must fail.
+                */
+
+               /*
+                * The open_tree() syscall returns an O_PATH file descriptor
+                * which we can't use with ioctl(). So let's reopen it as a
+                * proper file descriptor.
+                */
+               tree_fd = openat(open_tree_fd, ".",
+                                O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (tree_fd < 0)
+                       die("failure: openat");
+
+               /* fd of the snapshot source, passed to btrfs_create_snapshot() */
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create subvolume: success here is the failure case */
+               if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
+                       die("failure: btrfs_create_subvolume");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /* create read-write snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /* create read-only snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                          BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /* try to rename a subvolume */
+               if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
+                              BTRFS_SUBVOLUME1_RENAME))
+                       die("failure: renameat");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               if (!caps_down())
+                       die("failure: caps_down");
+
+               /* With capabilities dropped the expected errno is EPERM. */
+
+               /* create read-write snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               /* create read-only snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                          BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               /*
+                * The caller is not privileged over the inode so subvolume
+                * deletion must fail.
+                */
+
+               /* remove subvolume */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               if (!caps_up())
+                       die("failure: caps_up");
+
+               /*
+                * The caller is privileged over the inode so subvolume
+                * deletion must work.
+                */
+
+               /* remove subvolume */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       /* tree_fd is only opened in the child; in the parent it is still -EBADF. */
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Check btrfs subvolume and snapshot operations through an idmapped mount
+ * from inside a user namespace that is different from the one the mount is
+ * idmapped with.
+ *
+ * BTRFS_SUBVOLUME1 is created below t_dir1_fd and chowned to 0:0. The mount
+ * is idmapped with get_userns_fd(0, 10000, 10000); the forked child switches
+ * into a second userns obtained from get_userns_fd(0, 30000, 10000). In the
+ * child the test expects:
+ *   - BTRFS_SUBVOLUME1 to show up as t_overflowuid/t_overflowgid both via
+ *     t_dir1_fd and via the idmapped mount,
+ *   - subvolume creation and rename to fail with EOVERFLOW,
+ *   - both snapshot creations and subvolume deletion to fail with EPERM.
+ * The parent then deletes BTRFS_SUBVOLUME1 itself.
+ *
+ * Returns 0 right away if caps_supported() is false, 0 if every expectation
+ * held, and -1 otherwise.
+ */
+static int btrfs_snapshots_fsids_unmapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
+           userns_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* create subvolume for rename test */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       /* change ownership of the subvolume to uid 0 / gid 0 */
+       if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: fchownat");
+               goto out;
+       }
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /*
+        * Second userns, requested with different arguments than the one used
+        * for the mount; the child switches into this one below.
+        */
+       userns_fd = get_userns_fd(0, 30000, 10000);
+       if (userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+                             sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor
+        * which we can't use with ioctl(). So let's reopen it as a
+        * proper file descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".",
+                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /* fd of the snapshot source, passed to btrfs_create_snapshot() */
+       subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               /* Child: errors are reported with die(), never via "out". */
+               if (!switch_userns(userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped
+                * mount so any file creation must fail.
+                */
+
+               /* create subvolume: success here is the failure case */
+               if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
+                       die("failure: btrfs_create_subvolume");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /* create read-write snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               /* create read-only snapshot */
+               if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                          BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                          BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               /* try to rename a subvolume */
+               if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
+                              BTRFS_SUBVOLUME1_RENAME))
+                       die("failure: renameat");
+               if (errno != EOVERFLOW)
+                       die("failure: errno");
+
+               /*
+                * The caller is not privileged over the inode so subvolume
+                * deletion must fail.
+                */
+
+               /* remove subvolume */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /*
+        * Back in the parent: report the failure with log_stderr() and
+        * unwind through "out" so the descriptors are closed and -1 is
+        * returned; die() is kept for the forked child only.
+        */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(subvolume_fd);
+       safe_close(tree_fd);
+       /* The second userns descriptor must be released as well. */
+       safe_close(userns_fd);
+
+       return fret;
+}
+
+static int btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: a forked child switches its fsids to 10000:10000,
+        * calls caps_down(), and must then be able to create a subvolume and
+        * delete it again through an idmapped, detached clone of the scratch
+        * mount. Returns 0 on success or when caps_supported() is false, and
+        * -1 on any failure in the parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        * NOTE(review): get_userns_fd(0, 10000, 10000) presumably maps ids
+        * starting at 0 onto ids starting at 10000 for a range of 10000;
+        * confirm against the helper's definition.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                *
+                * NOTE(review): the die() text for the caps_down() call above
+                * reads "raise caps" although the helper invoked is caps_down().
+                */
+
+               /* create subvolume; it must show up as 10000:10000 via tree_fd */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: check ownership");
+
+               /*
+                * The scratch device is mounted with user_subvol_rm_allowed so
+                * subvolume deletion must succeed.
+                */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+static int btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: a forked child calls switch_userns() on the same
+        * user namespace fd that idmaps a detached clone of the scratch
+        * mount, and must then be able to create a subvolume (seen as 0:0 by
+        * the child) and delete it again. Returns 0 on success or when
+        * caps_supported() is false, and -1 on any failure in the parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume; the child must see it as owned by 0:0 */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: check ownership");
+
+               /*
+                * The scratch device is mounted with user_subvol_rm_allowed so
+                * subvolume deletion must succeed.
+                */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+static int btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: a forked child with fsids 10000:10000 that has
+        * called caps_down() creates a subvolume plus a read-write and a
+        * read-only snapshot of it through an idmapped, detached clone of the
+        * scratch mount, checks their ownership, and then deletes all three.
+        * The read-only snapshot is expected to be undeletable until its
+        * read-only flag has been cleared. Returns 0 on success or when
+        * caps_supported() is false, and -1 on any failure in the parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               /* create read-only snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                         BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               safe_close(subvolume_fd);
+
+               /* remove subvolume */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               /* remove read-write snapshot */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
+                       die("failure: btrfs_delete_subvolume");
+
+               /*
+                * try to remove the read-only snapshot: the check is inverted,
+                * i.e. this attempt is expected to fail while the snapshot is
+                * still read-only. The flag is cleared below and the deletion
+                * is then retried.
+                */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
+                       die("failure: btrfs_delete_subvolume");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               safe_close(subvolume_fd);
+
+               /* remove the formerly read-only snapshot; this must now succeed */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Snapshot lifecycle test run from inside the user namespace that idmaps a
+ * detached clone of the scratch mount.
+ *
+ * A forked child calls switch_userns() on attr.userns_fd, creates a subvolume
+ * plus a read-write and a read-only snapshot of it through the idmapped mount,
+ * checks that each is seen as owned by 0:0, and deletes all three. Deleting
+ * the read-only snapshot is expected to fail until its read-only flag has
+ * been cleared.
+ *
+ * Returns 0 on success or when caps_supported() is false, -1 on failure.
+ */
+static int btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /* create read-only snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                         BTRFS_SUBVOL_RDONLY))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /*
+                * The snapshot source is no longer needed. Close it before
+                * subvolume_fd is reused below so the descriptor isn't leaked.
+                */
+               safe_close(subvolume_fd);
+
+               /* remove directory */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_delete_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /* remove read-write snapshot */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
+                       die("failure: btrfs_delete_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               /*
+                * try to remove the read-only snapshot: the check is inverted,
+                * i.e. this attempt is expected to fail while the snapshot is
+                * still read-only.
+                */
+               if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
+                       die("failure: btrfs_delete_subvolume");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               safe_close(subvolume_fd);
+
+               /* remove the formerly read-only snapshot; this must now succeed */
+               if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
+                       die("failure: btrfs_delete_subvolume");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+/*
+ * Check that deleting a subvolume by its id is refused when requested through
+ * an idmapped mount of a different subvolume.
+ *
+ * Subvolumes "A", "B" and "B/C" are created on the scratch mount, "B/C" is
+ * mounted at t_mountpoint, and a detached clone of that mount is idmapped.
+ * A forked child then expects btrfs_delete_subvolume_id() for "A" to fail
+ * with EOPNOTSUPP through the idmapped fd and to succeed through
+ * t_mnt_scratch_fd. The mount, "B/C" and "B" are torn down on every exit path.
+ *
+ * Returns 0 on success, -1 on failure.
+ */
+static int btrfs_delete_by_spec_id(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
+       uint64_t subvolume_id1 = -EINVAL, subvolume_id2 = -EINVAL;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /* create subvolume */
+       if (btrfs_create_subvolume(t_mnt_scratch_fd, "A")) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       /* create subvolume */
+       if (btrfs_create_subvolume(t_mnt_scratch_fd, "B")) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       subvolume_fd = openat(t_mnt_scratch_fd, "B", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /* create subvolume */
+       if (btrfs_create_subvolume(subvolume_fd, "C")) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       safe_close(subvolume_fd);
+
+       subvolume_fd = openat(t_mnt_scratch_fd, "A", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id1)) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+       /* Done with "A"; close it before the descriptor variable is reused. */
+       safe_close(subvolume_fd);
+
+       subvolume_fd = openat(t_mnt_scratch_fd, "B/C", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id2)) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+       /* Only the id of "B/C" is needed from here on. */
+       safe_close(subvolume_fd);
+
+       if (sys_mount(t_device_scratch, t_mountpoint, "btrfs", 0, "subvol=B/C")) {
+               log_stderr("failure: mount");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(-EBADF, t_mountpoint,
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               /*
+                * The subvolume isn't exposed in the idmapped mount so
+                * deletion via spec id must fail.
+                */
+               if (!btrfs_delete_subvolume_id(tree_fd, subvolume_id1))
+                       die("failure: btrfs_delete_subvolume_id");
+               if (errno != EOPNOTSUPP)
+                       die("failure: errno");
+
+               /* Through the regular scratch mount fd the same deletion must succeed. */
+               if (btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id1))
+                       die("failure: btrfs_delete_subvolume_id");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       /* Covers error paths that jump here while a subvolume fd is still open. */
+       safe_close(subvolume_fd);
+       safe_close(tree_fd);
+       sys_umount2(t_mountpoint, MNT_DETACH);
+       btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id2);
+       btrfs_delete_subvolume(t_mnt_scratch_fd, "B");
+
+       return fret;
+}
+
+static int btrfs_subvolumes_setflags_fsids_mapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: a forked child with fsids 10000:10000 that has
+        * called caps_down() creates a subvolume through an idmapped,
+        * detached clone of t_dir1_fd and must be able to set and clear the
+        * subvolume's read-only flag, reading it back after every change.
+        * The parent deletes the subvolume afterwards. Returns 0 on success
+        * or when caps_supported() is false, and -1 on any failure in the
+        * parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (!read_only)
+                       die("failure: not read_only");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove directory (the subvolume the child created) */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+static int btrfs_subvolumes_setflags_fsids_mapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: a forked child calls switch_userns() on the same
+        * user namespace fd that idmaps a detached clone of t_dir1_fd,
+        * creates a subvolume there (seen as 0:0 by the child) and must be
+        * able to set and clear the subvolume's read-only flag, reading it
+        * back after every change. The parent deletes the subvolume
+        * afterwards. Returns 0 on success or when caps_supported() is
+        * false, and -1 on any failure in the parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (!read_only)
+                       die("failure: not read_only");
+
+               if (btrfs_set_subvolume_ro(subvolume_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove directory (the subvolume the child created) */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+
+static int btrfs_subvolumes_setflags_fsids_unmapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * Test overview: the parent creates a subvolume through the plain
+        * t_dir1_fd; a forked child with fsids 0:0 that has called
+        * caps_down() can read the subvolume's read-only flag through the
+        * idmapped mount but must get EPERM when trying to set it. The
+        * parent deletes the subvolume afterwards. Returns 0 on success or
+        * when caps_supported() is false, and -1 on any failure in the
+        * parent.
+        *
+        * Changing mount properties on a detached mount. The user namespace
+        * obtained here is what sys_mount_setattr() attaches to the clone
+        * below via MOUNT_ATTR_IDMAP.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * create subvolume through the non-idmapped t_dir1_fd; the checks
+        * below require it to be owned by 0:0 there and to appear as
+        * 10000:10000 through the idmapped tree_fd.
+        */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (!switch_fsids(0, 0))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped mount
+                * so changing the subvolume's read-only flag must fail with
+                * EPERM. Reading the flag is still expected to succeed and to
+                * report a writable subvolume.
+                */
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (!btrfs_set_subvolume_ro(subvolume_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove directory (the subvolume created by the parent above) */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+/*
+ * Check that the read-only flag of a btrfs subvolume cannot be set through an
+ * idmapped mount by a caller that has switched into a user namespace other
+ * than the one handed to the mount (attr.userns_fd).
+ *
+ * The parent creates BTRFS_SUBVOLUME1 via t_dir1_fd. The forked child opens
+ * it through the idmapped mount, calls switch_userns() with userns_fd and
+ * then requires btrfs_set_subvolume_ro() to fail with errno set to EPERM.
+ *
+ * Returns 0 if the test passed or caps_supported() reported false, -1
+ * otherwise.
+ */
+static int btrfs_subvolumes_setflags_fsids_unmapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* User namespace the detached mount gets idmapped with below. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /* Second user namespace; the child switches into it after fork(). */
+       userns_fd = get_userns_fd(0, 30000, 10000);
+       if (userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Create the subvolume via t_dir1_fd. expected_uid_gid() must then
+        * report 0:0 via t_dir1_fd and 10000:10000 via the idmapped tree_fd.
+        */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               /*
+                * Open the subvolume through the idmapped mount before
+                * switching user namespaces. After the switch the subvolume
+                * must show t_overflowuid/t_overflowgid via both t_dir1_fd
+                * and open_tree_fd, reading the read-only flag must still
+                * work, and setting it must fail with EPERM.
+                */
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               if (!switch_userns(userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (!btrfs_set_subvolume_ro(subvolume_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+       safe_close(userns_fd);
+
+       return fret;
+}
+/*
+ * Check that a caller whose fsids are mapped in the idmapped mount can toggle
+ * the read-only flag of a btrfs snapshot through that mount.
+ *
+ * The forked child calls switch_fsids(10000, 10000) and caps_down(), creates
+ * BTRFS_SUBVOLUME1 and a snapshot of it through tree_fd, and then verifies
+ * that the snapshot starts out read-write, can be made read-only and can be
+ * made read-write again. The parent deletes both afterwards.
+ *
+ * Returns 0 if the test passed or caps_supported() reported false, -1
+ * otherwise.
+ */
+static int btrfs_snapshots_setflags_fsids_mapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* User namespace the detached mount gets idmapped with below. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               if (!switch_fsids(10000, 10000))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount
+                * so any file creation must succeed.
+                *
+                * NOTE(review): the die() message above reads "raise caps"
+                * although the failing call is caps_down() - confirm wording.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
+                       die("failure: expected_uid_gid");
+
+               snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
+                                    O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (snapshot_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (btrfs_set_subvolume_ro(snapshot_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (!read_only)
+                       die("failure: not read_only");
+
+               if (btrfs_set_subvolume_ro(snapshot_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               safe_close(snapshot_fd);
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+/*
+ * Check that a caller running inside the user namespace the mount is idmapped
+ * with (attr.userns_fd) can toggle the read-only flag of a btrfs snapshot
+ * through that mount.
+ *
+ * The forked child calls switch_userns() with attr.userns_fd, creates
+ * BTRFS_SUBVOLUME1 and a snapshot of it through tree_fd (both must show up as
+ * 0:0 from inside the namespace), and then verifies that the snapshot starts
+ * out read-write, can be made read-only and can be made read-write again.
+ * The parent deletes both afterwards.
+ *
+ * Returns 0 if the test passed or caps_supported() reported false, -1
+ * otherwise.
+ */
+static int btrfs_snapshots_setflags_fsids_mapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /*
+        * User namespace the detached mount gets idmapped with below; the
+        * child also switches into it.
+        */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
+               bool read_only = false;
+
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               /*
+                * The caller's fsids now have mappings in the idmapped mount so
+                * any file creation must succeed.
+                */
+
+               /* create subvolume */
+               if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
+                       die("failure: btrfs_create_subvolume");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
+                                     O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (subvolume_fd < 0)
+                       die("failure: openat");
+
+               /* create read-write snapshot */
+               if (btrfs_create_snapshot(subvolume_fd, tree_fd,
+                                         BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
+                       die("failure: btrfs_create_snapshot");
+
+               if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
+                       die("failure: expected_uid_gid");
+
+               snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
+                                    O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (snapshot_fd < 0)
+                       die("failure: openat");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (btrfs_set_subvolume_ro(snapshot_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (!read_only)
+                       die("failure: not read_only");
+
+               if (btrfs_set_subvolume_ro(snapshot_fd, false))
+                       die("failure: btrfs_set_subvolume_ro");
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               safe_close(snapshot_fd);
+               safe_close(subvolume_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+/*
+ * Check that a caller whose fsids are not mapped in the idmapped mount cannot
+ * set the read-only flag of a btrfs snapshot through that mount.
+ *
+ * The parent creates BTRFS_SUBVOLUME1 and a read-write snapshot of it via
+ * t_dir1_fd. The forked child opens the snapshot through tree_fd, calls
+ * switch_fsids(0, 0) and caps_down(), and then requires
+ * btrfs_set_subvolume_ro() to fail with errno set to EPERM. The parent
+ * deletes both the subvolume and the snapshot afterwards.
+ *
+ * Returns 0 if the test passed or caps_supported() reported false, -1
+ * otherwise.
+ */
+static int btrfs_snapshots_setflags_fsids_unmapped(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* User namespace the detached mount gets idmapped with below. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Create the subvolume via t_dir1_fd. expected_uid_gid() must then
+        * report 0:0 via t_dir1_fd and 10000:10000 via the idmapped tree_fd.
+        */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
+                             O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Create a read-write snapshot via t_dir1_fd; the same ownership
+        * expectations as for the subvolume apply to it.
+        */
+       if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
+                                 BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
+               log_stderr("failure: btrfs_create_snapshot");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int snapshot_fd = -EBADF;
+               bool read_only = false;
+
+               snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
+                                    O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (snapshot_fd < 0)
+                       die("failure: openat");
+
+               if (!switch_fsids(0, 0))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: raise caps");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped mount
+                * so reading the read-only flag is expected to work while
+                * setting it must fail with EPERM.
+                *
+                * NOTE(review): the die() message above reads "raise caps"
+                * although the failing call is caps_down() - confirm wording.
+                */
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (!btrfs_set_subvolume_ro(snapshot_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               safe_close(snapshot_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(subvolume_fd);
+       safe_close(tree_fd);
+
+       return fret;
+}
+/*
+ * Check that the read-only flag of a btrfs snapshot cannot be set through an
+ * idmapped mount by a caller that has switched into a user namespace other
+ * than the one handed to the mount (attr.userns_fd).
+ *
+ * The parent creates BTRFS_SUBVOLUME1 and a read-write snapshot of it via
+ * t_dir1_fd. The forked child opens the snapshot through tree_fd, calls
+ * switch_userns() with userns_fd and then requires btrfs_set_subvolume_ro()
+ * to fail with errno set to EPERM. The parent deletes both the subvolume and
+ * the snapshot afterwards.
+ *
+ * Returns 0 if the test passed or caps_supported() reported false, -1
+ * otherwise.
+ */
+static int btrfs_snapshots_setflags_fsids_unmapped_userns(void)
+{
+       int fret = -1;
+       int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
+           userns_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+
+       if (!caps_supported())
+               return 0;
+
+       /* User namespace the detached mount gets idmapped with below. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       /* Second user namespace; the child switches into it after fork(). */
+       userns_fd = get_userns_fd(0, 30000, 10000);
+       if (userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(t_dir1_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Create the subvolume via t_dir1_fd. expected_uid_gid() must then
+        * report 0:0 via t_dir1_fd and 10000:10000 via the idmapped tree_fd.
+        */
+       if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
+                             O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /*
+        * Create a read-write snapshot via t_dir1_fd; the same ownership
+        * expectations as for the subvolume apply to it.
+        */
+       if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
+                                 BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
+               log_stderr("failure: btrfs_create_snapshot");
+               goto out;
+       }
+
+       if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
+               log_stderr("failure: expected_uid_gid");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               int snapshot_fd = -EBADF;
+               bool read_only = false;
+
+               /*
+                * Open the snapshot through the idmapped mount before
+                * switching user namespaces.
+                */
+
+               snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
+                                    O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+               if (snapshot_fd < 0)
+                       die("failure: openat");
+
+
+               if (!switch_userns(userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
+                                     t_overflowuid, t_overflowgid))
+                       die("failure: expected_uid_gid");
+
+               /*
+                * The caller's fsids don't have mappings in the idmapped mount
+                * so reading the read-only flag is expected to work while
+                * setting it must fail with EPERM.
+                */
+
+               if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
+                       die("failure: btrfs_get_subvolume_ro");
+
+               if (read_only)
+                       die("failure: read_only");
+
+               if (!btrfs_set_subvolume_ro(snapshot_fd, true))
+                       die("failure: btrfs_set_subvolume_ro");
+               if (errno != EPERM)
+                       die("failure: errno");
+
+               safe_close(snapshot_fd);
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       /* remove subvolume */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       /* remove snapshot */
+       if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
+               log_stderr("failure: btrfs_delete_subvolume");
+               goto out;
+       }
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(attr.userns_fd);
+       safe_close(open_tree_fd);
+       safe_close(subvolume_fd);
+       safe_close(tree_fd);
+       safe_close(userns_fd);
+
+       return fret;
+}
+/* Names of the subvolumes created by btrfs_subvolume_lookup_user(). */
+#define BTRFS_SUBVOLUME_SUBVOL1 "subvol1"
+#define BTRFS_SUBVOLUME_SUBVOL2 "subvol2"
+#define BTRFS_SUBVOLUME_SUBVOL3 "subvol3"
+#define BTRFS_SUBVOLUME_SUBVOL4 "subvol4"
+/*
+ * Indices of those subvolumes in the subvolume_fds[] and subvolume_ids[]
+ * arrays, which are sized BTRFS_SUBVOLUME_SUBVOL4_ID + 1.
+ */
+#define BTRFS_SUBVOLUME_SUBVOL1_ID 0
+#define BTRFS_SUBVOLUME_SUBVOL2_ID 1
+#define BTRFS_SUBVOLUME_SUBVOL3_ID 2
+#define BTRFS_SUBVOLUME_SUBVOL4_ID 3
+/* Plain directories: dir1 is created inside subvol1 and dir2 inside dir1. */
+#define BTRFS_SUBVOLUME_DIR1 "dir1"
+#define BTRFS_SUBVOLUME_DIR2 "dir2"
+/* Directory created below t_mnt_fd and used as the target for mounting subvol1. */
+#define BTRFS_SUBVOLUME_MNT "mnt_subvolume1"
+/* Nested paths; the 'x' in the macro name stands for a '/' in the path. */
+#define BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3 "subvol1/subvol3"
+#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 "subvol1/dir1/dir2"
+#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4 "subvol1/dir1/dir2/subvol4"
+
+/*
+ * We create the following mount layout to test lookup:
+ *
+ * |-/mnt/test                    /dev/loop0                   btrfs       rw,relatime,space_cache,subvolid=5,subvol=/
+ * | |-/mnt/test/mnt1             /dev/loop1[/subvol1]         btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=268,subvol=/subvol1
+ * '-/mnt/scratch                 /dev/loop1                   btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=5,subvol=/
+ */
+static int btrfs_subvolume_lookup_user(void)
+{
+       int fret = -1, i;
+       int dir1_fd = -EBADF, dir2_fd = -EBADF, mnt_fd = -EBADF,
+           open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
+       int subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
+       uint64_t subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
+       uint64_t subvolid = -EINVAL;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+       pid_t pid;
+       struct btrfs_iter *iter;
+
+       if (!caps_supported())
+               return 0;
+
+       for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
+               subvolume_fds[i] = -EBADF;
+
+       for (i = 0; i < ARRAY_SIZE(subvolume_ids); i++)
+               subvolume_ids[i] = -EINVAL;
+
+       if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL1)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL2)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] = openat(t_mnt_scratch_fd,
+                                                          BTRFS_SUBVOLUME_SUBVOL1,
+                                                          O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       /* create subvolume */
+       if (btrfs_create_subvolume(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_SUBVOL3)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (mkdirat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1, 0777)) {
+               log_stderr("failure: mkdirat");
+               goto out;
+       }
+
+       dir1_fd = openat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1,
+                        O_CLOEXEC | O_DIRECTORY);
+       if (dir1_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (mkdirat(dir1_fd, BTRFS_SUBVOLUME_DIR2, 0777)) {
+               log_stderr("failure: mkdirat");
+               goto out;
+       }
+
+       dir2_fd = openat(dir1_fd, BTRFS_SUBVOLUME_DIR2, O_CLOEXEC | O_DIRECTORY);
+       if (dir2_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_create_subvolume(dir2_fd, BTRFS_SUBVOLUME_SUBVOL4)) {
+               log_stderr("failure: btrfs_create_subvolume");
+               goto out;
+       }
+
+       if (mkdirat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, 0777)) {
+               log_stderr("failure: mkdirat");
+               goto out;
+       }
+
+       snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
+       if (sys_mount(t_device_scratch, t_buf, "btrfs", 0,
+                     "subvol=" BTRFS_SUBVOLUME_SUBVOL1)) {
+               log_stderr("failure: mount");
+               goto out;
+       }
+
+       mnt_fd = openat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, O_CLOEXEC | O_DIRECTORY);
+       if (mnt_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (chown_r(t_mnt_scratch_fd, ".", 1000, 1000)) {
+               log_stderr("failure: chown_r");
+               goto out;
+       }
+
+       subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] = openat(t_mnt_scratch_fd,
+                                                          BTRFS_SUBVOLUME_SUBVOL2,
+                                                          O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID],
+                                  &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL1_ID])) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID],
+                                  &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL2_ID])) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+       subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] = openat(t_mnt_scratch_fd,
+                                                          BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3,
+                                                          O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID],
+                                  &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+       subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] = openat(t_mnt_scratch_fd,
+                                                          BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4,
+                                                          O_CLOEXEC | O_DIRECTORY);
+       if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID],
+                                  &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])) {
+               log_stderr("failure: btrfs_get_subvolume_id");
+               goto out;
+       }
+
+
+       /*
+        * Restrict both subvolumes to owner-only rwx. The result of fchmod()
+        * itself must be tested: a trailing ", 0" would make the condition
+        * always false and hide a failure.
+        */
+       if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID], S_IRUSR | S_IWUSR | S_IXUSR)) {
+               log_stderr("failure: fchmod");
+               goto out;
+       }
+
+       if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID], S_IRUSR | S_IWUSR | S_IXUSR)) {
+               log_stderr("failure: fchmod");
+               goto out;
+       }
+
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       if (attr.userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       open_tree_fd = sys_open_tree(mnt_fd, "",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       if (open_tree_fd < 0) {
+               log_stderr("failure: sys_open_tree");
+               goto out;
+       }
+
+       if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
+               log_stderr("failure: sys_mount_setattr");
+               goto out;
+       }
+
+       /*
+        * The open_tree() syscall returns an O_PATH file descriptor which we
+        * can't use with ioctl(). So let's reopen it as a proper file
+        * descriptor.
+        */
+       tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
+       if (tree_fd < 0) {
+               log_stderr("failure: openat");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               bool subvolume3_found = false, subvolume4_found = false;
+
+               if (!switch_fsids(11000, 11000))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: lower caps");
+
+               if (btrfs_iterator_start(tree_fd, 0, &iter))
+                       die("failure: btrfs_iterator_start");
+
+               for (;;) {
+                       char *subvol_path = NULL;
+                       int ret;
+
+                       ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
+                       if (ret == 1)
+                               break;
+                       else if (ret)
+                               die("failure: btrfs_iterator_next");
+
+                       if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
+                           subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
+                               die("failure: subvolume id %llu->%s",
+                                   (long long unsigned)subvolid, subvol_path);
+
+                       if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
+                               subvolume3_found = true;
+
+                       if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
+                               subvolume4_found = true;
+
+                       free(subvol_path);
+               }
+               btrfs_iterator_end(iter);
+
+               if (!subvolume3_found || !subvolume4_found)
+                       die("failure: subvolume id");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               bool subvolume3_found = false, subvolume4_found = false;
+
+               if (!switch_userns(attr.userns_fd, 0, 0, false))
+                       die("failure: switch_userns");
+
+               if (btrfs_iterator_start(tree_fd, 0, &iter))
+                       die("failure: btrfs_iterator_start");
+
+               for (;;) {
+                       char *subvol_path = NULL;
+                       int ret;
+
+                       ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
+                       if (ret == 1)
+                               break;
+                       else if (ret)
+                               die("failure: btrfs_iterator_next");
+
+                       if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
+                           subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
+                               die("failure: subvolume id %llu->%s",
+                                   (long long unsigned)subvolid, subvol_path);
+
+                       if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
+                               subvolume3_found = true;
+
+                       if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
+                               subvolume4_found = true;
+
+                       free(subvol_path);
+               }
+               btrfs_iterator_end(iter);
+
+               if (!subvolume3_found || !subvolume4_found)
+                       die("failure: subvolume id");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               bool subvolume_found = false;
+
+               if (!switch_fsids(0, 0))
+                       die("failure: switch fsids");
+
+               if (!caps_down())
+                       die("failure: lower caps");
+
+               if (btrfs_iterator_start(tree_fd, 0, &iter))
+                       die("failure: btrfs_iterator_start");
+
+               for (;;) {
+                       char *subvol_path = NULL;
+                       int ret;
+
+                       ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
+                       if (ret == 1)
+                               break;
+                       else if (ret)
+                               die("failure: btrfs_iterator_next");
+
+                       free(subvol_path);
+
+                       subvolume_found = true;
+                       break;
+               }
+               btrfs_iterator_end(iter);
+
+               if (subvolume_found)
+                       die("failure: subvolume id");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       userns_fd = get_userns_fd(0, 30000, 10000);
+       if (userns_fd < 0) {
+               log_stderr("failure: get_userns_fd");
+               goto out;
+       }
+
+       pid = fork();
+       if (pid < 0) {
+               log_stderr("failure: fork");
+               goto out;
+       }
+       if (pid == 0) {
+               bool subvolume_found = false;
+
+               if (!switch_userns(userns_fd, 0, 0, true))
+                       die("failure: switch_userns");
+
+               if (btrfs_iterator_start(tree_fd, 0, &iter))
+                       die("failure: btrfs_iterator_start");
+
+               for (;;) {
+                       char *subvol_path = NULL;
+                       int ret;
+
+                       ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
+                       if (ret == 1)
+                               break;
+                       else if (ret)
+                               die("failure: btrfs_iterator_next");
+
+                       free(subvol_path);
+
+                       subvolume_found = true;
+                       break;
+               }
+               btrfs_iterator_end(iter);
+
+               if (subvolume_found)
+                       die("failure: subvolume id");
+
+               exit(EXIT_SUCCESS);
+       }
+       if (wait_for_pid(pid))
+               goto out;
+
+       fret = 0;
+       log_debug("Ran test");
+out:
+       safe_close(dir1_fd);
+       safe_close(dir2_fd);
+       safe_close(open_tree_fd);
+       safe_close(tree_fd);
+       safe_close(userns_fd);
+       for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
+               safe_close(subvolume_fds[i]);
+       snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
+       sys_umount2(t_buf, MNT_DETACH);
+       unlinkat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, AT_REMOVEDIR);
+
+       return fret;
+}
+
+/*
+ * Write the help text to stderr, one entry per line, then terminate the
+ * process right away via _exit() with EXIT_SUCCESS. Never returns.
+ */
+static void usage(void)
+{
+       static const char *const help_lines[] = {
+               "Description:\n",
+               "    Run idmapped mount tests\n\n",
+               "Arguments:\n",
+               "--device                     Device used in the tests\n",
+               "--fstype                     Filesystem type used in the tests\n",
+               "--help                       Print help\n",
+               "--mountpoint                 Mountpoint of device\n",
+               "--supported                  Test whether idmapped mounts are supported on this filesystem\n",
+               "--scratch-mountpoint         Mountpoint of scratch device used in the tests\n",
+               "--scratch-device             Scratch device used in the tests\n",
+               "--test-core                  Run core idmapped mount testsuite\n",
+               "--test-fscaps-regression     Run fscap regression tests\n",
+               "--test-nested-userns         Run nested userns idmapped mount testsuite\n",
+               "--test-btrfs                 Run btrfs specific idmapped mount testsuite\n",
+       };
+       size_t idx;
+
+       /* None of the lines contains a conversion, so they are emitted verbatim. */
+       for (idx = 0; idx < sizeof(help_lines) / sizeof(help_lines[0]); idx++)
+               fputs(help_lines[idx], stderr);
+
+       _exit(EXIT_SUCCESS);
+}
+
+/*
+ * Long command line options understood by main(). Every entry leaves .flag
+ * unset, so the option parser hands back .val, which main() switches on.
+ * The all-zero entry terminates the table.
+ */
+static const struct option longopts[] = {
+       { .name = "device",                 .has_arg = required_argument, .flag = NULL, .val = 'd' },
+       { .name = "fstype",                 .has_arg = required_argument, .flag = NULL, .val = 'f' },
+       { .name = "mountpoint",             .has_arg = required_argument, .flag = NULL, .val = 'm' },
+       { .name = "scratch-mountpoint",     .has_arg = required_argument, .flag = NULL, .val = 'a' },
+       { .name = "scratch-device",         .has_arg = required_argument, .flag = NULL, .val = 'e' },
+       { .name = "supported",              .has_arg = no_argument,       .flag = NULL, .val = 's' },
+       { .name = "help",                   .has_arg = no_argument,       .flag = NULL, .val = 'h' },
+       { .name = "test-core",              .has_arg = no_argument,       .flag = NULL, .val = 'c' },
+       { .name = "test-fscaps-regression", .has_arg = no_argument,       .flag = NULL, .val = 'g' },
+       { .name = "test-nested-userns",     .has_arg = no_argument,       .flag = NULL, .val = 'n' },
+       { .name = "test-btrfs",             .has_arg = no_argument,       .flag = NULL, .val = 'b' },
+       { .name = NULL,                     .has_arg = 0,                 .flag = NULL, .val = 0   },
+};
+
+/*
+ * One entry of a test suite as consumed by run_test().
+ */
+struct t_idmapped_mounts {
+       /* Test body; run_test() treats a non-zero return value as failure. */
+       int (*test)(void);
+       /* Text reported by run_test() when the test fails. */
+       const char *description;
+};
+
+/* Suite that main() runs when --test-core is given. */
+struct t_idmapped_mounts basic_suite[] = {
+       { .test = acls, .description = "posix acls on regular mounts" },
+       { .test = create_in_userns, .description = "create operations in user namespace" },
+       { .test = device_node_in_userns, .description = "device node in user namespace" },
+       { .test = expected_uid_gid_idmapped_mounts, .description = "expected ownership on idmapped mounts" },
+       { .test = fscaps, .description = "fscaps on regular mounts" },
+       { .test = fscaps_idmapped_mounts, .description = "fscaps on idmapped mounts" },
+       { .test = fscaps_idmapped_mounts_in_userns, .description = "fscaps on idmapped mounts in user namespace" },
+       { .test = fscaps_idmapped_mounts_in_userns_separate_userns, .description = "fscaps on idmapped mounts in user namespace with different id mappings" },
+       { .test = fsids_mapped, .description = "mapped fsids" },
+       { .test = fsids_unmapped, .description = "unmapped fsids" },
+       { .test = hardlink_crossing_mounts, .description = "cross mount hardlink" },
+       { .test = hardlink_crossing_idmapped_mounts, .description = "cross idmapped mount hardlink" },
+       { .test = hardlink_from_idmapped_mount, .description = "hardlinks from idmapped mounts" },
+       { .test = hardlink_from_idmapped_mount_in_userns, .description = "hardlinks from idmapped mounts in user namespace" },
+#ifdef HAVE_LIBURING_H
+       { .test = io_uring, .description = "io_uring" },
+       { .test = io_uring_userns, .description = "io_uring in user namespace" },
+       { .test = io_uring_idmapped, .description = "io_uring from idmapped mounts" },
+       { .test = io_uring_idmapped_userns, .description = "io_uring from idmapped mounts in user namespace" },
+       { .test = io_uring_idmapped_unmapped, .description = "io_uring from idmapped mounts with unmapped ids" },
+       { .test = io_uring_idmapped_unmapped_userns, .description = "io_uring from idmapped mounts with unmapped ids in user namespace" },
+#endif
+       { .test = protected_symlinks, .description = "following protected symlinks on regular mounts" },
+       { .test = protected_symlinks_idmapped_mounts, .description = "following protected symlinks on idmapped mounts" },
+       { .test = protected_symlinks_idmapped_mounts_in_userns, .description = "following protected symlinks on idmapped mounts in user namespace" },
+       { .test = rename_crossing_mounts, .description = "cross mount rename" },
+       { .test = rename_crossing_idmapped_mounts, .description = "cross idmapped mount rename" },
+       { .test = rename_from_idmapped_mount, .description = "rename from idmapped mounts" },
+       { .test = rename_from_idmapped_mount_in_userns, .description = "rename from idmapped mounts in user namespace" },
+       { .test = setattr_truncate, .description = "setattr truncate" },
+       { .test = setattr_truncate_idmapped, .description = "setattr truncate on idmapped mounts" },
+       { .test = setattr_truncate_idmapped_in_userns, .description = "setattr truncate on idmapped mounts in user namespace" },
+       { .test = setgid_create, .description = "create operations in directories with setgid bit set" },
+       { .test = setgid_create_idmapped, .description = "create operations in directories with setgid bit set on idmapped mounts" },
+       { .test = setgid_create_idmapped_in_userns, .description = "create operations in directories with setgid bit set on idmapped mounts in user namespace" },
+       { .test = setid_binaries, .description = "setid binaries on regular mounts" },
+       { .test = setid_binaries_idmapped_mounts, .description = "setid binaries on idmapped mounts" },
+       { .test = setid_binaries_idmapped_mounts_in_userns, .description = "setid binaries on idmapped mounts in user namespace" },
+       { .test = setid_binaries_idmapped_mounts_in_userns_separate_userns, .description = "setid binaries on idmapped mounts in user namespace with different id mappings" },
+       { .test = sticky_bit_unlink, .description = "sticky bit unlink operations on regular mounts" },
+       { .test = sticky_bit_unlink_idmapped_mounts, .description = "sticky bit unlink operations on idmapped mounts" },
+       { .test = sticky_bit_unlink_idmapped_mounts_in_userns, .description = "sticky bit unlink operations on idmapped mounts in user namespace" },
+       { .test = sticky_bit_rename, .description = "sticky bit rename operations on regular mounts" },
+       { .test = sticky_bit_rename_idmapped_mounts, .description = "sticky bit rename operations on idmapped mounts" },
+       { .test = sticky_bit_rename_idmapped_mounts_in_userns, .description = "sticky bit rename operations on idmapped mounts in user namespace" },
+       { .test = symlink_regular_mounts, .description = "symlink from regular mounts" },
+       { .test = symlink_idmapped_mounts, .description = "symlink from idmapped mounts" },
+       { .test = symlink_idmapped_mounts_in_userns, .description = "symlink from idmapped mounts in user namespace" },
+       { .test = threaded_idmapped_mount_interactions, .description = "threaded operations on idmapped mounts" },
+};
+
+/* Suite that main() runs when --test-fscaps-regression is given. */
+struct t_idmapped_mounts fscaps_in_ancestor_userns[] = {
+       {
+               .test           = fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns,
+               .description    = "fscaps on idmapped mounts in user namespace writing fscap valid in ancestor userns",
+       },
+};
+
+/* Suite that main() runs when --test-nested-userns is given. */
+struct t_idmapped_mounts t_nested_userns[] = {
+       {
+               .test           = nested_userns,
+               .description    = "test that nested user namespaces behave correctly when attached to idmapped mounts",
+       },
+};
+
+/* Suite that main() runs when --test-btrfs is given. */
+struct t_idmapped_mounts t_btrfs[] = {
+       { .test = btrfs_subvolumes_fsids_mapped, .description = "test subvolumes with mapped fsids" },
+       { .test = btrfs_subvolumes_fsids_mapped_userns, .description = "test subvolumes with mapped fsids inside user namespace" },
+       { .test = btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed, .description = "test subvolume deletion with user_subvol_rm_allowed mount option" },
+       { .test = btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed, .description = "test subvolume deletion with user_subvol_rm_allowed mount option inside user namespace" },
+       { .test = btrfs_subvolumes_fsids_unmapped, .description = "test subvolumes with unmapped fsids" },
+       { .test = btrfs_subvolumes_fsids_unmapped_userns, .description = "test subvolumes with unmapped fsids inside user namespace" },
+       { .test = btrfs_snapshots_fsids_mapped, .description = "test snapshots with mapped fsids" },
+       { .test = btrfs_snapshots_fsids_mapped_userns, .description = "test snapshots with mapped fsids inside user namespace" },
+       { .test = btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed, .description = "test snapshots deletion with user_subvol_rm_allowed mount option" },
+       { .test = btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed, .description = "test snapshots deletion with user_subvol_rm_allowed mount option inside user namespace" },
+       { .test = btrfs_snapshots_fsids_unmapped, .description = "test snapshots with unmapped fsids" },
+       { .test = btrfs_snapshots_fsids_unmapped_userns, .description = "test snapshots with unmapped fsids inside user namespace" },
+       { .test = btrfs_delete_by_spec_id, .description = "test subvolume deletion by spec id" },
+       { .test = btrfs_subvolumes_setflags_fsids_mapped, .description = "test subvolume flags with mapped fsids" },
+       { .test = btrfs_subvolumes_setflags_fsids_mapped_userns, .description = "test subvolume flags with mapped fsids inside user namespace" },
+       { .test = btrfs_subvolumes_setflags_fsids_unmapped, .description = "test subvolume flags with unmapped fsids" },
+       { .test = btrfs_subvolumes_setflags_fsids_unmapped_userns, .description = "test subvolume flags with unmapped fsids inside user namespace" },
+       { .test = btrfs_snapshots_setflags_fsids_mapped, .description = "test snapshots flags with mapped fsids" },
+       { .test = btrfs_snapshots_setflags_fsids_mapped_userns, .description = "test snapshots flags with mapped fsids inside user namespace" },
+       { .test = btrfs_snapshots_setflags_fsids_unmapped, .description = "test snapshots flags with unmapped fsids" },
+       { .test = btrfs_snapshots_setflags_fsids_unmapped_userns, .description = "test snapshots flags with unmapped fsids inside user namespace" },
+       { .test = btrfs_subvolume_lookup_user, .description = "test unprivileged subvolume lookup" },
+};
+
+/*
+ * Run every test of a suite in order, each one in its own forked child and
+ * bracketed by test_setup() / test_cleanup().
+ *
+ * @suite:      tests to run
+ * @suite_size: number of elements in @suite
+ *
+ * Stops at the first problem. Returns true when all tests passed and false
+ * when fork() failed or wait_for_pid() reported a non-zero result for a
+ * child.
+ */
+static bool run_test(struct t_idmapped_mounts suite[], size_t suite_size)
+{
+       /* Same type as suite_size, so the loop bound compares like with like. */
+       size_t i;
+
+       for (i = 0; i < suite_size; i++) {
+               struct t_idmapped_mounts *t = &suite[i];
+               int ret;
+               pid_t pid;
+
+               test_setup();
+
+               pid = fork();
+               if (pid < 0) {
+                       /* Pair the test_setup() above, as every other exit does. */
+                       test_cleanup();
+                       return false;
+               }
+
+               if (pid == 0) {
+                       /* Child: run the test and report through the exit status. */
+                       ret = t->test();
+                       if (ret)
+                               die("failure: %s", t->description);
+
+                       exit(EXIT_SUCCESS);
+               }
+
+               /* Parent: clean up before acting on the child's result. */
+               ret = wait_for_pid(pid);
+               test_cleanup();
+
+               if (ret)
+                       return false;
+       }
+
+       return true;
+}
+
+int main(int argc, char *argv[])
+{
+       int fret, ret;
        int index = 0;
-       bool supported = false;
+       bool supported = false, test_btrfs = false, test_core = false,
+            test_fscaps_regression = false, test_nested_userns = false;
 
-       while ((ret = getopt_long(argc, argv, "", longopts, &index)) != -1) {
+       while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
                switch (ret) {
                case 'd':
                        t_device = optarg;
@@ -8842,6 +13503,24 @@ int main(int argc, char *argv[])
                case 's':
                        supported = true;
                        break;
+               case 'c':
+                       test_core = true;
+                       break;
+               case 'g':
+                       test_fscaps_regression = true;
+                       break;
+               case 'n':
+                       test_nested_userns = true;
+                       break;
+               case 'b':
+                       test_btrfs = true;
+                       break;
+               case 'a':
+                       t_mountpoint_scratch = optarg;
+                       break;
+               case 'e':
+                       t_device_scratch = optarg;
+                       break;
                case 'h':
                        /* fallthrough */
                default:
@@ -8870,6 +13549,10 @@ int main(int argc, char *argv[])
        if (t_mnt_fd < 0)
                die("failed to open %s", t_mountpoint);
 
+       /*
+        * t_mountpoint_scratch is only assigned by --scratch-mountpoint, so it
+        * may still be unset here. Open it only when it was given, and check
+        * the descriptor that was just opened rather than t_mnt_fd.
+        */
+       t_mnt_scratch_fd = -EBADF;
+       if (t_mountpoint_scratch) {
+               t_mnt_scratch_fd = openat(-EBADF, t_mountpoint_scratch, O_CLOEXEC | O_DIRECTORY);
+               if (t_mnt_scratch_fd < 0)
+                       die("failed to open %s", t_mountpoint_scratch);
+       }
+
        /*
         * Caller just wants to know whether the filesystem we're on supports
         * idmapped mounts.
@@ -8911,7 +13594,19 @@ int main(int argc, char *argv[])
 
        fret = EXIT_FAILURE;
 
-       if (!run_test(basic_suite, ARRAY_SIZE(basic_suite)))
+       if (test_core && !run_test(basic_suite, ARRAY_SIZE(basic_suite)))
+               goto out;
+
+       if (test_fscaps_regression &&
+           !run_test(fscaps_in_ancestor_userns,
+                     ARRAY_SIZE(fscaps_in_ancestor_userns)))
+               goto out;
+
+       if (test_nested_userns &&
+           !run_test(t_nested_userns, ARRAY_SIZE(t_nested_userns)))
+               goto out;
+
+       if (test_btrfs && !run_test(t_btrfs, ARRAY_SIZE(t_btrfs)))
                goto out;
 
        fret = EXIT_SUCCESS;
index 219104e..d8490be 100644 (file)
 #include "missing.h"
 #include "utils.h"
 
-/* A few helpful macros. */
-#define STRLITERALLEN(x) (sizeof(""x"") - 1)
-
-#define INTTYPE_TO_STRLEN(type)             \
-       (2 + (sizeof(type) <= 1             \
-                 ? 3                       \
-                 : sizeof(type) <= 2       \
-                       ? 5                 \
-                       : sizeof(type) <= 4 \
-                             ? 10          \
-                             : sizeof(type) <= 8 ? 20 : sizeof(int[-2 * (sizeof(type) > 8)])))
-
-#define syserror(format, ...)                           \
-       ({                                              \
-               fprintf(stderr, format, ##__VA_ARGS__); \
-               (-errno);                               \
-       })
-
-#define syserror_set(__ret__, format, ...)                    \
-       ({                                                    \
-               typeof(__ret__) __internal_ret__ = (__ret__); \
-               errno = labs(__ret__);                        \
-               fprintf(stderr, format, ##__VA_ARGS__);       \
-               __internal_ret__;                             \
-       })
-
-struct list {
-       void *elem;
-       struct list *next;
-       struct list *prev;
-};
-
-#define list_for_each(__iterator, __list) \
-       for (__iterator = (__list)->next; __iterator != __list; __iterator = __iterator->next)
-
-static inline void list_init(struct list *list)
-{
-       list->elem = NULL;
-       list->next = list->prev = list;
-}
-
-static inline int list_empty(const struct list *list)
-{
-       return list == list->next;
-}
-
-static inline void __list_add(struct list *new, struct list *prev, struct list *next)
-{
-       next->prev = new;
-       new->next = next;
-       new->prev = prev;
-       prev->next = new;
-}
-
-static inline void list_add_tail(struct list *head, struct list *list)
-{
-       __list_add(list, head->prev, head);
-}
-
-typedef enum idmap_type_t {
-       ID_TYPE_UID,
-       ID_TYPE_GID
-} idmap_type_t;
-
-struct id_map {
-       idmap_type_t map_type;
-       __u32 nsid;
-       __u32 hostid;
-       __u32 range;
-};
-
 static struct list active_map;
 
-static int add_map_entry(__u32 id_host,
-                        __u32 id_ns,
-                        __u32 range,
-                        idmap_type_t map_type)
-{
-       struct list *new_list = NULL;
-       struct id_map *newmap = NULL;
-
-       newmap = malloc(sizeof(*newmap));
-       if (!newmap)
-               return -ENOMEM;
-
-       new_list = malloc(sizeof(struct list));
-       if (!new_list) {
-               free(newmap);
-               return -ENOMEM;
-       }
-
-       *newmap = (struct id_map){
-               .hostid         = id_host,
-               .nsid           = id_ns,
-               .range          = range,
-               .map_type       = map_type,
-       };
-
-       new_list->elem = newmap;
-       list_add_tail(&active_map, new_list);
-       return 0;
-}
-
 static int parse_map(char *map)
 {
        char types[2] = {'u', 'g'};
@@ -158,7 +57,7 @@ static int parse_map(char *map)
                else
                        map_type = ID_TYPE_GID;
 
-               ret = add_map_entry(id_host, id_ns, range, map_type);
+               ret = add_map_entry(&active_map, id_host, id_ns, range, map_type);
                if (ret < 0)
                        return ret;
        }
@@ -166,132 +65,6 @@ static int parse_map(char *map)
        return 0;
 }
 
-static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
-{
-       int fd = -EBADF, setgroups_fd = -EBADF;
-       int fret = -1;
-       int ret;
-       char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
-                 STRLITERALLEN("/setgroups") + 1];
-
-       if (geteuid() != 0 && map_type == ID_TYPE_GID) {
-               ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
-               if (ret < 0 || ret >= sizeof(path))
-                       goto out;
-
-               setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
-               if (setgroups_fd < 0 && errno != ENOENT) {
-                       syserror("Failed to open \"%s\"", path);
-                       goto out;
-               }
-
-               if (setgroups_fd >= 0) {
-                       ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
-                       if (ret != STRLITERALLEN("deny\n")) {
-                               syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
-                               goto out;
-                       }
-               }
-       }
-
-       ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
-       if (ret < 0 || ret >= sizeof(path))
-               goto out;
-
-       fd = open(path, O_WRONLY | O_CLOEXEC);
-       if (fd < 0) {
-               syserror("Failed to open \"%s\"", path);
-               goto out;
-       }
-
-       ret = write_nointr(fd, buf, buf_size);
-       if (ret != buf_size) {
-               syserror("Failed to write %cid mapping to \"%s\"",
-                        map_type == ID_TYPE_UID ? 'u' : 'g', path);
-               goto out;
-       }
-
-       fret = 0;
-out:
-       if (fd >= 0)
-               close(fd);
-       if (setgroups_fd >= 0)
-               close(setgroups_fd);
-
-       return fret;
-}
-
-static int map_ids_from_idmap(struct list *idmap, pid_t pid)
-{
-       int fill, left;
-       char mapbuf[4096] = {};
-       bool had_entry = false;
-       idmap_type_t map_type, u_or_g;
-
-       for (map_type = ID_TYPE_UID, u_or_g = 'u';
-            map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
-               char *pos = mapbuf;
-               int ret;
-               struct list *iterator;
-
-
-               list_for_each(iterator, idmap) {
-                       struct id_map *map = iterator->elem;
-                       if (map->map_type != map_type)
-                               continue;
-
-                       had_entry = true;
-
-                       left = 4096 - (pos - mapbuf);
-                       fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
-                       /*
-                        * The kernel only takes <= 4k for writes to
-                        * /proc/<pid>/{g,u}id_map
-                        */
-                       if (fill <= 0 || fill >= left)
-                               return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
-
-                       pos += fill;
-               }
-               if (!had_entry)
-                       continue;
-
-               ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
-               if (ret < 0)
-                       return syserror("Failed to write mapping: %s", mapbuf);
-
-               memset(mapbuf, 0, sizeof(mapbuf));
-       }
-
-       return 0;
-}
-
-static int get_userns_fd_from_idmap(struct list *idmap)
-{
-       int ret;
-       pid_t pid;
-       char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
-                 STRLITERALLEN("/ns/user") + 1];
-
-       pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
-       if (pid < 0)
-               return -errno;
-
-       ret = map_ids_from_idmap(idmap, pid);
-       if (ret < 0)
-               return ret;
-
-       ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
-       if (ret < 0 || (size_t)ret >= sizeof(path_ns))
-               ret = -EIO;
-       else
-               ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
-
-       (void)kill(pid, SIGKILL);
-       (void)wait_for_pid(pid);
-       return ret;
-}
-
 static inline bool strnequal(const char *str, const char *eq, size_t len)
 {
        return strncmp(str, eq, len) == 0;
index 977443f..c2afa8d 100644 (file)
@@ -3,11 +3,15 @@
 #define _GNU_SOURCE
 #endif
 #include <fcntl.h>
+#include <grp.h>
 #include <linux/limits.h>
+#include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <sched.h>
+#include <sys/eventfd.h>
 #include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <sys/wait.h>
@@ -36,99 +40,347 @@ ssize_t write_nointr(int fd, const void *buf, size_t count)
        return ret;
 }
 
-static int write_file(const char *path, const void *buf, size_t count)
+#define __STACK_SIZE (8 * 1024 * 1024)
+pid_t do_clone(int (*fn)(void *), void *arg, int flags)
 {
-       int fd;
-       ssize_t ret;
+       void *stack;
 
-       fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
-       if (fd < 0)
-               return -1;
+       stack = malloc(__STACK_SIZE);
+       if (!stack)
+               return -ENOMEM;
 
-       ret = write_nointr(fd, buf, count);
-       close(fd);
-       if (ret < 0 || (size_t)ret != count)
-               return -1;
+#ifdef __ia64__
+       return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+       return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
 
+static int get_userns_fd_cb(void *data)
+{
        return 0;
 }
 
-static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
-                  unsigned long range)
+int wait_for_pid(pid_t pid)
 {
-       char map[100], procfile[256];
+       int status, ret;
 
-       snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
-       snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
-       if (write_file(procfile, map, strlen(map)))
-               return -1;
+again:
+       ret = waitpid(pid, &status, 0);
+       if (ret == -1) {
+               if (errno == EINTR)
+                       goto again;
 
+               return -1;
+       }
 
-       snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
-       snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
-       if (write_file(procfile, map, strlen(map)))
+       if (!WIFEXITED(status))
                return -1;
 
-       return 0;
+       return WEXITSTATUS(status);
 }
 
-#define __STACK_SIZE (8 * 1024 * 1024)
-pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
 {
-       void *stack;
+       int fd = -EBADF, setgroups_fd = -EBADF;
+       int fret = -1;
+       int ret;
+       char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
+                 STRLITERALLEN("/setgroups") + 1];
 
-       stack = malloc(__STACK_SIZE);
-       if (!stack)
-               return -ENOMEM;
+       if (geteuid() != 0 && map_type == ID_TYPE_GID) {
+               ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
+               if (ret < 0 || ret >= sizeof(path))
+                       goto out;
 
-#ifdef __ia64__
-       return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
-#else
-       return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
-#endif
+               setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
+               if (setgroups_fd < 0 && errno != ENOENT) {
+                       syserror("Failed to open \"%s\"", path);
+                       goto out;
+               }
+
+               if (setgroups_fd >= 0) {
+                       ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
+                       if (ret != STRLITERALLEN("deny\n")) {
+                               syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
+                               goto out;
+                       }
+               }
+       }
+
+       ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
+       if (ret < 0 || ret >= sizeof(path))
+               goto out;
+
+       fd = open(path, O_WRONLY | O_CLOEXEC);
+       if (fd < 0) {
+               syserror("Failed to open \"%s\"", path);
+               goto out;
+       }
+
+       ret = write_nointr(fd, buf, buf_size);
+       if (ret != buf_size) {
+               syserror("Failed to write %cid mapping to \"%s\"",
+                        map_type == ID_TYPE_UID ? 'u' : 'g', path);
+               goto out;
+       }
+
+       fret = 0;
+out:
+       if (fd >= 0)
+               close(fd);
+       if (setgroups_fd >= 0)
+               close(setgroups_fd);
+
+       return fret;
 }
 
-int get_userns_fd_cb(void *data)
+static int map_ids_from_idmap(struct list *idmap, pid_t pid)
 {
-       return kill(getpid(), SIGSTOP);
+       int fill, left;
+       char mapbuf[4096] = {};
+       bool had_entry = false;
+       idmap_type_t map_type, u_or_g;
+
+       if (list_empty(idmap))
+               return 0;
+
+       for (map_type = ID_TYPE_UID, u_or_g = 'u';
+            map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
+               char *pos = mapbuf;
+               int ret;
+               struct list *iterator;
+
+
+               list_for_each(iterator, idmap) {
+                       struct id_map *map = iterator->elem;
+                       if (map->map_type != map_type)
+                               continue;
+
+                       had_entry = true;
+
+                       left = 4096 - (pos - mapbuf);
+                       fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
+                       /*
+                        * The kernel only takes <= 4k for writes to
+