idmapped-mounts: remove redundant fchownat() call in setgid tests
[xfstests-dev.git] / src / idmapped-mounts / idmapped-mounts.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef _GNU_SOURCE
3 #define _GNU_SOURCE
4 #endif
5
6 #include "../global.h"
7
8 #include <dirent.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <grp.h>
13 #include <limits.h>
14 #include <linux/limits.h>
15 #include <linux/types.h>
16 #include <pthread.h>
17 #include <pwd.h>
18 #include <sched.h>
19 #include <stdbool.h>
20 #include <sys/fsuid.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <sys/xattr.h>
24 #include <unistd.h>
25
26 #ifdef HAVE_LINUX_BTRFS_H
27 # ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID
28 #  define btrfs_ioctl_vol_args_v2 override_btrfs_ioctl_vol_args_v2
29 # endif
30 #include <linux/btrfs.h>
31 # undef btrfs_ioctl_vol_args_v2
32 #endif
33
34 #ifdef HAVE_LINUX_BTRFS_TREE_H
35 #include <linux/btrfs_tree.h>
36 #endif
37
38 #ifdef HAVE_SYS_CAPABILITY_H
39 #include <sys/capability.h>
40 #endif
41
42 #ifdef HAVE_LIBURING_H
43 #include <liburing.h>
44 #endif
45
46 #include "missing.h"
47 #include "utils.h"
48
49 #define T_DIR1 "idmapped_mounts_1"
50 #define FILE1 "file1"
51 #define FILE1_RENAME "file1_rename"
52 #define FILE2 "file2"
53 #define FILE2_RENAME "file2_rename"
54 #define DIR1 "dir1"
55 #define DIR2 "dir2"
56 #define DIR3 "dir3"
57 #define DIR1_RENAME "dir1_rename"
58 #define HARDLINK1 "hardlink1"
59 #define SYMLINK1 "symlink1"
60 #define SYMLINK_USER1 "symlink_user1"
61 #define SYMLINK_USER2 "symlink_user2"
62 #define SYMLINK_USER3 "symlink_user3"
63 #define CHRDEV1 "chrdev1"
64
/*
 * log_stderr - print a diagnostic line to stderr.
 *
 * The line is prefixed with the source file, line number and function name,
 * followed by the description of the current errno (the "%m" conversion) and
 * then the caller supplied printf-style message.
 */
#define log_stderr(format, ...)                                                         \
        fprintf(stderr, "%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, __func__, \
                ##__VA_ARGS__)

/*
 * log_debug - like log_stderr() but without the errno description.
 *
 * Only produces output when DEBUG_TRACE is defined; otherwise it expands to
 * nothing.
 */
#ifdef DEBUG_TRACE
#define log_debug(format, ...)                                           \
        fprintf(stderr, "%s: %d: %s - " format "\n", __FILE__, __LINE__, \
                __func__, ##__VA_ARGS__)
#else
#define log_debug(format, ...)
#endif
76
/*
 * log_error_errno - set errno to @__errno__, log the message via log_stderr()
 * and evaluate to @__ret__.
 *
 * @__ret__ is evaluated once, before errno is overwritten, so it may itself be
 * an expression. Intended for "return log_error_errno(...);" style usage.
 */
#define log_error_errno(__ret__, __errno__, format, ...)      \
        ({                                                    \
                typeof(__ret__) __internal_ret__ = (__ret__); \
                errno = (__errno__);                          \
                log_stderr(format, ##__VA_ARGS__);            \
                __internal_ret__;                             \
        })

/* log_errno - log_error_errno() using the errno that is currently set. */
#define log_errno(__ret__, format, ...) log_error_errno(__ret__, errno, format, ##__VA_ARGS__)

/*
 * die_errno - set errno to @__errno__, log the message via log_stderr() and
 * terminate the process with EXIT_FAILURE.
 */
#define die_errno(__errno__, format, ...)          \
        ({                                         \
                errno = (__errno__);               \
                log_stderr(format, ##__VA_ARGS__); \
                exit(EXIT_FAILURE);                \
        })

/* die - die_errno() using the errno that is currently set. */
#define die(format, ...) die_errno(errno, format, ##__VA_ARGS__)
95
/* ARRAY_SIZE - number of elements in array @A; @A must be a real array, not a pointer. */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
97
/*
 * overflow uid/gid; default to 65534 and are refreshed from
 * /proc/sys/fs/overflow{u,g}id by stash_overflowuid()/stash_overflowgid()
 */
uid_t t_overflowuid = 65534;
gid_t t_overflowgid = 65534;

/* filesystem type of the test device (compared against "xfs" in is_xfs()) */
const char *t_fstype;

/* path of the test device */
const char *t_device;

/* path of the test scratch device */
const char *t_device_scratch;

/* mountpoint of the test device */
const char *t_mountpoint;

/* mountpoint of the test scratch device */
const char *t_mountpoint_scratch;

/* fd for @t_mountpoint */
int t_mnt_fd;

/* fd for @t_mountpoint_scratch */
int t_mnt_scratch_fd;

/* fd for @T_DIR1 */
int t_dir1_fd;

/* temporary buffer */
char t_buf[PATH_MAX];
127
128 static void stash_overflowuid(void)
129 {
130         int fd;
131         ssize_t ret;
132         char buf[256];
133
134         fd = open("/proc/sys/fs/overflowuid", O_RDONLY | O_CLOEXEC);
135         if (fd < 0)
136                 return;
137
138         ret = read(fd, buf, sizeof(buf));
139         close(fd);
140         if (ret < 0)
141                 return;
142
143         t_overflowuid = atoi(buf);
144 }
145
146 static void stash_overflowgid(void)
147 {
148         int fd;
149         ssize_t ret;
150         char buf[256];
151
152         fd = open("/proc/sys/fs/overflowgid", O_RDONLY | O_CLOEXEC);
153         if (fd < 0)
154                 return;
155
156         ret = read(fd, buf, sizeof(buf));
157         close(fd);
158         if (ret < 0)
159                 return;
160
161         t_overflowgid = atoi(buf);
162 }
163
164 static bool is_xfs(void)
165 {
166         static int enabled = -1;
167
168         if (enabled == -1)
169                 enabled = !strcmp(t_fstype, "xfs");
170
171         return enabled;
172 }
173
/*
 * protected_symlinks_enabled - report whether fs.protected_symlinks is >= 1
 *
 * /proc/sys/fs/protected_symlinks is consulted on the first call only and the
 * result is cached. Any failure to open or read the file (including an empty
 * read) is cached as "disabled".
 */
static bool protected_symlinks_enabled(void)
{
        static int enabled = -1;

        if (enabled == -1) {
                int fd;
                ssize_t ret;
                char buf[256];

                /* Assume disabled unless the sysctl proves otherwise. */
                enabled = 0;

                fd = open("/proc/sys/fs/protected_symlinks", O_RDONLY | O_CLOEXEC);
                if (fd < 0)
                        return false;

                /* Reserve one byte so the data can be NUL-terminated for atoi(). */
                ret = read(fd, buf, sizeof(buf) - 1);
                close(fd);
                if (ret <= 0)
                        return false;
                buf[ret] = '\0';

                if (atoi(buf) >= 1)
                        enabled = 1;
        }

        return enabled == 1;
}
200
/*
 * xfs_irix_sgid_inherit_enabled - report whether fs.xfs.irix_sgid_inherit is >= 1
 *
 * Only consults /proc/sys/fs/xfs/irix_sgid_inherit when is_xfs() is true; for
 * any other filesystem type the answer is false. The result is determined on
 * the first call and cached. Any failure to open or read the file (including
 * an empty read) is cached as "disabled".
 */
static bool xfs_irix_sgid_inherit_enabled(void)
{
        static int enabled = -1;

        if (enabled == -1) {
                int fd;
                ssize_t ret;
                char buf[256];

                /* Assume disabled unless the sysctl proves otherwise. */
                enabled = 0;

                if (is_xfs()) {
                        fd = open("/proc/sys/fs/xfs/irix_sgid_inherit", O_RDONLY | O_CLOEXEC);
                        if (fd < 0)
                                return false;

                        /* Reserve one byte so the data can be NUL-terminated for atoi(). */
                        ret = read(fd, buf, sizeof(buf) - 1);
                        close(fd);
                        if (ret <= 0)
                                return false;
                        buf[ret] = '\0';

                        if (atoi(buf) >= 1)
                                enabled = 1;
                }
        }

        return enabled == 1;
}
229
/*
 * caps_supported - report whether this binary was built with capability
 * support, i.e. whether HAVE_SYS_CAPABILITY_H was defined at compile time.
 */
static inline bool caps_supported(void)
{
#ifdef HAVE_SYS_CAPABILITY_H
        const bool built_with_caps = true;
#else
        const bool built_with_caps = false;
#endif

        return built_with_caps;
}
240
/*
 * caps_down - lower all effective caps
 *
 * Returns non-zero (true) when the effective set was cleared and applied to
 * the process, zero otherwise. Without HAVE_SYS_CAPABILITY_H it always
 * returns zero.
 */
static int caps_down(void)
{
        bool lowered = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t caps;

        caps = cap_get_proc();
        if (caps) {
                /* Clear the effective set, then install the result. */
                if (!cap_clear_flag(caps, CAP_EFFECTIVE) && !cap_set_proc(caps))
                        lowered = true;
        }

        cap_free(caps);
#endif
        return lowered;
}
268
/*
 * caps_up - raise all permitted caps
 *
 * Copies the state of every capability's permitted flag into its effective
 * flag and applies the result to the process. Returns non-zero (true) on
 * success, zero otherwise. Without HAVE_SYS_CAPABILITY_H it always returns
 * zero.
 */
static int caps_up(void)
{
        bool raised = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t caps;
        cap_value_t cap = 0;

        caps = cap_get_proc();
        if (!caps)
                goto out;

        while (cap <= CAP_LAST_CAP) {
                cap_flag_value_t permitted;

                if (cap_get_flag(caps, cap, CAP_PERMITTED, &permitted)) {
                        /* EINVAL ends the walk; any other error is fatal. */
                        if (errno != EINVAL)
                                goto out;
                        break;
                }

                if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, permitted))
                        goto out;

                cap++;
        }

        if (cap_set_proc(caps) == 0)
                raised = true;
out:
        cap_free(caps);
#endif
        return raised;
}
308
/*
 * __expected_uid_gid - check whether file is owned by the provided uid and gid
 *
 * Stats @path relative to @dfd with @flags. Returns true only when both the
 * owner and the group match. When @log is true each mismatch is reported on
 * stderr. A failing fstatat() is always logged and yields false.
 */
static bool __expected_uid_gid(int dfd, const char *path, int flags,
                               uid_t expected_uid, gid_t expected_gid, bool log)
{
        struct stat sb;
        bool uid_ok, gid_ok;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return log_errno(false, "failure: fstatat");

        uid_ok = (sb.st_uid == expected_uid);
        gid_ok = (sb.st_gid == expected_gid);

        if (log) {
                if (!uid_ok)
                        log_stderr("failure: uid(%d) != expected_uid(%d)", sb.st_uid, expected_uid);

                if (!gid_ok)
                        log_stderr("failure: gid(%d) != expected_gid(%d)", sb.st_gid, expected_gid);
        }

        errno = 0; /* Don't report misleading errno. */
        return uid_ok && gid_ok;
}
329
/*
 * expected_uid_gid - check ownership of a file and log any mismatch
 *
 * Convenience wrapper around __expected_uid_gid() with logging enabled.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
                             uid_t expected_uid, gid_t expected_gid)
{
        const bool log_mismatch = true;

        return __expected_uid_gid(dfd, path, flags, expected_uid, expected_gid,
                                  log_mismatch);
}
336
/*
 * expected_file_size - check whether a file has the expected size
 *
 * Stats @path relative to @dfd with @flags and compares st_size against
 * @expected_size. A failing fstatat() or a size mismatch is logged and yields
 * false.
 */
static bool expected_file_size(int dfd, const char *path,
                               int flags, off_t expected_size)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return log_errno(false, "failure: fstatat");

        if (sb.st_size == expected_size)
                return true;

        return log_errno(false, "failure: st_size(%zu) != expected_size(%zu)",
                         (size_t)sb.st_size, (size_t)expected_size);
}
353
/*
 * is_setid - check whether file is S_ISUID or S_ISGID
 *
 * Returns true when at least one of the two bits is set in the mode of @path
 * (resolved relative to @dfd with @flags). Returns false when neither is set
 * or when the file cannot be stat'ed.
 */
static bool is_setid(int dfd, const char *path, int flags)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (sb.st_mode & (S_ISUID | S_ISGID)) != 0;
}
367
/*
 * is_setgid - check whether file or directory is S_ISGID
 *
 * Returns false when the bit is clear or when @path (resolved relative to
 * @dfd with @flags) cannot be stat'ed.
 */
static bool is_setgid(int dfd, const char *path, int flags)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (sb.st_mode & S_ISGID) != 0;
}
381
382 /* is_sticky - check whether file is S_ISVTX */
383 static bool is_sticky(int dfd, const char *path, int flags)
384 {
385         int ret;
386         struct stat st;
387
388         ret = fstatat(dfd, path, &st, flags);
389         if (ret < 0)
390                 return false;
391
392         errno = 0; /* Don't report misleading errno. */
393         return (st.st_mode & S_ISVTX) > 0;
394 }
395
/*
 * switch_fsids - switch the caller's filesystem uid and gid
 *
 * setfsgid()/setfsuid() return the previous id on success and on failure
 * alike, so their return value cannot be used as a status. Whether the switch
 * took effect is therefore verified by reading the current id back with an
 * id of -1.
 *
 * Returns true when both ids were switched, false (after logging) otherwise.
 */
static inline bool switch_fsids(uid_t fsuid, gid_t fsgid)
{
        /* The return value is the old fsgid, not an error indicator. */
        setfsgid(fsgid);
        if (setfsgid(-1) != fsgid)
                return log_errno(false, "failure: setfsgid(-1)");

        /* The return value is the old fsuid, not an error indicator. */
        setfsuid(fsuid);
        if (setfsuid(-1) != fsuid)
                return log_errno(false, "failure: setfsuid(-1)");

        return true;
}
412
/*
 * switch_resids - switch the caller's real, effective and saved uid and gid
 *
 * The gid is switched before the uid. Afterwards the filesystem ids are read
 * back to confirm they now equal @gid and @uid.
 *
 * Returns true on success, false (after logging) otherwise.
 */
static inline bool switch_resids(uid_t uid, gid_t gid)
{
        if (setresgid(gid, gid, gid))
                return log_errno(false, "failure: setresgid");

        if (setresuid(uid, uid, uid))
                return log_errno(false, "failure: setresuid");

        /* An id of -1 only queries the current filesystem id. */
        if (setfsgid(-1) != gid)
                return log_errno(false, "failure: setfsgid(-1)");

        if (setfsuid(-1) != uid)
                return log_errno(false, "failure: setfsuid(-1)");

        return true;
}
429
/*
 * switch_userns - enter the user namespace referred to by @fd
 *
 * After joining the namespace the ids are switched to @uid/@gid via
 * switch_ids(). When @drop_caps is set, the effective capabilities are
 * lowered as well.
 *
 * Returns true on success, false (after logging) otherwise.
 */
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{
        if (setns(fd, CLONE_NEWUSER) != 0)
                return log_errno(false, "failure: setns");

        if (!switch_ids(uid, gid))
                return log_errno(false, "failure: switch_ids");

        if (!drop_caps)
                return true;

        if (!caps_down())
                return log_errno(false, "failure: caps_down");

        return true;
}
443
/*
 * rm_r - recursively remove all files
 *
 * Removes everything below the directory @path (resolved relative to @fd)
 * and finally the directory itself. Entries that disappear while the
 * directory is being walked are ignored.
 *
 * Returns -1 when @path is NULL or empty or the directory cannot be opened,
 * otherwise the result of removing @path itself (0 on success, -1 with errno
 * set on failure).
 */
static int rm_r(int fd, const char *path)
{
        int dfd, ret, saved_errno;
        DIR *dir;
        struct dirent *direntp;

        if (!path || strcmp(path, "") == 0)
                return -1;

        dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
        if (dfd < 0)
                return -1;

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                return -1;
        }

        while ((direntp = readdir(dir))) {
                struct stat st;

                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished: there is nothing left to remove
                         * and @st holds no valid data to inspect.
                         */
                        if (errno == ENOENT)
                                continue;
                        break;
                }

                if (S_ISDIR(st.st_mode))
                        ret = rm_r(dfd, direntp->d_name);
                else
                        ret = unlinkat(dfd, direntp->d_name, 0);
                if (ret < 0 && errno != ENOENT)
                        break;
        }

        ret = unlinkat(fd, path, AT_REMOVEDIR);

        /* Keep the errno of unlinkat() visible to the caller. */
        saved_errno = errno;
        closedir(dir);
        errno = saved_errno;

        return ret;
}
487
/*
 * chown_r - recursively change ownership of all files
 *
 * Changes the owner and group of everything below the directory @path
 * (resolved relative to @fd) and finally of the directory itself. Symlinks
 * are not followed. Entries that disappear while the directory is being
 * walked are ignored.
 *
 * Returns 0 on success. Returns -1 with errno set when the directory cannot
 * be opened, when changing an entry below it failed, or when changing the
 * directory itself failed.
 */
static int chown_r(int fd, const char *path, uid_t uid, gid_t gid)
{
        int dfd, ret, saved_errno;
        int child_errno = 0;
        DIR *dir;
        struct dirent *direntp;

        dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
        if (dfd < 0)
                return -1;

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                return -1;
        }

        while ((direntp = readdir(dir))) {
                struct stat st;

                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished: there is nothing to chown and
                         * @st holds no valid data to inspect.
                         */
                        if (errno == ENOENT)
                                continue;
                        child_errno = errno;
                        break;
                }

                if (S_ISDIR(st.st_mode))
                        ret = chown_r(dfd, direntp->d_name, uid, gid);
                else
                        ret = fchownat(dfd, direntp->d_name, uid, gid, AT_SYMLINK_NOFOLLOW);
                if (ret < 0 && errno != ENOENT) {
                        child_errno = errno;
                        break;
                }
        }

        /* The directory itself is still chowned even if an entry failed. */
        ret = fchownat(fd, path, uid, gid, AT_SYMLINK_NOFOLLOW);
        if (ret == 0 && child_errno) {
                /* Don't let a failure further down the tree go unnoticed. */
                ret = -1;
                errno = child_errno;
        }

        /* Keep the relevant errno visible to the caller. */
        saved_errno = errno;
        closedir(dir);
        errno = saved_errno;

        return ret;
}
528
/*
 * There'll be scenarios where you'll want to see the attributes associated with
 * a directory tree during debugging or just to make sure things look correct.
 * Simply build with DEBUG_TRACE defined and call the print_r() helper where
 * you need it; without DEBUG_TRACE print_r() is a stub that does nothing.
 */
534 #ifdef DEBUG_TRACE
/*
 * fd_cloexec - set or clear the close-on-exec flag of @fd
 *
 * The descriptor flags are only written back when they actually change.
 *
 * Returns 0 on success and a negative errno value when fcntl() fails.
 */
static int fd_cloexec(int fd, bool cloexec)
{
        int cur, want;

        cur = fcntl(fd, F_GETFD, 0);
        if (cur < 0)
                return -errno;

        want = cloexec ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);
        if (want != cur && fcntl(fd, F_SETFD, want) < 0)
                return -errno;

        return 0;
}
556
/*
 * dup_cloexec - duplicate @fd and mark the duplicate close-on-exec
 *
 * Returns the new file descriptor on success and a negative errno value on
 * failure. The error reported is the one from the step that failed; it is
 * captured before the duplicate is closed so close() cannot overwrite it.
 */
static inline int dup_cloexec(int fd)
{
        int fd_dup, ret;

        fd_dup = dup(fd);
        if (fd_dup < 0)
                return -errno;

        ret = fd_cloexec(fd_dup, true);
        if (ret) {
                close(fd_dup);
                return ret;
        }

        return fd_dup;
}
572
/*
 * print_r - recursively print mode, uid and gid of a directory tree to stderr
 *
 * When @path is NULL or empty the directory referred to by @fd itself is
 * walked, otherwise @path is resolved relative to @fd. Non-directory entries
 * are printed as they are encountered, subdirectories are descended into, and
 * the directory itself is printed last. Entries that disappear while the
 * directory is being walked are skipped.
 *
 * Returns -1 when the directory cannot be opened or duplicated, otherwise the
 * result of the final fstatat() on the directory itself.
 */
__attribute__((unused)) static int print_r(int fd, const char *path)
{
        int ret = 0;
        int dfd, dfd_dup;
        DIR *dir;
        struct dirent *direntp;
        struct stat st;

        if (!path || *path == '\0') {
                char buf[sizeof("/proc/self/fd/") + 30];

                ret = snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd);
                if (ret < 0 || (size_t)ret >= sizeof(buf))
                        return -1;

                /*
                 * O_PATH file descriptors can't be used so we need to re-open
                 * just in case.
                 */
                dfd = openat(-EBADF, buf, O_CLOEXEC | O_DIRECTORY, 0);
        } else {
                dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY, 0);
        }
        if (dfd < 0)
                return -1;

        /*
         * When fdopendir() below succeeds it assumes ownership of the fd, so
         * we keep a duplicate around that stays ours and can be used for the
         * fstatat() calls and the recursion below.
         */
        dfd_dup = dup_cloexec(dfd);
        if (dfd_dup < 0) {
                close(dfd);
                return -1;
        }

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                close(dfd_dup);
                return -1;
        }
        /* Transfer ownership to fdopendir(). */
        dfd = -EBADF;

        while ((direntp = readdir(dir))) {
                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd_dup, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished: @st holds no valid data for it,
                         * so there is nothing to print.
                         */
                        if (errno == ENOENT)
                                continue;
                        break;
                }

                ret = 0;
                if (S_ISDIR(st.st_mode))
                        ret = print_r(dfd_dup, direntp->d_name);
                else
                        fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %d/%s\n",
                                (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
                                dfd_dup, direntp->d_name);
                if (ret < 0 && errno != ENOENT)
                        break;
        }

        /* Finally report the directory itself. */
        if (!path || *path == '\0')
                ret = fstatat(fd, "", &st,
                              AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
                              AT_EMPTY_PATH);
        else
                ret = fstatat(fd, path, &st,
                              AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW);
        if (!ret)
                fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %s\n",
                        (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
                        (path && *path) ? path : "(null)");

        close(dfd_dup);
        closedir(dir);

        return ret;
}
657 #else
/* print_r - stub used when DEBUG_TRACE is not defined; prints nothing. */
__attribute__((unused)) static int print_r(int fd, const char *path)
{
        (void)fd;
        (void)path;

        return 0;
}
662 #endif
663
664 /* fd_to_fd - transfer data from one fd to another */
665 static int fd_to_fd(int from, int to)
666 {
667         for (;;) {
668                 uint8_t buf[PATH_MAX];
669                 uint8_t *p = buf;
670                 ssize_t bytes_to_write;
671                 ssize_t bytes_read;
672
673                 bytes_read = read_nointr(from, buf, sizeof buf);
674                 if (bytes_read < 0)
675                         return -1;
676                 if (bytes_read == 0)
677                         break;
678
679                 bytes_to_write = (size_t)bytes_read;
680                 do {
681                         ssize_t bytes_written;
682
683                         bytes_written = write_nointr(to, p, bytes_to_write);
684                         if (bytes_written < 0)
685                                 return -1;
686
687                         bytes_to_write -= bytes_written;
688                         p += bytes_written;
689                 } while (bytes_to_write > 0);
690         }
691
692         return 0;
693 }
694
/*
 * sys_execveat - raw execveat() system call wrapper
 *
 * When the syscall number is not known at build time the call fails with
 * errno set to ENOSYS.
 */
static int sys_execveat(int fd, const char *path, char **argv, char **envp,
                        int flags)
{
#ifndef __NR_execveat
        errno = ENOSYS;
        return -1;
#else
        return syscall(__NR_execveat, fd, path, argv, envp, flags);
#endif
}
705
/*
 * Fallback definitions for the capability constants and the revision 3 file
 * capability xattr layout used by the dummy vfs caps helpers. Each one is
 * only defined here when the headers included above did not provide it.
 */
#ifndef CAP_NET_RAW
#define CAP_NET_RAW 13
#endif

#ifndef VFS_CAP_FLAGS_EFFECTIVE
#define VFS_CAP_FLAGS_EFFECTIVE 0x000001
#endif

#ifndef VFS_CAP_U32_3
#define VFS_CAP_U32_3 2
#endif

#ifndef VFS_CAP_U32
#define VFS_CAP_U32 VFS_CAP_U32_3
#endif

#ifndef VFS_CAP_REVISION_1
#define VFS_CAP_REVISION_1 0x01000000
#endif

#ifndef VFS_CAP_REVISION_2
#define VFS_CAP_REVISION_2 0x02000000
#endif

#ifndef VFS_CAP_REVISION_3
#define VFS_CAP_REVISION_3 0x03000000
/*
 * Data stored in the "security.capability" xattr by set_dummy_vfs_caps():
 * revision/flags word, VFS_CAP_U32 pairs of permitted/inheritable bitmasks
 * and the root uid the capabilities belong to.
 */
struct vfs_ns_cap_data {
        __le32 magic_etc;
        struct {
                __le32 permitted;
                __le32 inheritable;
        } data[VFS_CAP_U32];
        __le32 rootid;
};
#endif
741
/*
 * Conversions between CPU byte order and little-endian for 16 and 32 bit
 * values: byte swaps on big-endian hosts, plain casts on little-endian hosts.
 * Building fails if the byte order cannot be determined.
 */
#if __BYTE_ORDER == __BIG_ENDIAN
#define cpu_to_le16(w16) le16_to_cpu(w16)
#define le16_to_cpu(w16) ((u_int16_t)((u_int16_t)(w16) >> 8) | (u_int16_t)((u_int16_t)(w16) << 8))
#define cpu_to_le32(w32) le32_to_cpu(w32)
#define le32_to_cpu(w32)                                                                       \
        ((u_int32_t)((u_int32_t)(w32) >> 24) | (u_int32_t)(((u_int32_t)(w32) >> 8) & 0xFF00) | \
         (u_int32_t)(((u_int32_t)(w32) << 8) & 0xFF0000) | (u_int32_t)((u_int32_t)(w32) << 24))
#elif __BYTE_ORDER == __LITTLE_ENDIAN
#define cpu_to_le16(w16) ((u_int16_t)(w16))
#define le16_to_cpu(w16) ((u_int16_t)(w16))
#define cpu_to_le32(w32) ((u_int32_t)(w32))
#define le32_to_cpu(w32) ((u_int32_t)(w32))
#else
#error Expected endianess macro to be set
#endif
757
758 /* expected_dummy_vfs_caps_uid - check vfs caps are stored with the provided uid */
759 static bool expected_dummy_vfs_caps_uid(int fd, uid_t expected_uid)
760 {
761 #define __cap_raised_permitted(x, ns_cap_data)                                 \
762         ((ns_cap_data.data[(x) >> 5].permitted) & (1 << ((x)&31)))
763         struct vfs_ns_cap_data ns_xattr = {};
764         ssize_t ret;
765
766         ret = fgetxattr(fd, "security.capability", &ns_xattr, sizeof(ns_xattr));
767         if (ret < 0 || ret == 0)
768                 return false;
769
770         if (ns_xattr.magic_etc & VFS_CAP_REVISION_3) {
771
772                 if (le32_to_cpu(ns_xattr.rootid) != expected_uid) {
773                         errno = EINVAL;
774                         log_stderr("failure: rootid(%d) != expected_rootid(%d)", le32_to_cpu(ns_xattr.rootid), expected_uid);
775                 }
776
777                 return (le32_to_cpu(ns_xattr.rootid) == expected_uid) &&
778                        (__cap_raised_permitted(CAP_NET_RAW, ns_xattr) > 0);
779         } else {
780                 log_stderr("failure: fscaps version");
781         }
782
783         return false;
784 }
785
786 /* set_dummy_vfs_caps - set dummy vfs caps for the provided uid */
787 static int set_dummy_vfs_caps(int fd, int flags, int rootuid)
788 {
789 #define __raise_cap_permitted(x, ns_cap_data)                                  \
790         ns_cap_data.data[(x) >> 5].permitted |= (1 << ((x)&31))
791
792         struct vfs_ns_cap_data ns_xattr;
793
794         memset(&ns_xattr, 0, sizeof(ns_xattr));
795         __raise_cap_permitted(CAP_NET_RAW, ns_xattr);
796         ns_xattr.magic_etc |= VFS_CAP_REVISION_3 | VFS_CAP_FLAGS_EFFECTIVE;
797         ns_xattr.rootid = cpu_to_le32(rootuid);
798
799         return fsetxattr(fd, "security.capability",
800                          &ns_xattr, sizeof(ns_xattr), flags);
801 }
802
/*
 * safe_close - close @fd if it is valid and mark it as closed
 *
 * Does nothing for a negative @fd. Otherwise the descriptor is closed, errno
 * is left exactly as it was before the call and @fd is set to -EBADF so that
 * a second safe_close() on the same variable is harmless.
 *
 * @fd must be an lvalue without side effects since it is evaluated more than
 * once. The do/while(0) wrapper makes the macro behave like a single
 * statement, e.g. in an unbraced if/else.
 */
#define safe_close(fd)                   \
        do {                             \
                if ((fd) >= 0) {         \
                        int _e_ = errno; \
                        close(fd);       \
                        errno = _e_;     \
                        (fd) = -EBADF;   \
                }                        \
        } while (0)
810
811 static void test_setup(void)
812 {
813         if (mkdirat(t_mnt_fd, T_DIR1, 0777))
814                 die("failure: mkdirat");
815
816         t_dir1_fd = openat(t_mnt_fd, T_DIR1, O_CLOEXEC | O_DIRECTORY);
817         if (t_dir1_fd < 0)
818                 die("failure: openat");
819
820         if (fchmod(t_dir1_fd, 0777))
821                 die("failure: fchmod");
822 }
823
824 static void test_cleanup(void)
825 {
826         safe_close(t_dir1_fd);
827         if (rm_r(t_mnt_fd, T_DIR1))
828                 die("failure: rm_r");
829 }
830
/*
 * fsids_unmapped - validate basic file operations on an idmapped mount when
 * the caller's fsids have no mapping in that mount.
 *
 * Sets up a file and a directory owned by uid/gid 0 in T_DIR1, creates a
 * detached idmapped clone of that directory and then, with fsuid/fsgid 0,
 * expects through the idmapped mount:
 *   - hardlink creation and renames to fail with EOVERFLOW,
 *   - removal of the existing file and directory to succeed,
 *   - creation of files, device nodes, symlinks and directories to fail with
 *     EOVERFLOW.
 *
 * Returns 0 if every expectation holds and -1 otherwise.
 */
static int fsids_unmapped(void)
{
        int fret = -1;
        int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
        struct mount_attr attr = {
                .attr_set = MOUNT_ATTR_IDMAP,
        };

        /* create hardlink target */
        hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
        if (hardlink_target_fd < 0) {
                log_stderr("failure: openat");
                goto out;
        }

        /* create directory for rename test */
        if (mkdirat(t_dir1_fd, DIR1, 0700)) {
                log_stderr("failure: mkdirat");
                goto out;
        }

        /* change ownership of all files to uid 0 */
        if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
                log_stderr("failure: chown_r");
                goto out;
        }

        /* Changing mount properties on a detached mount. */
        attr.userns_fd  = get_userns_fd(0, 10000, 10000);
        if (attr.userns_fd < 0) {
                log_stderr("failure: get_userns_fd");
                goto out;
        }

        /* Create a detached clone of T_DIR1 that the idmapping is applied to. */
        open_tree_fd = sys_open_tree(t_dir1_fd, "",
                                     AT_EMPTY_PATH |
                                     AT_NO_AUTOMOUNT |
                                     AT_SYMLINK_NOFOLLOW |
                                     OPEN_TREE_CLOEXEC |
                                     OPEN_TREE_CLONE);
        if (open_tree_fd < 0) {
                log_stderr("failure: sys_open_tree");
                goto out;
        }

        if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
                log_stderr("failure: sys_mount_setattr");
                goto out;
        }

        if (!switch_fsids(0, 0)) {
                log_stderr("failure: switch_fsids");
                goto out;
        }

        /* The caller's fsids don't have a mapping in the idmapped mount so any
         * file creation must fail.
         */

        /* create hardlink */
        if (!linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
                log_stderr("failure: linkat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* try to rename a file */
        if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
                log_stderr("failure: renameat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* try to rename a directory */
        if (!renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME)) {
                log_stderr("failure: renameat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* The caller is privileged over the inode so file deletion must work. */

        /* remove file */
        if (unlinkat(open_tree_fd, FILE1, 0)) {
                log_stderr("failure: unlinkat");
                goto out;
        }

        /* remove directory */
        if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) {
                log_stderr("failure: unlinkat");
                goto out;
        }

        /* The caller's fsids don't have a mapping in the idmapped mount so
         * any file creation must fail.
         */

        /* create regular file via open() */
        file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
        if (file1_fd >= 0) {
                log_stderr("failure: create");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* create regular file via mknod */
        if (!mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0)) {
                log_stderr("failure: mknodat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* create character device */
        if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
                log_stderr("failure: mknodat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* create symlink */
        if (!symlinkat(FILE2, open_tree_fd, SYMLINK1)) {
                log_stderr("failure: symlinkat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        /* create directory */
        if (!mkdirat(open_tree_fd, DIR1, 0700)) {
                log_stderr("failure: mkdirat");
                goto out;
        }
        if (errno != EOVERFLOW) {
                log_stderr("failure: errno");
                goto out;
        }

        fret = 0;
        log_debug("Ran test");
out:
        /* safe_close() skips descriptors that were never opened. */
        safe_close(attr.userns_fd);
        safe_close(hardlink_target_fd);
        safe_close(file1_fd);
        safe_close(open_tree_fd);

        return fret;
}
1000
1001 static int fsids_mapped(void)
1002 {
1003         int fret = -1;
1004         int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
1005         struct mount_attr attr = {
1006                 .attr_set = MOUNT_ATTR_IDMAP,
1007         };
1008         pid_t pid;
1009
1010         if (!caps_supported())
1011                 return 0;
1012
1013         /* create hardlink target */
1014         hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1015         if (hardlink_target_fd < 0) {
1016                 log_stderr("failure: openat");
1017                 goto out;
1018         }
1019
1020         /* create directory for rename test */
1021         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
1022                 log_stderr("failure: mkdirat");
1023                 goto out;
1024         }
1025
1026         /* change ownership of all files to uid 0 */
1027         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1028                 log_stderr("failure: chown_r");
1029                 goto out;
1030         }
1031
1032         /* Changing mount properties on a detached mount. */
1033         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1034         if (attr.userns_fd < 0) {
1035                 log_stderr("failure: get_userns_fd");
1036                 goto out;
1037         }
1038
1039         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1040                                      AT_EMPTY_PATH |
1041                                      AT_NO_AUTOMOUNT |
1042                                      AT_SYMLINK_NOFOLLOW |
1043                                      OPEN_TREE_CLOEXEC |
1044                                      OPEN_TREE_CLONE);
1045         if (open_tree_fd < 0) {
1046                 log_stderr("failure: sys_open_tree");
1047                 goto out;
1048         }
1049
1050         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1051                 log_stderr("failure: sys_mount_setattr");
1052                 goto out;
1053         }
1054
1055         pid = fork();
1056         if (pid < 0) {
1057                 log_stderr("failure: fork");
1058                 goto out;
1059         }
1060         if (pid == 0) {
1061                 if (!switch_fsids(10000, 10000))
1062                         die("failure: switch fsids");
1063
1064                 if (!caps_up())
1065                         die("failure: raise caps");
1066
1067                 /* The caller's fsids now have mappings in the idmapped mount so
1068                  * any file creation must fail.
1069                  */
1070
1071                 /* create hardlink */
1072                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1073                         die("failure: create hardlink");
1074
1075                 /* try to rename a file */
1076                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1077                         die("failure: rename");
1078
1079                 /* try to rename a directory */
1080                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1081                         die("failure: rename");
1082
1083                 /* remove file */
1084                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1085                         die("failure: delete");
1086
1087                 /* remove directory */
1088                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1089                         die("failure: delete");
1090
1091                 /* The caller's fsids have mappings in the idmapped mount so any
1092                  * file creation must fail.
1093                  */
1094
1095                 /* create regular file via open() */
1096                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1097                 if (file1_fd < 0)
1098                         die("failure: create");
1099
1100                 /* create regular file via mknod */
1101                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1102                         die("failure: create");
1103
1104                 /* create character device */
1105                 if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
1106                         die("failure: create");
1107
1108                 /* create symlink */
1109                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1110                         die("failure: create");
1111
1112                 /* create directory */
1113                 if (mkdirat(open_tree_fd, DIR1, 0700))
1114                         die("failure: create");
1115
1116                 exit(EXIT_SUCCESS);
1117         }
1118         if (wait_for_pid(pid))
1119                 goto out;
1120
1121         fret = 0;
1122         log_debug("Ran test");
1123 out:
1124         safe_close(attr.userns_fd);
1125         safe_close(file1_fd);
1126         safe_close(hardlink_target_fd);
1127         safe_close(open_tree_fd);
1128
1129         return fret;
1130 }
1131
1132 /* Validate that basic file operations on idmapped mounts from a user namespace. */
1133 static int create_in_userns(void)
1134 {
1135         int fret = -1;
1136         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1137         struct mount_attr attr = {
1138                 .attr_set = MOUNT_ATTR_IDMAP,
1139         };
1140         pid_t pid;
1141
1142         /* change ownership of all files to uid 0 */
1143         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1144                 log_stderr("failure: chown_r");
1145                 goto out;
1146         }
1147
1148         /* Changing mount properties on a detached mount. */
1149         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1150         if (attr.userns_fd < 0) {
1151                 log_stderr("failure: get_userns_fd");
1152                 goto out;
1153         }
1154
1155         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1156                                      AT_EMPTY_PATH |
1157                                      AT_NO_AUTOMOUNT |
1158                                      AT_SYMLINK_NOFOLLOW |
1159                                      OPEN_TREE_CLOEXEC |
1160                                      OPEN_TREE_CLONE);
1161         if (open_tree_fd < 0) {
1162                 log_stderr("failure: sys_open_tree");
1163                 goto out;
1164         }
1165
1166         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1167                 log_stderr("failure: sys_mount_setattr");
1168                 goto out;
1169         }
1170
1171         pid = fork();
1172         if (pid < 0) {
1173                 log_stderr("failure: fork");
1174                 goto out;
1175         }
1176         if (pid == 0) {
1177                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1178                         die("failure: switch_userns");
1179
1180                 /* create regular file via open() */
1181                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1182                 if (file1_fd < 0)
1183                         die("failure: open file");
1184                 safe_close(file1_fd);
1185
1186                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1187                         die("failure: check ownership");
1188
1189                 /* create regular file via mknod */
1190                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1191                         die("failure: create");
1192
1193                 if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0))
1194                         die("failure: check ownership");
1195
1196                 /* create symlink */
1197                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1198                         die("failure: create");
1199
1200                 if (!expected_uid_gid(open_tree_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0))
1201                         die("failure: check ownership");
1202
1203                 /* create directory */
1204                 if (mkdirat(open_tree_fd, DIR1, 0700))
1205                         die("failure: create");
1206
1207                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
1208                         die("failure: check ownership");
1209
1210                 /* try to rename a file */
1211                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1212                         die("failure: create");
1213
1214                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1215                         die("failure: check ownership");
1216
1217                 /* try to rename a file */
1218                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1219                         die("failure: create");
1220
1221                 if (!expected_uid_gid(open_tree_fd, DIR1_RENAME, 0, 0, 0))
1222                         die("failure: check ownership");
1223
1224                 /* remove file */
1225                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1226                         die("failure: remove");
1227
1228                 /* remove directory */
1229                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1230                         die("failure: remove");
1231
1232                 exit(EXIT_SUCCESS);
1233         }
1234
1235         if (wait_for_pid(pid))
1236                 goto out;
1237
1238         fret = 0;
1239         log_debug("Ran test");
1240 out:
1241         safe_close(attr.userns_fd);
1242         safe_close(file1_fd);
1243         safe_close(open_tree_fd);
1244
1245         return fret;
1246 }
1247
1248 static int hardlink_crossing_mounts(void)
1249 {
1250         int fret = -1;
1251         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1252
1253         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1254                 log_stderr("failure: chown_r");
1255                 goto out;
1256         }
1257
1258         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1259                                      AT_EMPTY_PATH |
1260                                      AT_NO_AUTOMOUNT |
1261                                      AT_SYMLINK_NOFOLLOW |
1262                                      OPEN_TREE_CLOEXEC |
1263                                      OPEN_TREE_CLONE);
1264         if (open_tree_fd < 0) {
1265                 log_stderr("failure: sys_open_tree");
1266                 goto out;
1267         }
1268
1269         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1270         if (file1_fd < 0) {
1271                 log_stderr("failure: openat");
1272                 goto out;
1273         }
1274
1275         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1276                 log_stderr("failure: mkdirat");
1277                 goto out;
1278         }
1279
1280         /* We're crossing a mountpoint so this must fail.
1281          *
1282          * Note that this must also fail for non-idmapped mounts but here we're
1283          * interested in making sure we're not introducing an accidental way to
1284          * violate that restriction or that suddenly this becomes possible.
1285          */
1286         if (!linkat(open_tree_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
1287                 log_stderr("failure: linkat");
1288                 goto out;
1289         }
1290         if (errno != EXDEV) {
1291                 log_stderr("failure: errno");
1292                 goto out;
1293         }
1294
1295         fret = 0;
1296         log_debug("Ran test");
1297 out:
1298         safe_close(file1_fd);
1299         safe_close(open_tree_fd);
1300
1301         return fret;
1302 }
1303
1304 static int hardlink_crossing_idmapped_mounts(void)
1305 {
1306         int fret = -1;
1307         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1308         struct mount_attr attr = {
1309                 .attr_set = MOUNT_ATTR_IDMAP,
1310         };
1311
1312         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1313                 log_stderr("failure: chown_r");
1314                 goto out;
1315         }
1316
1317         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1318         if (attr.userns_fd < 0) {
1319                 log_stderr("failure: get_userns_fd");
1320                 goto out;
1321         }
1322
1323         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1324                                      AT_EMPTY_PATH |
1325                                      AT_NO_AUTOMOUNT |
1326                                      AT_SYMLINK_NOFOLLOW |
1327                                      OPEN_TREE_CLOEXEC |
1328                                      OPEN_TREE_CLONE);
1329         if (open_tree_fd1 < 0) {
1330                 log_stderr("failure: sys_open_tree");
1331                 goto out;
1332         }
1333
1334         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1335                 log_stderr("failure: sys_mount_setattr");
1336                 goto out;
1337         }
1338
1339         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1340         if (file1_fd < 0) {
1341                 log_stderr("failure: openat");
1342                 goto out;
1343         }
1344
1345         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1346                 log_stderr("failure: expected_uid_gid");
1347                 goto out;
1348         }
1349
1350         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1351                 log_stderr("failure: expected_uid_gid");
1352                 goto out;
1353         }
1354
1355         safe_close(file1_fd);
1356
1357         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1358                 log_stderr("failure: mkdirat");
1359                 goto out;
1360         }
1361
1362         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1363                                       AT_NO_AUTOMOUNT |
1364                                       AT_SYMLINK_NOFOLLOW |
1365                                       OPEN_TREE_CLOEXEC |
1366                                       OPEN_TREE_CLONE |
1367                                       AT_RECURSIVE);
1368         if (open_tree_fd2 < 0) {
1369                 log_stderr("failure: sys_open_tree");
1370                 goto out;
1371         }
1372
1373         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1374                 log_stderr("failure: sys_mount_setattr");
1375                 goto out;
1376         }
1377
1378         /* We're crossing a mountpoint so this must fail.
1379          *
1380          * Note that this must also fail for non-idmapped mounts but here we're
1381          * interested in making sure we're not introducing an accidental way to
1382          * violate that restriction or that suddenly this becomes possible.
1383          */
1384         if (!linkat(open_tree_fd1, FILE1, open_tree_fd2, HARDLINK1, 0)) {
1385                 log_stderr("failure: linkat");
1386                 goto out;
1387         }
1388         if (errno != EXDEV) {
1389                 log_stderr("failure: errno");
1390                 goto out;
1391         }
1392
1393         fret = 0;
1394         log_debug("Ran test");
1395 out:
1396         safe_close(attr.userns_fd);
1397         safe_close(file1_fd);
1398         safe_close(open_tree_fd1);
1399         safe_close(open_tree_fd2);
1400
1401         return fret;
1402 }
1403
1404 static int hardlink_from_idmapped_mount(void)
1405 {
1406         int fret = -1;
1407         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1408         struct mount_attr attr = {
1409                 .attr_set = MOUNT_ATTR_IDMAP,
1410         };
1411
1412         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1413                 log_stderr("failure: chown_r");
1414                 goto out;
1415         }
1416
1417         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1418         if (attr.userns_fd < 0) {
1419                 log_stderr("failure: get_userns_fd");
1420                 goto out;
1421         }
1422
1423         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1424                                      AT_EMPTY_PATH |
1425                                      AT_NO_AUTOMOUNT |
1426                                      AT_SYMLINK_NOFOLLOW |
1427                                      OPEN_TREE_CLOEXEC |
1428                                      OPEN_TREE_CLONE);
1429         if (open_tree_fd < 0) {
1430                 log_stderr("failure: sys_open_tree");
1431                 goto out;
1432         }
1433
1434         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1435                 log_stderr("failure: sys_mount_setattr");
1436                 goto out;
1437         }
1438
1439         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1440         if (file1_fd < 0) {
1441                 log_stderr("failure: openat");
1442                 goto out;
1443         }
1444         safe_close(file1_fd);
1445
1446         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1447                 log_stderr("failure: expected_uid_gid");
1448                 goto out;
1449         }
1450
1451         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1452                 log_stderr("failure: expected_uid_gid");
1453                 goto out;
1454         }
1455
1456         /* We're not crossing a mountpoint so this must succeed. */
1457         if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
1458                 log_stderr("failure: linkat");
1459                 goto out;
1460         }
1461
1462
1463         fret = 0;
1464         log_debug("Ran test");
1465 out:
1466         safe_close(attr.userns_fd);
1467         safe_close(file1_fd);
1468         safe_close(open_tree_fd);
1469
1470         return fret;
1471 }
1472
1473 static int hardlink_from_idmapped_mount_in_userns(void)
1474 {
1475         int fret = -1;
1476         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1477         struct mount_attr attr = {
1478                 .attr_set = MOUNT_ATTR_IDMAP,
1479         };
1480         pid_t pid;
1481
1482         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1483                 log_stderr("failure: chown_r");
1484                 goto out;
1485         }
1486
1487         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1488         if (attr.userns_fd < 0) {
1489                 log_stderr("failure: get_userns_fd");
1490                 goto out;
1491         }
1492
1493         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1494                                      AT_EMPTY_PATH |
1495                                      AT_NO_AUTOMOUNT |
1496                                      AT_SYMLINK_NOFOLLOW |
1497                                      OPEN_TREE_CLOEXEC |
1498                                      OPEN_TREE_CLONE);
1499         if (open_tree_fd < 0) {
1500                 log_stderr("failure: sys_open_tree");
1501                 goto out;
1502         }
1503
1504         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1505                 log_stderr("failure: sys_mount_setattr");
1506                 goto out;
1507         }
1508
1509         pid = fork();
1510         if (pid < 0) {
1511                 log_stderr("failure: fork");
1512                 goto out;
1513         }
1514         if (pid == 0) {
1515                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1516                         die("failure: switch_userns");
1517
1518                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1519                 if (file1_fd < 0)
1520                         die("failure: create");
1521
1522                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1523                         die("failure: check ownership");
1524
1525                 /* We're not crossing a mountpoint so this must succeed. */
1526                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1527                         die("failure: create");
1528
1529                 if (!expected_uid_gid(open_tree_fd, HARDLINK1, 0, 0, 0))
1530                         die("failure: check ownership");
1531
1532                 exit(EXIT_SUCCESS);
1533         }
1534
1535         if (wait_for_pid(pid))
1536                 goto out;
1537
1538         fret = 0;
1539         log_debug("Ran test");
1540 out:
1541         safe_close(attr.userns_fd);
1542         safe_close(file1_fd);
1543         safe_close(open_tree_fd);
1544
1545         return fret;
1546 }
1547
1548 static int rename_crossing_mounts(void)
1549 {
1550         int fret = -1;
1551         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1552
1553         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1554                 log_stderr("failure: chown_r");
1555                 goto out;
1556         }
1557
1558         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1559                                      AT_EMPTY_PATH |
1560                                      AT_NO_AUTOMOUNT |
1561                                      AT_SYMLINK_NOFOLLOW |
1562                                      OPEN_TREE_CLOEXEC |
1563                                      OPEN_TREE_CLONE);
1564         if (open_tree_fd < 0) {
1565                 log_stderr("failure: sys_open_tree");
1566                 goto out;
1567         }
1568
1569         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1570         if (file1_fd < 0) {
1571                 log_stderr("failure: openat");
1572                 goto out;
1573         }
1574
1575         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1576                 log_stderr("failure: mkdirat");
1577                 goto out;
1578         }
1579
1580         /* We're crossing a mountpoint so this must fail.
1581          *
1582          * Note that this must also fail for non-idmapped mounts but here we're
1583          * interested in making sure we're not introducing an accidental way to
1584          * violate that restriction or that suddenly this becomes possible.
1585          */
1586         if (!renameat(open_tree_fd, FILE1, t_dir1_fd, FILE1_RENAME)) {
1587                 log_stderr("failure: renameat");
1588                 goto out;
1589         }
1590         if (errno != EXDEV) {
1591                 log_stderr("failure: errno");
1592                 goto out;
1593         }
1594
1595         fret = 0;
1596         log_debug("Ran test");
1597 out:
1598         safe_close(file1_fd);
1599         safe_close(open_tree_fd);
1600
1601         return fret;
1602 }
1603
1604 static int rename_crossing_idmapped_mounts(void)
1605 {
1606         int fret = -1;
1607         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1608         struct mount_attr attr = {
1609                 .attr_set = MOUNT_ATTR_IDMAP,
1610         };
1611
1612         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1613                 log_stderr("failure: chown_r");
1614                 goto out;
1615         }
1616
1617         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1618         if (attr.userns_fd < 0) {
1619                 log_stderr("failure: get_userns_fd");
1620                 goto out;
1621         }
1622
1623         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1624                                      AT_EMPTY_PATH |
1625                                      AT_NO_AUTOMOUNT |
1626                                      AT_SYMLINK_NOFOLLOW |
1627                                      OPEN_TREE_CLOEXEC |
1628                                      OPEN_TREE_CLONE);
1629         if (open_tree_fd1 < 0) {
1630                 log_stderr("failure: sys_open_tree");
1631                 goto out;
1632         }
1633
1634         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1635                 log_stderr("failure: sys_mount_setattr");
1636                 goto out;
1637         }
1638
1639         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1640         if (file1_fd < 0) {
1641                 log_stderr("failure: openat");
1642                 goto out;
1643         }
1644
1645         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1646                 log_stderr("failure: expected_uid_gid");
1647                 goto out;
1648         }
1649
1650         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1651                 log_stderr("failure: expected_uid_gid");
1652                 goto out;
1653         }
1654
1655         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1656                 log_stderr("failure: mkdirat");
1657                 goto out;
1658         }
1659
1660         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1661                                       AT_NO_AUTOMOUNT |
1662                                       AT_SYMLINK_NOFOLLOW |
1663                                       OPEN_TREE_CLOEXEC |
1664                                       OPEN_TREE_CLONE |
1665                                       AT_RECURSIVE);
1666         if (open_tree_fd2 < 0) {
1667                 log_stderr("failure: sys_open_tree");
1668                 goto out;
1669         }
1670
1671         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1672                 log_stderr("failure: sys_mount_setattr");
1673                 goto out;
1674         }
1675
1676         /* We're crossing a mountpoint so this must fail.
1677          *
1678          * Note that this must also fail for non-idmapped mounts but here we're
1679          * interested in making sure we're not introducing an accidental way to
1680          * violate that restriction or that suddenly this becomes possible.
1681          */
1682         if (!renameat(open_tree_fd1, FILE1, open_tree_fd2, FILE1_RENAME)) {
1683                 log_stderr("failure: renameat");
1684                 goto out;
1685         }
1686         if (errno != EXDEV) {
1687                 log_stderr("failure: errno");
1688                 goto out;
1689         }
1690
1691         fret = 0;
1692         log_debug("Ran test");
1693 out:
1694         safe_close(attr.userns_fd);
1695         safe_close(file1_fd);
1696         safe_close(open_tree_fd1);
1697         safe_close(open_tree_fd2);
1698
1699         return fret;
1700 }
1701
1702 static int rename_from_idmapped_mount(void)
1703 {
1704         int fret = -1;
1705         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1706         struct mount_attr attr = {
1707                 .attr_set = MOUNT_ATTR_IDMAP,
1708         };
1709
1710         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1711                 log_stderr("failure: chown_r");
1712                 goto out;
1713         }
1714
1715         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1716         if (attr.userns_fd < 0) {
1717                 log_stderr("failure: get_userns_fd");
1718                 goto out;
1719         }
1720
1721         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1722                                      AT_EMPTY_PATH |
1723                                      AT_NO_AUTOMOUNT |
1724                                      AT_SYMLINK_NOFOLLOW |
1725                                      OPEN_TREE_CLOEXEC |
1726                                      OPEN_TREE_CLONE);
1727         if (open_tree_fd < 0) {
1728                 log_stderr("failure: sys_open_tree");
1729                 goto out;
1730         }
1731
1732         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1733                 log_stderr("failure: sys_mount_setattr");
1734                 goto out;
1735         }
1736
1737         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1738         if (file1_fd < 0) {
1739                 log_stderr("failure: openat");
1740                 goto out;
1741         }
1742
1743         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1744                 log_stderr("failure: expected_uid_gid");
1745                 goto out;
1746         }
1747
1748         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1749                 log_stderr("failure: expected_uid_gid");
1750                 goto out;
1751         }
1752
1753         /* We're not crossing a mountpoint so this must succeed. */
1754         if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
1755                 log_stderr("failure: renameat");
1756                 goto out;
1757         }
1758
1759         fret = 0;
1760         log_debug("Ran test");
1761 out:
1762         safe_close(attr.userns_fd);
1763         safe_close(file1_fd);
1764         safe_close(open_tree_fd);
1765
1766         return fret;
1767 }
1768
1769 static int rename_from_idmapped_mount_in_userns(void)
1770 {
1771         int fret = -1;
1772         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1773         pid_t pid;
1774         struct mount_attr attr = {
1775                 .attr_set = MOUNT_ATTR_IDMAP,
1776         };
1777
1778         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1779                 log_stderr("failure: chown_r");
1780                 goto out;
1781         }
1782
1783         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1784         if (attr.userns_fd < 0) {
1785                 log_stderr("failure: get_userns_fd");
1786                 goto out;
1787         }
1788
1789         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1790                                      AT_EMPTY_PATH |
1791                                      AT_NO_AUTOMOUNT |
1792                                      AT_SYMLINK_NOFOLLOW |
1793                                      OPEN_TREE_CLOEXEC |
1794                                      OPEN_TREE_CLONE);
1795         if (open_tree_fd < 0) {
1796                 log_stderr("failure: sys_open_tree");
1797                 goto out;
1798         }
1799
1800         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1801                 log_stderr("failure: sys_mount_setattr");
1802                 goto out;
1803         }
1804
1805         pid = fork();
1806         if (pid < 0) {
1807                 log_stderr("failure: fork");
1808                 goto out;
1809         }
1810         if (pid == 0) {
1811                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1812                         die("failure: switch_userns");
1813
1814                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1815                 if (file1_fd < 0)
1816                         die("failure: create");
1817
1818                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1819                         die("failure: check ownership");
1820
1821                 /* We're not crossing a mountpoint so this must succeed. */
1822                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1823                         die("failure: create");
1824
1825                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1826                         die("failure: check ownership");
1827
1828                 exit(EXIT_SUCCESS);
1829         }
1830
1831         if (wait_for_pid(pid))
1832                 goto out;
1833
1834         fret = 0;
1835         log_debug("Ran test");
1836 out:
1837         safe_close(attr.userns_fd);
1838         safe_close(file1_fd);
1839         safe_close(open_tree_fd);
1840
1841         return fret;
1842 }
1843
1844 static int symlink_regular_mounts(void)
1845 {
1846         int fret = -1;
1847         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1848         struct stat st;
1849
1850         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1851         if (file1_fd < 0) {
1852                 log_stderr("failure: openat");
1853                 goto out;
1854         }
1855
1856         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1857                 log_stderr("failure: chown_r");
1858                 goto out;
1859         }
1860
1861         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1862                                      AT_EMPTY_PATH |
1863                                      AT_NO_AUTOMOUNT |
1864                                      AT_SYMLINK_NOFOLLOW |
1865                                      OPEN_TREE_CLOEXEC |
1866                                      OPEN_TREE_CLONE);
1867         if (open_tree_fd < 0) {
1868                 log_stderr("failure: sys_open_tree");
1869                 goto out;
1870         }
1871
1872         if (symlinkat(FILE1, open_tree_fd, FILE2)) {
1873                 log_stderr("failure: symlinkat");
1874                 goto out;
1875         }
1876
1877         if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW)) {
1878                 log_stderr("failure: fchownat");
1879                 goto out;
1880         }
1881
1882         if (fstatat(open_tree_fd, FILE2, &st, AT_SYMLINK_NOFOLLOW)) {
1883                 log_stderr("failure: fstatat");
1884                 goto out;
1885         }
1886
1887         if (st.st_uid != 15000 || st.st_gid != 15000) {
1888                 log_stderr("failure: compare ids");
1889                 goto out;
1890         }
1891
1892         if (fstatat(open_tree_fd, FILE1, &st, 0)) {
1893                 log_stderr("failure: fstatat");
1894                 goto out;
1895         }
1896
1897         if (st.st_uid != 10000 || st.st_gid != 10000) {
1898                 log_stderr("failure: compare ids");
1899                 goto out;
1900         }
1901
1902         fret = 0;
1903         log_debug("Ran test");
1904 out:
1905         safe_close(file1_fd);
1906         safe_close(open_tree_fd);
1907
1908         return fret;
1909 }
1910
1911 static int symlink_idmapped_mounts(void)
1912 {
1913         int fret = -1;
1914         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1915         struct mount_attr attr = {
1916                 .attr_set = MOUNT_ATTR_IDMAP,
1917         };
1918         pid_t pid;
1919
1920         if (!caps_supported())
1921                 return 0;
1922
1923         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1924         if (file1_fd < 0) {
1925                 log_stderr("failure: openat");
1926                 goto out;
1927         }
1928
1929         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1930                 log_stderr("failure: chown_r");
1931                 goto out;
1932         }
1933
1934         /* Changing mount properties on a detached mount. */
1935         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1936         if (attr.userns_fd < 0) {
1937                 log_stderr("failure: get_userns_fd");
1938                 goto out;
1939         }
1940
1941         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1942                                      AT_EMPTY_PATH |
1943                                      AT_NO_AUTOMOUNT |
1944                                      AT_SYMLINK_NOFOLLOW |
1945                                      OPEN_TREE_CLOEXEC |
1946                                      OPEN_TREE_CLONE);
1947         if (open_tree_fd < 0) {
1948                 log_stderr("failure: sys_open_tree");
1949                 goto out;
1950         }
1951
1952         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1953                 log_stderr("failure: sys_mount_setattr");
1954                 goto out;
1955         }
1956
1957         pid = fork();
1958         if (pid < 0) {
1959                 log_stderr("failure: fork");
1960                 goto out;
1961         }
1962         if (pid == 0) {
1963                 if (!switch_fsids(10000, 10000))
1964                         die("failure: switch fsids");
1965
1966                 if (!caps_up())
1967                         die("failure: raise caps");
1968
1969                 if (symlinkat(FILE1, open_tree_fd, FILE2))
1970                         die("failure: create");
1971
1972                 if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW))
1973                         die("failure: change ownership");
1974
1975                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 15000, 15000))
1976                         die("failure: check ownership");
1977
1978                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
1979                         die("failure: check ownership");
1980
1981                 exit(EXIT_SUCCESS);
1982         }
1983         if (wait_for_pid(pid))
1984                 goto out;
1985
1986         fret = 0;
1987         log_debug("Ran test");
1988 out:
1989         safe_close(attr.userns_fd);
1990         safe_close(file1_fd);
1991         safe_close(open_tree_fd);
1992
1993         return fret;
1994 }
1995
1996 static int symlink_idmapped_mounts_in_userns(void)
1997 {
1998         int fret = -1;
1999         int file1_fd = -EBADF, open_tree_fd = -EBADF;
2000         struct mount_attr attr = {
2001                 .attr_set = MOUNT_ATTR_IDMAP,
2002         };
2003         pid_t pid;
2004
2005         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
2006                 log_stderr("failure: chown_r");
2007                 goto out;
2008         }
2009
2010         /* Changing mount properties on a detached mount. */
2011         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2012         if (attr.userns_fd < 0) {
2013                 log_stderr("failure: get_userns_fd");
2014                 goto out;
2015         }
2016
2017         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2018                                      AT_EMPTY_PATH |
2019                                      AT_NO_AUTOMOUNT |
2020                                      AT_SYMLINK_NOFOLLOW |
2021                                      OPEN_TREE_CLOEXEC |
2022                                      OPEN_TREE_CLONE);
2023         if (open_tree_fd < 0) {
2024                 log_stderr("failure: sys_open_tree");
2025                 goto out;
2026         }
2027
2028         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2029                 log_stderr("failure: sys_mount_setattr");
2030                 goto out;
2031         }
2032
2033         pid = fork();
2034         if (pid < 0) {
2035                 log_stderr("failure: fork");
2036                 goto out;
2037         }
2038         if (pid == 0) {
2039                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2040                         die("failure: switch_userns");
2041
2042                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2043                 if (file1_fd < 0)
2044                         die("failure: create");
2045                 safe_close(file1_fd);
2046
2047                 if (symlinkat(FILE1, open_tree_fd, FILE2))
2048                         die("failure: create");
2049
2050                 if (fchownat(open_tree_fd, FILE2, 5000, 5000, AT_SYMLINK_NOFOLLOW))
2051                         die("failure: change ownership");
2052
2053                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000))
2054                         die("failure: check ownership");
2055
2056                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
2057                         die("failure: check ownership");
2058
2059                 exit(EXIT_SUCCESS);
2060         }
2061
2062         if (wait_for_pid(pid))
2063                 goto out;
2064
2065         if (!expected_uid_gid(t_dir1_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000)) {
2066                 log_stderr("failure: expected_uid_gid");
2067                 goto out;
2068         }
2069
2070         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2071                 log_stderr("failure: expected_uid_gid");
2072                 goto out;
2073         }
2074
2075         fret = 0;
2076         log_debug("Ran test");
2077 out:
2078         safe_close(attr.userns_fd);
2079         safe_close(file1_fd);
2080         safe_close(open_tree_fd);
2081
2082         return fret;
2083 }
2084
2085 /* Validate that a caller whose fsids map into the idmapped mount within it's
2086  * user namespace cannot create any device nodes.
2087  */
2088 static int device_node_in_userns(void)
2089 {
2090         int fret = -1;
2091         int open_tree_fd = -EBADF;
2092         struct mount_attr attr = {
2093                 .attr_set = MOUNT_ATTR_IDMAP,
2094         };
2095         pid_t pid;
2096
2097         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2098         if (attr.userns_fd < 0) {
2099                 log_stderr("failure: get_userns_fd");
2100                 goto out;
2101         }
2102
2103         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2104                                      AT_EMPTY_PATH |
2105                                      AT_NO_AUTOMOUNT |
2106                                      AT_SYMLINK_NOFOLLOW |
2107                                      OPEN_TREE_CLOEXEC |
2108                                      OPEN_TREE_CLONE);
2109         if (open_tree_fd < 0) {
2110                 log_stderr("failure: sys_open_tree");
2111                 goto out;
2112         }
2113
2114         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2115                 log_stderr("failure: sys_mount_setattr");
2116                 goto out;
2117         }
2118
2119         pid = fork();
2120         if (pid < 0) {
2121                 log_stderr("failure: fork");
2122                 goto out;
2123         }
2124         if (pid == 0) {
2125                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2126                         die("failure: switch_userns");
2127
2128                 /* create character device */
2129                 if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
2130                         die("failure: create");
2131
2132                 exit(EXIT_SUCCESS);
2133         }
2134
2135         if (wait_for_pid(pid))
2136                 goto out;
2137
2138         fret = 0;
2139         log_debug("Ran test");
2140 out:
2141         safe_close(attr.userns_fd);
2142         safe_close(open_tree_fd);
2143
2144         return fret;
2145 }
2146
2147
2148 /* Validate that changing file ownership works correctly on idmapped mounts. */
2149 static int expected_uid_gid_idmapped_mounts(void)
2150 {
2151         int fret = -1;
2152         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
2153         struct mount_attr attr1 = {
2154                 .attr_set = MOUNT_ATTR_IDMAP,
2155         };
2156         struct mount_attr attr2 = {
2157                 .attr_set = MOUNT_ATTR_IDMAP,
2158         };
2159         pid_t pid;
2160
2161         if (!switch_fsids(0, 0)) {
2162                 log_stderr("failure: switch_fsids");
2163                 goto out;
2164         }
2165
2166         /* create regular file via open() */
2167         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2168         if (file1_fd < 0) {
2169                 log_stderr("failure: openat");
2170                 goto out;
2171         }
2172
2173         /* create regular file via mknod */
2174         if (mknodat(t_dir1_fd, FILE2, S_IFREG | 0000, 0)) {
2175                 log_stderr("failure: mknodat");
2176                 goto out;
2177         }
2178
2179         /* create character device */
2180         if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
2181                 log_stderr("failure: mknodat");
2182                 goto out;
2183         }
2184
2185         /* create hardlink */
2186         if (linkat(t_dir1_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
2187                 log_stderr("failure: linkat");
2188                 goto out;
2189         }
2190
2191         /* create symlink */
2192         if (symlinkat(FILE2, t_dir1_fd, SYMLINK1)) {
2193                 log_stderr("failure: symlinkat");
2194                 goto out;
2195         }
2196
2197         /* create directory */
2198         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
2199                 log_stderr("failure: mkdirat");
2200                 goto out;
2201         }
2202
2203         /* Changing mount properties on a detached mount. */
2204         attr1.userns_fd = get_userns_fd(0, 10000, 10000);
2205         if (attr1.userns_fd < 0) {
2206                 log_stderr("failure: get_userns_fd");
2207                 goto out;
2208         }
2209
2210         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
2211                                      AT_EMPTY_PATH |
2212                                      AT_NO_AUTOMOUNT |
2213                                      AT_SYMLINK_NOFOLLOW |
2214                                      OPEN_TREE_CLOEXEC |
2215                                      OPEN_TREE_CLONE);
2216         if (open_tree_fd1 < 0) {
2217                 log_stderr("failure: sys_open_tree");
2218                 goto out;
2219         }
2220
2221         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr1, sizeof(attr1))) {
2222                 log_stderr("failure: sys_mount_setattr");
2223                 goto out;
2224         }
2225
2226         /* Validate that all files created through the image mountpoint are
2227          * owned by the callers fsuid and fsgid.
2228          */
2229         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2230                 log_stderr("failure: expected_uid_gid");
2231                 goto out;
2232         }
2233         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2234                 log_stderr("failure: expected_uid_gid");
2235                 goto out;
2236         }
2237         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2238                 log_stderr("failure: expected_uid_gid");
2239                 goto out;
2240         }
2241         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2242                 log_stderr("failure: expected_uid_gid");
2243                 goto out;
2244         }
2245         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
2246                 log_stderr("failure: expected_uid_gid");
2247                 goto out;
2248         }
2249         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2250                 log_stderr("failure: expected_uid_gid");
2251                 goto out;
2252         }
2253         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2254                 log_stderr("failure: expected_uid_gid");
2255                 goto out;
2256         }
2257
2258         /* Validate that all files are owned by the uid and gid specified in
2259          * the idmapping of the mount they are accessed from.
2260          */
2261         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2262                 log_stderr("failure: expected_uid_gid");
2263                 goto out;
2264         }
2265         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2266                 log_stderr("failure: expected_uid_gid");
2267                 goto out;
2268         }
2269         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2270                 log_stderr("failure: expected_uid_gid");
2271                 goto out;
2272         }
2273         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2274                 log_stderr("failure: expected_uid_gid");
2275                 goto out;
2276         }
2277         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
2278                 log_stderr("failure: expected_uid_gid");
2279                 goto out;
2280         }
2281         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2282                 log_stderr("failure: expected_uid_gid");
2283                 goto out;
2284         }
2285         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2286                 log_stderr("failure: expected_uid_gid");
2287                 goto out;
2288         }
2289
2290         /* Changing mount properties on a detached mount. */
2291         attr2.userns_fd = get_userns_fd(0, 30000, 2001);
2292         if (attr2.userns_fd < 0) {
2293                 log_stderr("failure: get_userns_fd");
2294                 goto out;
2295         }
2296
2297         open_tree_fd2 = sys_open_tree(t_dir1_fd, "",
2298                                      AT_EMPTY_PATH |
2299                                      AT_NO_AUTOMOUNT |
2300                                      AT_SYMLINK_NOFOLLOW |
2301                                      OPEN_TREE_CLOEXEC |
2302                                      OPEN_TREE_CLONE);
2303         if (open_tree_fd2 < 0) {
2304                 log_stderr("failure: sys_open_tree");
2305                 goto out;
2306         }
2307
2308         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr2, sizeof(attr2))) {
2309                 log_stderr("failure: sys_mount_setattr");
2310                 goto out;
2311         }
2312
2313         /* Validate that all files are owned by the uid and gid specified in
2314          * the idmapping of the mount they are accessed from.
2315          */
2316         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2317                 log_stderr("failure: expected_uid_gid");
2318                 goto out;
2319         }
2320         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2321                 log_stderr("failure: expected_uid_gid");
2322                 goto out;
2323         }
2324         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2325                 log_stderr("failure: expected_uid_gid");
2326                 goto out;
2327         }
2328         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2329                 log_stderr("failure: expected_uid_gid");
2330                 goto out;
2331         }
2332         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 30000, 30000)) {
2333                 log_stderr("failure: expected_uid_gid");
2334                 goto out;
2335         }
2336         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2337                 log_stderr("failure: expected_uid_gid");
2338                 goto out;
2339         }
2340         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2341                 log_stderr("failure: expected_uid_gid");
2342                 goto out;
2343         }
2344
2345         /* Change ownership through original image mountpoint. */
2346         if (fchownat(t_dir1_fd, FILE1, 2000, 2000, 0)) {
2347                 log_stderr("failure: fchownat");
2348                 goto out;
2349         }
2350         if (fchownat(t_dir1_fd, FILE2, 2000, 2000, 0)) {
2351                 log_stderr("failure: fchownat");
2352                 goto out;
2353         }
2354         if (fchownat(t_dir1_fd, HARDLINK1, 2000, 2000, 0)) {
2355                 log_stderr("failure: fchownat");
2356                 goto out;
2357         }
2358         if (fchownat(t_dir1_fd, CHRDEV1, 2000, 2000, 0)) {
2359                 log_stderr("failure: fchownat");
2360                 goto out;
2361         }
2362         if (fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)) {
2363                 log_stderr("failure: fchownat");
2364                 goto out;
2365         }
2366         if (fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH)) {
2367                 log_stderr("failure: fchownat");
2368                 goto out;
2369         }
2370         if (fchownat(t_dir1_fd, DIR1, 2000, 2000, AT_EMPTY_PATH)) {
2371                 log_stderr("failure: fchownat");
2372                 goto out;
2373         }
2374
2375         /* Check ownership through original mount. */
2376         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 2000, 2000)) {
2377                 log_stderr("failure: expected_uid_gid");
2378                 goto out;
2379         }
2380         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 2000, 2000)) {
2381                 log_stderr("failure: expected_uid_gid");
2382                 goto out;
2383         }
2384         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 2000, 2000)) {
2385                 log_stderr("failure: expected_uid_gid");
2386                 goto out;
2387         }
2388         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 2000, 2000)) {
2389                 log_stderr("failure: expected_uid_gid");
2390                 goto out;
2391         }
2392         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 3000, 3000)) {
2393                 log_stderr("failure: expected_uid_gid");
2394                 goto out;
2395         }
2396         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 2000, 2000)) {
2397                 log_stderr("failure: expected_uid_gid");
2398                 goto out;
2399         }
2400         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 2000, 2000)) {
2401                 log_stderr("failure: expected_uid_gid");
2402                 goto out;
2403         }
2404
2405         /* Check ownership through first idmapped mount. */
2406         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 12000, 12000)) {
2407                 log_stderr("failure:expected_uid_gid ");
2408                 goto out;
2409         }
2410         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 12000, 12000)) {
2411                 log_stderr("failure: expected_uid_gid");
2412                 goto out;
2413         }
2414         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 12000, 12000)) {
2415                 log_stderr("failure: expected_uid_gid");
2416                 goto out;
2417         }
2418         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 12000, 12000)) {
2419                 log_stderr("failure: expected_uid_gid");
2420                 goto out;
2421         }
2422         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 13000, 13000)) {
2423                 log_stderr("failure: expected_uid_gid");
2424                 goto out;
2425         }
2426         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 12000, 12000)) {
2427                 log_stderr("failure:expected_uid_gid ");
2428                 goto out;
2429         }
2430         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 12000, 12000)) {
2431                 log_stderr("failure: expected_uid_gid");
2432                 goto out;
2433         }
2434
2435         /* Check ownership through second idmapped mount. */
2436         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 32000, 32000)) {
2437                 log_stderr("failure: expected_uid_gid");
2438                 goto out;
2439         }
2440         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 32000, 32000)) {
2441                 log_stderr("failure: expected_uid_gid");
2442                 goto out;
2443         }
2444         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 32000, 32000)) {
2445                 log_stderr("failure: expected_uid_gid");
2446                 goto out;
2447         }
2448         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 32000, 32000)) {
2449                 log_stderr("failure: expected_uid_gid");
2450                 goto out;
2451         }
2452         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid)) {
2453                 log_stderr("failure: expected_uid_gid");
2454                 goto out;
2455         }
2456         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 32000, 32000)) {
2457                 log_stderr("failure: expected_uid_gid");
2458                 goto out;
2459         }
2460         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 32000, 32000)) {
2461                 log_stderr("failure: expected_uid_gid");
2462                 goto out;
2463         }
2464
2465         pid = fork();
2466         if (pid < 0) {
2467                 log_stderr("failure: fork");
2468                 goto out;
2469         }
2470         if (pid == 0) {
2471                 if (!switch_userns(attr1.userns_fd, 0, 0, false))
2472                         die("failure: switch_userns");
2473
2474                 if (!fchownat(t_dir1_fd, FILE1, 1000, 1000, 0))
2475                         die("failure: fchownat");
2476                 if (!fchownat(t_dir1_fd, FILE2, 1000, 1000, 0))
2477                         die("failure: fchownat");
2478                 if (!fchownat(t_dir1_fd, HARDLINK1, 1000, 1000, 0))
2479                         die("failure: fchownat");
2480                 if (!fchownat(t_dir1_fd, CHRDEV1, 1000, 1000, 0))
2481                         die("failure: fchownat");
2482                 if (!fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2483                         die("failure: fchownat");
2484                 if (!fchownat(t_dir1_fd, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2485                         die("failure: fchownat");
2486                 if (!fchownat(t_dir1_fd, DIR1, 1000, 1000, AT_EMPTY_PATH))
2487                         die("failure: fchownat");
2488
2489                 if (!fchownat(open_tree_fd2, FILE1, 1000, 1000, 0))
2490                         die("failure: fchownat");
2491                 if (!fchownat(open_tree_fd2, FILE2, 1000, 1000, 0))
2492                         die("failure: fchownat");
2493                 if (!fchownat(open_tree_fd2, HARDLINK1, 1000, 1000, 0))
2494                         die("failure: fchownat");
2495                 if (!fchownat(open_tree_fd2, CHRDEV1, 1000, 1000, 0))
2496                         die("failure: fchownat");
2497                 if (!fchownat(open_tree_fd2, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2498                         die("failure: fchownat");
2499                 if (!fchownat(open_tree_fd2, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2500                         die("failure: fchownat");
2501                 if (!fchownat(open_tree_fd2, DIR1, 1000, 1000, AT_EMPTY_PATH))
2502                         die("failure: fchownat");
2503
2504                 if (fchownat(open_tree_fd1, FILE1, 1000, 1000, 0))
2505                         die("failure: fchownat");
2506                 if (fchownat(open_tree_fd1, FILE2, 1000, 1000, 0))
2507                         die("failure: fchownat");
2508                 if (fchownat(open_tree_fd1, HARDLINK1, 1000, 1000, 0))
2509                         die("failure: fchownat");
2510                 if (fchownat(open_tree_fd1, CHRDEV1, 1000, 1000, 0))
2511                         die("failure: fchownat");
2512                 if (fchownat(open_tree_fd1, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2513                         die("failure: fchownat");
2514                 if (fchownat(open_tree_fd1, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2515                         die("failure: fchownat");
2516                 if (fchownat(open_tree_fd1, DIR1, 1000, 1000, AT_EMPTY_PATH))
2517                         die("failure: fchownat");
2518
2519                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2520                         die("failure: expected_uid_gid");
2521                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2522                         die("failure: expected_uid_gid");
2523                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2524                         die("failure: expected_uid_gid");
2525                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2526                         die("failure: expected_uid_gid");
2527                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2528                         die("failure: expected_uid_gid");
2529                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2530                         die("failure: expected_uid_gid");
2531                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2532                         die("failure: expected_uid_gid");
2533
2534                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, t_overflowuid, t_overflowgid))
2535                         die("failure: expected_uid_gid");
2536                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, t_overflowuid, t_overflowgid))
2537                         die("failure: expected_uid_gid");
2538                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2539                         die("failure: expected_uid_gid");
2540                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2541                         die("failure: expected_uid_gid");
2542                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2543                         die("failure: expected_uid_gid");
2544                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2545                         die("failure: expected_uid_gid");
2546                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, t_overflowuid, t_overflowgid))
2547                         die("failure: expected_uid_gid");
2548
2549                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 1000, 1000))
2550                         die("failure: expected_uid_gid");
2551                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 1000, 1000))
2552                         die("failure: expected_uid_gid");
2553                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 1000, 1000))
2554                         die("failure: expected_uid_gid");
2555                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 1000, 1000))
2556                         die("failure: expected_uid_gid");
2557                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2558                         die("failure: expected_uid_gid");
2559                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 1000, 1000))
2560                         die("failure: expected_uid_gid");
2561                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 1000, 1000))
2562                         die("failure: expected_uid_gid");
2563
2564                 exit(EXIT_SUCCESS);
2565         }
2566
2567         if (wait_for_pid(pid))
2568                 goto out;
2569
2570         /* Check ownership through original mount. */
2571         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 1000, 1000)) {
2572                 log_stderr("failure: expected_uid_gid");
2573                 goto out;
2574         }
2575         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 1000, 1000)) {
2576                 log_stderr("failure: expected_uid_gid");
2577                 goto out;
2578         }
2579         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 1000, 1000)) {
2580                 log_stderr("failure: expected_uid_gid");
2581                 goto out;
2582         }
2583         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 1000, 1000)) {
2584                 log_stderr("failure: expected_uid_gid");
2585                 goto out;
2586         }
2587         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2588                 log_stderr("failure: expected_uid_gid");
2589                 goto out;
2590         }
2591         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 1000, 1000)) {
2592                 log_stderr("failure: expected_uid_gid");
2593                 goto out;
2594         }
2595         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 1000, 1000)) {
2596                 log_stderr("failure: expected_uid_gid");
2597                 goto out;
2598         }
2599
2600         /* Check ownership through first idmapped mount. */
2601         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 11000, 11000)) {
2602                 log_stderr("failure: expected_uid_gid");
2603                 goto out;
2604         }
2605         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 11000, 11000)) {
2606                 log_stderr("failure: expected_uid_gid");
2607                 goto out;
2608         }
2609         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 11000, 11000)) {
2610                 log_stderr("failure: expected_uid_gid");
2611                 goto out;
2612         }
2613         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 11000, 11000)) {
2614                 log_stderr("failure: expected_uid_gid");
2615                 goto out;
2616         }
2617         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2618                 log_stderr("failure: expected_uid_gid");
2619                 goto out;
2620         }
2621         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 11000, 11000)) {
2622                 log_stderr("failure: expected_uid_gid");
2623                 goto out;
2624         }
2625         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 11000, 11000)) {
2626                 log_stderr("failure: expected_uid_gid");
2627                 goto out;
2628         }
2629
2630         /* Check ownership through second idmapped mount. */
2631         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 31000, 31000)) {
2632                 log_stderr("failure: expected_uid_gid");
2633                 goto out;
2634         }
2635         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 31000, 31000)) {
2636                 log_stderr("failure: expected_uid_gid");
2637                 goto out;
2638         }
2639         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 31000, 31000)) {
2640                 log_stderr("failure: expected_uid_gid");
2641                 goto out;
2642         }
2643         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 31000, 31000)) {
2644                 log_stderr("failure: expected_uid_gid");
2645                 goto out;
2646         }
2647         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2648                 log_stderr("failure: expected_uid_gid");
2649                 goto out;
2650         }
2651         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 31000, 31000)) {
2652                 log_stderr("failure: expected_uid_gid");
2653                 goto out;
2654         }
2655         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 31000, 31000)) {
2656                 log_stderr("failure: expected_uid_gid");
2657                 goto out;
2658         }
2659
2660         pid = fork();
2661         if (pid < 0) {
2662                 log_stderr("failure: fork");
2663                 goto out;
2664         }
2665         if (pid == 0) {
2666                 if (!switch_userns(attr2.userns_fd, 0, 0, false))
2667                         die("failure: switch_userns");
2668
2669                 if (!fchownat(t_dir1_fd, FILE1, 0, 0, 0))
2670                         die("failure: fchownat");
2671                 if (!fchownat(t_dir1_fd, FILE2, 0, 0, 0))
2672                         die("failure: fchownat");
2673                 if (!fchownat(t_dir1_fd, HARDLINK1, 0, 0, 0))
2674                         die("failure: fchownat");
2675                 if (!fchownat(t_dir1_fd, CHRDEV1, 0, 0, 0))
2676                         die("failure: fchownat");
2677                 if (!fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2678                         die("failure: fchownat");
2679                 if (!fchownat(t_dir1_fd, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2680                         die("failure: fchownat");
2681                 if (!fchownat(t_dir1_fd, DIR1, 0, 0, AT_EMPTY_PATH))
2682                         die("failure: fchownat");
2683
2684                 if (!fchownat(open_tree_fd1, FILE1, 0, 0, 0))
2685                         die("failure: fchownat");
2686                 if (!fchownat(open_tree_fd1, FILE2, 0, 0, 0))
2687                         die("failure: fchownat");
2688                 if (!fchownat(open_tree_fd1, HARDLINK1, 0, 0, 0))
2689                         die("failure: fchownat");
2690                 if (!fchownat(open_tree_fd1, CHRDEV1, 0, 0, 0))
2691                         die("failure: fchownat");
2692                 if (!fchownat(open_tree_fd1, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2693                         die("failure: fchownat");
2694                 if (!fchownat(open_tree_fd1, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2695                         die("failure: fchownat");
2696                 if (!fchownat(open_tree_fd1, DIR1, 0, 0, AT_EMPTY_PATH))
2697                         die("failure: fchownat");
2698
2699                 if (fchownat(open_tree_fd2, FILE1, 0, 0, 0))
2700                         die("failure: fchownat");
2701                 if (fchownat(open_tree_fd2, FILE2, 0, 0, 0))
2702                         die("failure: fchownat");
2703                 if (fchownat(open_tree_fd2, HARDLINK1, 0, 0, 0))
2704                         die("failure: fchownat");
2705                 if (fchownat(open_tree_fd2, CHRDEV1, 0, 0, 0))
2706                         die("failure: fchownat");
2707                 if (!fchownat(open_tree_fd2, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2708                         die("failure: fchownat");
2709                 if (fchownat(open_tree_fd2, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2710                         die("failure: fchownat");
2711                 if (fchownat(open_tree_fd2, DIR1, 0, 0, AT_EMPTY_PATH))
2712                         die("failure: fchownat");
2713
2714                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2715                         die("failure: expected_uid_gid");
2716                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2717                         die("failure: expected_uid_gid");
2718                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2719                         die("failure: expected_uid_gid");
2720                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2721                         die("failure: expected_uid_gid");
2722                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2723                         die("failure: expected_uid_gid");
2724                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2725                         die("failure: expected_uid_gid");
2726                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2727                         die("failure: expected_uid_gid");
2728
2729                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, t_overflowuid, t_overflowgid))
2730                         die("failure: expected_uid_gid");
2731                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, t_overflowuid, t_overflowgid))
2732                         die("failure: expected_uid_gid");
2733                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2734                         die("failure: expected_uid_gid");
2735                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2736                         die("failure: expected_uid_gid");
2737                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2738                         die("failure: expected_uid_gid");
2739                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2740                         die("failure: expected_uid_gid");
2741                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, t_overflowuid, t_overflowgid))
2742                         die("failure: expected_uid_gid");
2743
2744                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 0, 0))
2745                         die("failure: expected_uid_gid");
2746                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 0, 0))
2747                         die("failure: expected_uid_gid");
2748                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 0, 0))
2749                         die("failure: expected_uid_gid");
2750                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 0, 0))
2751                         die("failure: expected_uid_gid");
2752                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2753                         die("failure: expected_uid_gid");
2754                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 0, 0))
2755                         die("failure: expected_uid_gid");
2756                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 0, 0))
2757                         die("failure: expected_uid_gid");
2758
2759                 exit(EXIT_SUCCESS);
2760         }
2761
2762         if (wait_for_pid(pid))
2763                 goto out;
2764
2765         /* Check ownership through original mount. */
2766         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2767                 log_stderr("failure: expected_uid_gid");
2768                 goto out;
2769         }
2770         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2771                 log_stderr("failure: expected_uid_gid");
2772                 goto out;
2773         }
2774         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2775                 log_stderr("failure: expected_uid_gid");
2776                 goto out;
2777         }
2778         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2779                 log_stderr("failure: expected_uid_gid");
2780                 goto out;
2781         }
2782         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2783                 log_stderr("failure: expected_uid_gid");
2784                 goto out;
2785         }
2786         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2787                 log_stderr("failure: expected_uid_gid");
2788                 goto out;
2789         }
2790         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2791                 log_stderr("failure: expected_uid_gid");
2792                 goto out;
2793         }
2794
2795         /* Check ownership through first idmapped mount. */
2796         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2797                 log_stderr("failure: expected_uid_gid");
2798                 goto out;
2799         }
2800         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2801                 log_stderr("failure: expected_uid_gid");
2802                 goto out;
2803         }
2804         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2805                 log_stderr("failure: expected_uid_gid");
2806                 goto out;
2807         }
2808         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2809                 log_stderr("failure: expected_uid_gid");
2810                 goto out;
2811         }
2812         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2813                 log_stderr("failure: expected_uid_gid");
2814                 goto out;
2815         }
2816         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2817                 log_stderr("failure: expected_uid_gid");
2818                 goto out;
2819         }
2820         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2821                 log_stderr("failure: expected_uid_gid");
2822                 goto out;
2823         }
2824
2825         /* Check ownership through second idmapped mount. */
2826         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2827                 log_stderr("failure: expected_uid_gid");
2828                 goto out;
2829         }
2830         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2831                 log_stderr("failure: expected_uid_gid");
2832                 goto out;
2833         }
2834         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2835                 log_stderr("failure: expected_uid_gid");
2836                 goto out;
2837         }
2838         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2839                 log_stderr("failure: expected_uid_gid");
2840                 goto out;
2841         }
2842         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2843                 log_stderr("failure: expected_uid_gid");
2844                 goto out;
2845         }
2846         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2847                 log_stderr("failure: expected_uid_gid");
2848                 goto out;
2849         }
2850         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2851                 log_stderr("failure: expected_uid_gid");
2852                 goto out;
2853         }
2854
2855         fret = 0;
2856         log_debug("Ran test");
2857 out:
2858         safe_close(attr1.userns_fd);
2859         safe_close(attr2.userns_fd);
2860         safe_close(file1_fd);
2861         safe_close(open_tree_fd1);
2862         safe_close(open_tree_fd2);
2863
2864         return fret;
2865 }
2866
2867 static int fscaps(void)
2868 {
2869         int fret = -1;
2870         int file1_fd = -EBADF;
2871         struct mount_attr attr = {
2872                 .attr_set = MOUNT_ATTR_IDMAP,
2873         };
2874         pid_t pid;
2875
2876         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2877         if (file1_fd < 0) {
2878                 log_stderr("failure: openat");
2879                 goto out;
2880         }
2881
2882         /* Skip if vfs caps are unsupported. */
2883         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
2884                 return 0;
2885
2886         /* Changing mount properties on a detached mount. */
2887         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2888         if (attr.userns_fd < 0) {
2889                 log_stderr("failure: get_userns_fd");
2890                 goto out;
2891         }
2892
2893         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
2894                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2895                 goto out;
2896         }
2897
2898         pid = fork();
2899         if (pid < 0) {
2900                 log_stderr("failure: fork");
2901                 goto out;
2902         }
2903         if (pid == 0) {
2904                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2905                         die("failure: switch_userns");
2906
2907                 /*
2908                  * On kernels before 5.12 this would succeed and return the
2909                  * unconverted caps. Then - for whatever reason - this behavior
2910                  * got changed and since 5.12 EOVERFLOW is returned when the
2911                  * rootid stored alongside the vfs caps does not map to uid 0 in
2912                  * the caller's user namespace.
2913                  */
2914                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
2915                         die("failure: expected_dummy_vfs_caps_uid");
2916
2917                 exit(EXIT_SUCCESS);
2918         }
2919
2920         if (wait_for_pid(pid))
2921                 goto out;
2922
2923         if (fremovexattr(file1_fd, "security.capability")) {
2924                 log_stderr("failure: fremovexattr");
2925                 goto out;
2926         }
2927         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2928                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2929                 goto out;
2930         }
2931         if (errno != ENODATA) {
2932                 log_stderr("failure: errno");
2933                 goto out;
2934         }
2935
2936         if (set_dummy_vfs_caps(file1_fd, 0, 10000)) {
2937                 log_stderr("failure: set_dummy_vfs_caps");
2938                 goto out;
2939         }
2940
2941         if (!expected_dummy_vfs_caps_uid(file1_fd, 10000)) {
2942                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2943                 goto out;
2944         }
2945
2946         pid = fork();
2947         if (pid < 0) {
2948                 log_stderr("failure: fork");
2949                 goto out;
2950         }
2951         if (pid == 0) {
2952                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2953                         die("failure: switch_userns");
2954
2955                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0))
2956                         die("failure: expected_dummy_vfs_caps_uid");
2957
2958                 exit(EXIT_SUCCESS);
2959         }
2960
2961         if (wait_for_pid(pid))
2962                 goto out;
2963
2964         if (fremovexattr(file1_fd, "security.capability")) {
2965                 log_stderr("failure: fremovexattr");
2966                 goto out;
2967         }
2968         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2969                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2970                 goto out;
2971         }
2972         if (errno != ENODATA) {
2973                 log_stderr("failure: errno");
2974                 goto out;
2975         }
2976
2977         fret = 0;
2978         log_debug("Ran test");
2979 out:
2980         safe_close(attr.userns_fd);
2981         safe_close(file1_fd);
2982
2983         return fret;
2984 }
2985
2986 static int fscaps_idmapped_mounts(void)
2987 {
2988         int fret = -1;
2989         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
2990         struct mount_attr attr = {
2991                 .attr_set = MOUNT_ATTR_IDMAP,
2992         };
2993         pid_t pid;
2994
2995         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2996         if (file1_fd < 0) {
2997                 log_stderr("failure: openat");
2998                 goto out;
2999         }
3000
3001         /* Skip if vfs caps are unsupported. */
3002         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3003                 return 0;
3004
3005         if (fremovexattr(file1_fd, "security.capability")) {
3006                 log_stderr("failure: fremovexattr");
3007                 goto out;
3008         }
3009
3010         /* Changing mount properties on a detached mount. */
3011         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3012         if (attr.userns_fd < 0) {
3013                 log_stderr("failure: get_userns_fd");
3014                 goto out;
3015         }
3016
3017         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3018                                      AT_EMPTY_PATH |
3019                                      AT_NO_AUTOMOUNT |
3020                                      AT_SYMLINK_NOFOLLOW |
3021                                      OPEN_TREE_CLOEXEC |
3022                                      OPEN_TREE_CLONE);
3023         if (open_tree_fd < 0) {
3024                 log_stderr("failure: sys_open_tree");
3025                 goto out;
3026         }
3027
3028         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3029                 log_stderr("failure: sys_mount_setattr");
3030                 goto out;
3031         }
3032
3033         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3034         if (file1_fd2 < 0) {
3035                 log_stderr("failure: openat");
3036                 goto out;
3037         }
3038
3039         if (!set_dummy_vfs_caps(file1_fd2, 0, 1000)) {
3040                 log_stderr("failure: set_dummy_vfs_caps");
3041                 goto out;
3042         }
3043
3044         if (set_dummy_vfs_caps(file1_fd2, 0, 10000)) {
3045                 log_stderr("failure: set_dummy_vfs_caps");
3046                 goto out;
3047         }
3048
3049         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3050                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3051                 goto out;
3052         }
3053
3054         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3055                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3056                 goto out;
3057         }
3058
3059         pid = fork();
3060         if (pid < 0) {
3061                 log_stderr("failure: fork");
3062                 goto out;
3063         }
3064         if (pid == 0) {
3065                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3066                         die("failure: switch_userns");
3067
3068                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3069                         die("failure: expected_dummy_vfs_caps_uid");
3070
3071                 exit(EXIT_SUCCESS);
3072         }
3073
3074         if (wait_for_pid(pid))
3075                 goto out;
3076
3077         if (fremovexattr(file1_fd2, "security.capability")) {
3078                 log_stderr("failure: fremovexattr");
3079                 goto out;
3080         }
3081         if (expected_dummy_vfs_caps_uid(file1_fd2, -1)) {
3082                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3083                 goto out;
3084         }
3085         if (errno != ENODATA) {
3086                 log_stderr("failure: errno");
3087                 goto out;
3088         }
3089
3090         if (set_dummy_vfs_caps(file1_fd2, 0, 12000)) {
3091                 log_stderr("failure: set_dummy_vfs_caps");
3092                 goto out;
3093         }
3094
3095         if (!expected_dummy_vfs_caps_uid(file1_fd2, 12000)) {
3096                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3097                 goto out;
3098         }
3099
3100         if (!expected_dummy_vfs_caps_uid(file1_fd, 2000)) {
3101                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3102                 goto out;
3103         }
3104
3105         pid = fork();
3106         if (pid < 0) {
3107                 log_stderr("failure: fork");
3108                 goto out;
3109         }
3110         if (pid == 0) {
3111                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3112                         die("failure: switch_userns");
3113
3114                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 2000))
3115                         die("failure: expected_dummy_vfs_caps_uid");
3116
3117                 exit(EXIT_SUCCESS);
3118         }
3119
3120         if (wait_for_pid(pid))
3121                 goto out;
3122
3123         fret = 0;
3124         log_debug("Ran test");
3125 out:
3126         safe_close(attr.userns_fd);
3127         safe_close(file1_fd);
3128         safe_close(file1_fd2);
3129         safe_close(open_tree_fd);
3130
3131         return fret;
3132 }
3133
3134 static int fscaps_idmapped_mounts_in_userns(void)
3135 {
3136         int fret = -1;
3137         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3138         struct mount_attr attr = {
3139                 .attr_set = MOUNT_ATTR_IDMAP,
3140         };
3141         pid_t pid;
3142
3143         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3144         if (file1_fd < 0) {
3145                 log_stderr("failure: openat");
3146                 goto out;
3147         }
3148
3149         /* Skip if vfs caps are unsupported. */
3150         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3151                 return 0;
3152
3153         if (fremovexattr(file1_fd, "security.capability")) {
3154                 log_stderr("failure: fremovexattr");
3155                 goto out;
3156         }
3157
3158         /* Changing mount properties on a detached mount. */
3159         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3160         if (attr.userns_fd < 0) {
3161                 log_stderr("failure: get_userns_fd");
3162                 goto out;
3163         }
3164
3165         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3166                                      AT_EMPTY_PATH |
3167                                      AT_NO_AUTOMOUNT |
3168                                      AT_SYMLINK_NOFOLLOW |
3169                                      OPEN_TREE_CLOEXEC |
3170                                      OPEN_TREE_CLONE);
3171         if (open_tree_fd < 0) {
3172                 log_stderr("failure: sys_open_tree");
3173                 goto out;
3174         }
3175
3176         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3177                 log_stderr("failure: sys_mount_setattr");
3178                 goto out;
3179         }
3180
3181         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3182         if (file1_fd2 < 0) {
3183                 log_stderr("failure: openat");
3184                 goto out;
3185         }
3186
3187         pid = fork();
3188         if (pid < 0) {
3189                 log_stderr("failure: fork");
3190                 goto out;
3191         }
3192         if (pid == 0) {
3193                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3194                         die("failure: switch_userns");
3195
3196                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3197                         die("failure: expected_dummy_vfs_caps_uid");
3198                 if (errno != ENODATA)
3199                         die("failure: errno");
3200
3201                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3202                         die("failure: set_dummy_vfs_caps");
3203
3204                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3205                         die("failure: expected_dummy_vfs_caps_uid");
3206
3207                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
3208                         die("failure: expected_dummy_vfs_caps_uid");
3209
3210                 exit(EXIT_SUCCESS);
3211         }
3212
3213         if (wait_for_pid(pid))
3214                 goto out;
3215
3216         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
3217                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3218                 goto out;
3219         }
3220
3221         fret = 0;
3222         log_debug("Ran test");
3223 out:
3224         safe_close(attr.userns_fd);
3225         safe_close(file1_fd);
3226         safe_close(file1_fd2);
3227         safe_close(open_tree_fd);
3228
3229         return fret;
3230 }
3231
3232 static int fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns(void)
3233 {
3234         int fret = -1;
3235         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3236         struct mount_attr attr = {
3237                 .attr_set = MOUNT_ATTR_IDMAP,
3238         };
3239         pid_t pid;
3240
3241         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3242         if (file1_fd < 0) {
3243                 log_stderr("failure: openat");
3244                 goto out;
3245         }
3246
3247         /* Skip if vfs caps are unsupported. */
3248         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3249                 return 0;
3250
3251         if (fremovexattr(file1_fd, "security.capability")) {
3252                 log_stderr("failure: fremovexattr");
3253                 goto out;
3254         }
3255         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
3256                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3257                 goto out;
3258         }
3259         if (errno != ENODATA) {
3260                 log_stderr("failure: errno");
3261                 goto out;
3262         }
3263
3264         /* Changing mount properties on a detached mount. */
3265         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3266         if (attr.userns_fd < 0) {
3267                 log_stderr("failure: get_userns_fd");
3268                 goto out;
3269         }
3270
3271         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3272                                      AT_EMPTY_PATH |
3273                                      AT_NO_AUTOMOUNT |
3274                                      AT_SYMLINK_NOFOLLOW |
3275                                      OPEN_TREE_CLOEXEC |
3276                                      OPEN_TREE_CLONE);
3277         if (open_tree_fd < 0) {
3278                 log_stderr("failure: sys_open_tree");
3279                 goto out;
3280         }
3281
3282         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3283                 log_stderr("failure: sys_mount_setattr");
3284                 goto out;
3285         }
3286
3287         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3288         if (file1_fd2 < 0) {
3289                 log_stderr("failure: openat");
3290                 goto out;
3291         }
3292
3293         /*
3294          * Verify we can set an v3 fscap for real root this was regressed at
3295          * some point. Make sure this doesn't happen again!
3296          */
3297         pid = fork();
3298         if (pid < 0) {
3299                 log_stderr("failure: fork");
3300                 goto out;
3301         }
3302         if (pid == 0) {
3303                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3304                         die("failure: switch_userns");
3305
3306                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3307                         die("failure: expected_dummy_vfs_caps_uid");
3308                 if (errno != ENODATA)
3309                         die("failure: errno");
3310
3311                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3312                         die("failure: set_dummy_vfs_caps");
3313
3314                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3315                         die("failure: expected_dummy_vfs_caps_uid");
3316
3317                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0) && errno != EOVERFLOW)
3318                         die("failure: expected_dummy_vfs_caps_uid");
3319
3320                 exit(EXIT_SUCCESS);
3321         }
3322
3323         if (wait_for_pid(pid))
3324                 goto out;
3325
3326         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3327                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3328                 goto out;
3329         }
3330
3331         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3332                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3333                 goto out;
3334         }
3335
3336         fret = 0;
3337         log_debug("Ran test");
3338 out:
3339         safe_close(attr.userns_fd);
3340         safe_close(file1_fd);
3341         safe_close(file1_fd2);
3342         safe_close(open_tree_fd);
3343
3344         return fret;
3345 }
3346
3347 static int fscaps_idmapped_mounts_in_userns_separate_userns(void)
3348 {
3349         int fret = -1;
3350         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3351         struct mount_attr attr = {
3352                 .attr_set = MOUNT_ATTR_IDMAP,
3353         };
3354         pid_t pid;
3355
3356         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3357         if (file1_fd < 0) {
3358                 log_stderr("failure: openat");
3359                 goto out;
3360         }
3361
3362         /* Skip if vfs caps are unsupported. */
3363         if (set_dummy_vfs_caps(file1_fd, 0, 1000)) {
3364                 log_stderr("failure: set_dummy_vfs_caps");
3365                 goto out;
3366         }
3367
3368         if (fremovexattr(file1_fd, "security.capability")) {
3369                 log_stderr("failure: fremovexattr");
3370                 goto out;
3371         }
3372
3373         /* change ownership of all files to uid 0 */
3374         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
3375                 log_stderr("failure: chown_r");
3376                 goto out;
3377         }
3378
3379         /* Changing mount properties on a detached mount. */
3380         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
3381         if (attr.userns_fd < 0) {
3382                 log_stderr("failure: get_userns_fd");
3383                 goto out;
3384         }
3385
3386         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3387                                      AT_EMPTY_PATH |
3388                                      AT_NO_AUTOMOUNT |
3389                                      AT_SYMLINK_NOFOLLOW |
3390                                      OPEN_TREE_CLOEXEC |
3391                                      OPEN_TREE_CLONE);
3392         if (open_tree_fd < 0) {
3393                 log_stderr("failure: sys_open_tree");
3394                 goto out;
3395         }
3396
3397         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3398                 log_stderr("failure: sys_mount_setattr");
3399                 goto out;
3400         }
3401
3402         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3403         if (file1_fd2 < 0) {
3404                 log_stderr("failure: openat");
3405                 goto out;
3406         }
3407
3408         pid = fork();
3409         if (pid < 0) {
3410                 log_stderr("failure: fork");
3411                 goto out;
3412         }
3413         if (pid == 0) {
3414                 int userns_fd;
3415
3416                 userns_fd = get_userns_fd(0, 10000, 10000);
3417                 if (userns_fd < 0)
3418                         die("failure: get_userns_fd");
3419
3420                 if (!switch_userns(userns_fd, 0, 0, false))
3421                         die("failure: switch_userns");
3422
3423                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3424                         die("failure: set fscaps");
3425
3426                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3427                         die("failure: expected_dummy_vfs_caps_uid");
3428
3429                 if (!expected_dummy_vfs_caps_uid(file1_fd, 20000) && errno != EOVERFLOW)
3430                         die("failure: expected_dummy_vfs_caps_uid");
3431
3432                 exit(EXIT_SUCCESS);
3433         }
3434
3435         if (wait_for_pid(pid))
3436                 goto out;
3437
3438         if (!expected_dummy_vfs_caps_uid(file1_fd, 20000)) {
3439                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3440                 goto out;
3441         }
3442
3443         pid = fork();
3444         if (pid < 0) {
3445                 log_stderr("failure: fork");
3446                 goto out;
3447         }
3448         if (pid == 0) {
3449                 int userns_fd;
3450
3451                 userns_fd = get_userns_fd(0, 10000, 10000);
3452                 if (userns_fd < 0)
3453                         die("failure: get_userns_fd");
3454
3455                 if (!switch_userns(userns_fd, 0, 0, false))
3456                         die("failure: switch_userns");
3457
3458                 if (fremovexattr(file1_fd2, "security.capability"))
3459                         die("failure: fremovexattr");
3460                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3461                         die("failure: expected_dummy_vfs_caps_uid");
3462                 if (errno != ENODATA)
3463                         die("failure: errno");
3464
3465                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3466                         die("failure: set_dummy_vfs_caps");
3467
3468                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3469                         die("failure: expected_dummy_vfs_caps_uid");
3470
3471                 if (!expected_dummy_vfs_caps_uid(file1_fd, 21000) && errno != EOVERFLOW)
3472                         die("failure: expected_dummy_vfs_caps_uid");
3473
3474                 exit(EXIT_SUCCESS);
3475         }
3476
3477         if (wait_for_pid(pid))
3478                 goto out;
3479
3480         if (!expected_dummy_vfs_caps_uid(file1_fd, 21000)) {
3481                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3482                 goto out;
3483         }
3484
3485         fret = 0;
3486         log_debug("Ran test");
3487 out:
3488         safe_close(attr.userns_fd);
3489         safe_close(file1_fd);
3490         safe_close(file1_fd2);
3491         safe_close(open_tree_fd);
3492
3493         return fret;
3494 }
3495
/* Constructor used when this binary is re-executed by the setid tests.
 *
 * Without IDMAP_MOUNT_TEST_RUN_SETID in the environment this is a regular
 * test-suite run and the function returns immediately. Otherwise the process
 * never reaches main(): it compares its real and effective ids against
 * EXPECTED_EUID and EXPECTED_EGID and exits.
 *
 * - "1": success only if both the uid and the gid differ from their effective
 *   counterparts and the effective ids equal the expected ones.
 * - "0": success only if real and effective ids are identical and equal the
 *   expected ones; anything else is reported via die().
 *
 * Every other combination, including missing EXPECTED_* variables, ends in
 * exit(EXIT_FAILURE).
 */
static void __attribute__((constructor)) setuid_rexec(void)
{
        const char *mode, *want_euid_str, *want_egid_str;
        uid_t want_euid, ruid, euid;
        gid_t want_egid, rgid, egid;
        bool effective_ids_ok;

        mode = getenv("IDMAP_MOUNT_TEST_RUN_SETID");
        if (!mode)
                return; /* regular test-suite run */

        want_euid_str = getenv("EXPECTED_EUID");
        want_egid_str = getenv("EXPECTED_EGID");
        if (!want_euid_str || !want_egid_str)
                exit(EXIT_FAILURE);

        want_euid = atoi(want_euid_str);
        want_egid = atoi(want_egid_str);

        ruid = getuid();
        euid = geteuid();
        rgid = getgid();
        egid = getegid();

        effective_ids_ok = (want_euid == euid) && (want_egid == egid);

        if (strcmp(mode, "1") == 0) {
                /* A setid transition must have happened for uid and gid. */
                if (effective_ids_ok && ruid != euid && rgid != egid)
                        exit(EXIT_SUCCESS);
        } else if (strcmp(mode, "0") == 0) {
                /* No setid transition may have happened. */
                if (effective_ids_ok && ruid == euid && rgid == egid)
                        exit(EXIT_SUCCESS);
                else
                        die("failure: non-setid");
        }

        exit(EXIT_FAILURE);
}
3536
3537 /* Validate that setid transitions are handled correctly. */
3538 static int setid_binaries(void)
3539 {
3540         int fret = -1;
3541         int file1_fd = -EBADF, exec_fd = -EBADF;
3542         pid_t pid;
3543
3544         /* create a file to be used as setuid binary */
3545         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3546         if (file1_fd < 0) {
3547                 log_stderr("failure: openat");
3548                 goto out;
3549         }
3550
3551         /* open our own executable */
3552         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3553         if (exec_fd < 0) {
3554                 log_stderr("failure: openat");
3555                 goto out;
3556         }
3557
3558         /* copy our own executable into the file we created */
3559         if (fd_to_fd(exec_fd, file1_fd)) {
3560                 log_stderr("failure: fd_to_fd");
3561                 goto out;
3562         }
3563
3564         /* chown the file to the uid and gid we want to assume */
3565         if (fchown(file1_fd, 5000, 5000)) {
3566                 log_stderr("failure: fchown");
3567                 goto out;
3568         }
3569
3570         /* set the setid bits and grant execute permissions to the group */
3571         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3572                 log_stderr("failure: fchmod");
3573                 goto out;
3574         }
3575
3576         /* Verify that the sid bits got raised. */
3577         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3578                 log_stderr("failure: is_setid");
3579                 goto out;
3580         }
3581
3582         safe_close(exec_fd);
3583         safe_close(file1_fd);
3584
3585         /* Verify we run setid binary as uid and gid 5000 from the original
3586          * mount.
3587          */
3588         pid = fork();
3589         if (pid < 0) {
3590                 log_stderr("failure: fork");
3591                 goto out;
3592         }
3593         if (pid == 0) {
3594                 static char *envp[] = {
3595                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3596                         "EXPECTED_EUID=5000",
3597                         "EXPECTED_EGID=5000",
3598                         NULL,
3599                 };
3600                 static char *argv[] = {
3601                         NULL,
3602                 };
3603
3604                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 5000, 5000))
3605                         die("failure: expected_uid_gid");
3606
3607                 sys_execveat(t_dir1_fd, FILE1, argv, envp, 0);
3608                 die("failure: sys_execveat");
3609
3610                 exit(EXIT_FAILURE);
3611         }
3612         if (wait_for_pid(pid))
3613                 goto out;
3614
3615         fret = 0;
3616         log_debug("Ran test");
3617 out:
3618
3619         return fret;
3620 }
3621
3622 /* Validate that setid transitions are handled correctly on idmapped mounts. */
3623 static int setid_binaries_idmapped_mounts(void)
3624 {
3625         int fret = -1;
3626         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3627         struct mount_attr attr = {
3628                 .attr_set = MOUNT_ATTR_IDMAP,
3629         };
3630         pid_t pid;
3631
3632         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3633                 log_stderr("failure: mkdirat");
3634                 goto out;
3635         }
3636
3637         /* create a file to be used as setuid binary */
3638         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3639         if (file1_fd < 0) {
3640                 log_stderr("failure: openat");
3641                 goto out;
3642         }
3643
3644         /* open our own executable */
3645         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3646         if (exec_fd < 0) {
3647                 log_stderr("failure:openat ");
3648                 goto out;
3649         }
3650
3651         /* copy our own executable into the file we created */
3652         if (fd_to_fd(exec_fd, file1_fd)) {
3653                 log_stderr("failure: fd_to_fd");
3654                 goto out;
3655         }
3656
3657         /* chown the file to the uid and gid we want to assume */
3658         if (fchown(file1_fd, 5000, 5000)) {
3659                 log_stderr("failure: fchown");
3660                 goto out;
3661         }
3662
3663         /* set the setid bits and grant execute permissions to the group */
3664         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3665                 log_stderr("failure: fchmod");
3666                 goto out;
3667         }
3668
3669         /* Verify that the sid bits got raised. */
3670         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3671                 log_stderr("failure: is_setid");
3672                 goto out;
3673         }
3674
3675         safe_close(exec_fd);
3676         safe_close(file1_fd);
3677
3678         /* Changing mount properties on a detached mount. */
3679         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3680         if (attr.userns_fd < 0) {
3681                 log_stderr("failure: get_userns_fd");
3682                 goto out;
3683         }
3684
3685         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3686                                      AT_EMPTY_PATH |
3687                                      AT_NO_AUTOMOUNT |
3688                                      AT_SYMLINK_NOFOLLOW |
3689                                      OPEN_TREE_CLOEXEC |
3690                                      OPEN_TREE_CLONE);
3691         if (open_tree_fd < 0) {
3692                 log_stderr("failure: sys_open_tree");
3693                 goto out;
3694         }
3695
3696         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3697                 log_stderr("failure: sys_mount_setattr");
3698                 goto out;
3699         }
3700
3701         /* A detached mount will have an anonymous mount namespace attached to
3702          * it. This means that we can't execute setid binaries on a detached
3703          * mount because the mnt_may_suid() helper will fail the check_mount()
3704          * part of its check which compares the caller's mount namespace to the
3705          * detached mount's mount namespace. Since by definition an anonymous
3706          * mount namespace is not equale to any mount namespace currently in
3707          * use this can't work. So attach the mount to the filesystem first
3708          * before performing this check.
3709          */
3710         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3711                 log_stderr("failure: sys_move_mount");
3712                 goto out;
3713         }
3714
3715         /* Verify we run setid binary as uid and gid 10000 from idmapped mount mount. */
3716         pid = fork();
3717         if (pid < 0) {
3718                 log_stderr("failure: fork");
3719                 goto out;
3720         }
3721         if (pid == 0) {
3722                 static char *envp[] = {
3723                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3724                         "EXPECTED_EUID=15000",
3725                         "EXPECTED_EGID=15000",
3726                         NULL,
3727                 };
3728                 static char *argv[] = {
3729                         NULL,
3730                 };
3731
3732                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 15000, 15000))
3733                         die("failure: expected_uid_gid");
3734
3735                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3736                 die("failure: sys_execveat");
3737
3738                 exit(EXIT_FAILURE);
3739         }
3740
3741         if (wait_for_pid(pid))
3742                 goto out;
3743
3744         fret = 0;
3745         log_debug("Ran test");
3746 out:
3747         safe_close(exec_fd);
3748         safe_close(file1_fd);
3749         safe_close(open_tree_fd);
3750
3751         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
3752         sys_umount2(t_buf, MNT_DETACH);
3753         rm_r(t_mnt_fd, DIR1);
3754
3755         return fret;
3756 }
3757
3758 /* Validate that setid transitions are handled correctly on idmapped mounts
3759  * running in a user namespace where the uid and gid of the setid binary have no
3760  * mapping.
3761  */
3762 static int setid_binaries_idmapped_mounts_in_userns(void)
3763 {
3764         int fret = -1;
3765         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3766         struct mount_attr attr = {
3767                 .attr_set = MOUNT_ATTR_IDMAP,
3768         };
3769         pid_t pid;
3770
3771         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3772                 log_stderr("failure: ");
3773                 goto out;
3774         }
3775
3776         /* create a file to be used as setuid binary */
3777         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3778         if (file1_fd < 0) {
3779                 log_stderr("failure: openat");
3780                 goto out;
3781         }
3782
3783         /* open our own executable */
3784         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3785         if (exec_fd < 0) {
3786                 log_stderr("failure: openat");
3787                 goto out;
3788         }
3789
3790         /* copy our own executable into the file we created */
3791         if (fd_to_fd(exec_fd, file1_fd)) {
3792                 log_stderr("failure: fd_to_fd");
3793                 goto out;
3794         }
3795
3796         safe_close(exec_fd);
3797
3798         /* chown the file to the uid and gid we want to assume */
3799         if (fchown(file1_fd, 5000, 5000)) {
3800                 log_stderr("failure: fchown");
3801                 goto out;
3802         }
3803
3804         /* set the setid bits and grant execute permissions to the group */
3805         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3806                 log_stderr("failure: fchmod");
3807                 goto out;
3808         }
3809
3810         /* Verify that the sid bits got raised. */
3811         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3812                 log_stderr("failure: is_setid");
3813                 goto out;
3814         }
3815
3816         safe_close(file1_fd);
3817
3818         /* Changing mount properties on a detached mount. */
3819         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3820         if (attr.userns_fd < 0) {
3821                 log_stderr("failure: get_userns_fd");
3822                 goto out;
3823         }
3824
3825         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3826                                      AT_EMPTY_PATH |
3827                                      AT_NO_AUTOMOUNT |
3828                                      AT_SYMLINK_NOFOLLOW |
3829                                      OPEN_TREE_CLOEXEC |
3830                                      OPEN_TREE_CLONE);
3831         if (open_tree_fd < 0) {
3832                 log_stderr("failure: sys_open_tree");
3833                 goto out;
3834         }
3835
3836         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3837                 log_stderr("failure: sys_mount_setattr");
3838                 goto out;
3839         }
3840
3841         /* A detached mount will have an anonymous mount namespace attached to
3842          * it. This means that we can't execute setid binaries on a detached
3843          * mount because the mnt_may_suid() helper will fail the check_mount()
3844          * part of its check which compares the caller's mount namespace to the
3845          * detached mount's mount namespace. Since by definition an anonymous
3846          * mount namespace is not equale to any mount namespace currently in
3847          * use this can't work. So attach the mount to the filesystem first
3848          * before performing this check.
3849          */
3850         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3851                 log_stderr("failure: sys_move_mount");
3852                 goto out;
3853         }
3854
3855         pid = fork();
3856         if (pid < 0) {
3857                 log_stderr("failure: fork");
3858                 goto out;
3859         }
3860         if (pid == 0) {
3861                 static char *envp[] = {
3862                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3863                         "EXPECTED_EUID=5000",
3864                         "EXPECTED_EGID=5000",
3865                         NULL,
3866                 };
3867                 static char *argv[] = {
3868                         NULL,
3869                 };
3870
3871                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3872                         die("failure: switch_userns");
3873
3874                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
3875                         die("failure: expected_uid_gid");
3876
3877                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3878                 die("failure: sys_execveat");
3879
3880                 exit(EXIT_FAILURE);
3881         }
3882
3883         if (wait_for_pid(pid)) {
3884                 log_stderr("failure: wait_for_pid");
3885                 goto out;
3886         }
3887
3888         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3889         if (file1_fd < 0) {
3890                 log_stderr("failure: openat");
3891                 goto out;
3892         }
3893
3894         /* chown the file to the uid and gid we want to assume */
3895         if (fchown(file1_fd, 0, 0)) {
3896                 log_stderr("failure: fchown");
3897                 goto out;
3898         }
3899
3900         /* set the setid bits and grant execute permissions to the group */
3901         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3902                 log_stderr("failure: fchmod");
3903                 goto out;
3904         }
3905
3906         /* Verify that the sid bits got raised. */
3907         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3908                 log_stderr("failure: is_setid");
3909                 goto out;
3910         }
3911
3912         safe_close(file1_fd);
3913
3914         pid = fork();
3915         if (pid < 0) {
3916                 log_stderr("failure: fork");
3917                 goto out;
3918         }
3919         if (pid == 0) {
3920                 static char *envp[] = {
3921                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3922                         "EXPECTED_EUID=0",
3923                         "EXPECTED_EGID=0",
3924                         NULL,
3925                 };
3926                 static char *argv[] = {
3927                         NULL,
3928                 };
3929
3930                 if (!caps_supported()) {
3931                         log_debug("skip: capability library not installed");
3932                         exit(EXIT_SUCCESS);
3933                 }
3934
3935                 if (!switch_userns(attr.userns_fd, 5000, 5000, true))
3936                         die("failure: switch_userns");
3937
3938                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
3939                         die("failure: expected_uid_gid");
3940
3941                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3942                 die("failure: sys_execveat");
3943
3944                 exit(EXIT_FAILURE);
3945         }
3946
3947         if (wait_for_pid(pid)) {
3948                 log_stderr("failure: wait_for_pid");
3949                 goto out;
3950         }
3951
3952         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3953         if (file1_fd < 0) {
3954                 log_stderr("failure: openat");
3955                 goto out;
3956         }
3957
3958         /* chown the file to the uid and gid we want to assume */
3959         if (fchown(file1_fd, 30000, 30000)) {
3960                 log_stderr("failure: fchown");
3961                 goto out;
3962         }
3963
3964         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
3965                 log_stderr("failure: fchmod");
3966                 goto out;
3967         }
3968
3969         /* Verify that the sid bits got raised. */
3970         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3971                 log_stderr("failure: is_setid");
3972                 goto out;
3973         }
3974
3975         safe_close(file1_fd);
3976
3977         /* Verify that we can't assume a uid and gid of a setid binary for which
3978          * we have no mapping in our user namespace.
3979          */
3980         pid = fork();
3981         if (pid < 0) {
3982                 log_stderr("failure: fork");
3983                 goto out;
3984         }
3985         if (pid == 0) {
3986                 char expected_euid[100];
3987                 char expected_egid[100];
3988                 static char *envp[4] = {
3989                         NULL,
3990                         NULL,
3991                         NULL,
3992                         NULL,
3993                 };
3994                 static char *argv[] = {
3995                         NULL,
3996                 };
3997
3998                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3999                         die("failure: switch_userns");
4000
4001                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4002                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4003                 envp[1] = expected_euid;
4004                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4005                 envp[2] = expected_egid;
4006
4007                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4008                         die("failure: expected_uid_gid");
4009
4010                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4011                 die("failure: sys_execveat");
4012
4013                 exit(EXIT_FAILURE);
4014         }
4015
4016         if (wait_for_pid(pid)) {
4017                 log_stderr("failure: wait_for_pid");
4018                 goto out;
4019         }
4020
4021         fret = 0;
4022         log_debug("Ran test");
4023 out:
4024         safe_close(attr.userns_fd);
4025         safe_close(exec_fd);
4026         safe_close(file1_fd);
4027         safe_close(open_tree_fd);
4028
4029         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4030         sys_umount2(t_buf, MNT_DETACH);
4031         rm_r(t_mnt_fd, DIR1);
4032
4033         return fret;
4034 }
4035
4036 /* Validate that setid transitions are handled correctly on idmapped mounts
4037  * running in a user namespace where the uid and gid of the setid binary have no
4038  * mapping.
4039  */
4040 static int setid_binaries_idmapped_mounts_in_userns_separate_userns(void)
4041 {
4042         int fret = -1;
4043         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
4044         struct mount_attr attr = {
4045                 .attr_set = MOUNT_ATTR_IDMAP,
4046         };
4047         pid_t pid;
4048
4049         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
4050                 log_stderr("failure: mkdirat");
4051                 goto out;
4052         }
4053
4054         /* create a file to be used as setuid binary */
4055         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
4056         if (file1_fd < 0) {
4057                 log_stderr("failure: openat");
4058                 goto out;
4059         }
4060
4061         /* open our own executable */
4062         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
4063         if (exec_fd < 0) {
4064                 log_stderr("failure: openat");
4065                 goto out;
4066         }
4067
4068         /* copy our own executable into the file we created */
4069         if (fd_to_fd(exec_fd, file1_fd)) {
4070                 log_stderr("failure: fd_to_fd");
4071                 goto out;
4072         }
4073
4074         safe_close(exec_fd);
4075
4076         /* change ownership of all files to uid 0 */
4077         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
4078                 log_stderr("failure: chown_r");
4079                 goto out;
4080         }
4081
4082         /* chown the file to the uid and gid we want to assume */
4083         if (fchown(file1_fd, 25000, 25000)) {
4084                 log_stderr("failure: fchown");
4085                 goto out;
4086         }
4087
4088         /* set the setid bits and grant execute permissions to the group */
4089         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4090                 log_stderr("failure: fchmod");
4091                 goto out;
4092         }
4093
4094         /* Verify that the sid bits got raised. */
4095         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4096                 log_stderr("failure: is_setid");
4097                 goto out;
4098         }
4099
4100         safe_close(file1_fd);
4101
4102         /* Changing mount properties on a detached mount. */
4103         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
4104         if (attr.userns_fd < 0) {
4105                 log_stderr("failure: get_userns_fd");
4106                 goto out;
4107         }
4108
4109         open_tree_fd = sys_open_tree(t_dir1_fd, "",
4110                                      AT_EMPTY_PATH |
4111                                      AT_NO_AUTOMOUNT |
4112                                      AT_SYMLINK_NOFOLLOW |
4113                                      OPEN_TREE_CLOEXEC |
4114                                      OPEN_TREE_CLONE);
4115         if (open_tree_fd < 0) {
4116                 log_stderr("failure: sys_open_tree");
4117                 goto out;
4118         }
4119
4120         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4121                 log_stderr("failure: sys_mount_setattr");
4122                 goto out;
4123         }
4124
4125         /* A detached mount will have an anonymous mount namespace attached to
4126          * it. This means that we can't execute setid binaries on a detached
4127          * mount because the mnt_may_suid() helper will fail the check_mount()
4128          * part of its check which compares the caller's mount namespace to the
4129          * detached mount's mount namespace. Since by definition an anonymous
4130          * mount namespace is not equale to any mount namespace currently in
4131          * use this can't work. So attach the mount to the filesystem first
4132          * before performing this check.
4133          */
4134         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
4135                 log_stderr("failure: sys_move_mount");
4136                 goto out;
4137         }
4138
4139         pid = fork();
4140         if (pid < 0) {
4141                 log_stderr("failure: fork");
4142                 goto out;
4143         }
4144         if (pid == 0) {
4145                 int userns_fd;
4146                 static char *envp[] = {
4147                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4148                         "EXPECTED_EUID=5000",
4149                         "EXPECTED_EGID=5000",
4150                         NULL,
4151                 };
4152                 static char *argv[] = {
4153                         NULL,
4154                 };
4155
4156                 userns_fd = get_userns_fd(0, 10000, 10000);
4157                 if (userns_fd < 0)
4158                         die("failure: get_userns_fd");
4159
4160                 if (!switch_userns(userns_fd, 0, 0, false))
4161                         die("failure: switch_userns");
4162
4163                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
4164                         die("failure: expected_uid_gid");
4165
4166                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4167                 die("failure: sys_execveat");
4168
4169                 exit(EXIT_FAILURE);
4170         }
4171
4172         if (wait_for_pid(pid)) {
4173                 log_stderr("failure: wait_for_pid");
4174                 goto out;
4175         }
4176
4177         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4178         if (file1_fd < 0) {
4179                 log_stderr("failure: openat");
4180                 goto out;
4181         }
4182
4183         /* chown the file to the uid and gid we want to assume */
4184         if (fchown(file1_fd, 20000, 20000)) {
4185                 log_stderr("failure: fchown");
4186                 goto out;
4187         }
4188
4189         /* set the setid bits and grant execute permissions to the group */
4190         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4191                 log_stderr("failure: fchmod");
4192                 goto out;
4193         }
4194
4195         /* Verify that the sid bits got raised. */
4196         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4197                 log_stderr("failure: is_setid");
4198                 goto out;
4199         }
4200
4201         safe_close(file1_fd);
4202
4203         pid = fork();
4204         if (pid < 0) {
4205                 log_stderr("failure: fork");
4206                 goto out;
4207         }
4208         if (pid == 0) {
4209                 int userns_fd;
4210                 static char *envp[] = {
4211                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4212                         "EXPECTED_EUID=0",
4213                         "EXPECTED_EGID=0",
4214                         NULL,
4215                 };
4216                 static char *argv[] = {
4217                         NULL,
4218                 };
4219
4220                 userns_fd = get_userns_fd(0, 10000, 10000);
4221                 if (userns_fd < 0)
4222                         die("failure: get_userns_fd");
4223
4224                 if (!caps_supported()) {
4225                         log_debug("skip: capability library not installed");
4226                         exit(EXIT_SUCCESS);
4227                 }
4228
4229                 if (!switch_userns(userns_fd, 1000, 1000, true))
4230                         die("failure: switch_userns");
4231
4232                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
4233                         die("failure: expected_uid_gid");
4234
4235                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4236                 die("failure: sys_execveat");
4237
4238                 exit(EXIT_FAILURE);
4239         }
4240         if (wait_for_pid(pid)) {
4241                 log_stderr("failure: wait_for_pid");
4242                 goto out;
4243         }
4244
4245         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4246         if (file1_fd < 0) {
4247                 log_stderr("failure: openat");
4248                 goto out;
4249         }
4250
4251         /* chown the file to the uid and gid we want to assume */
4252         if (fchown(file1_fd, 0, 0)) {
4253                 log_stderr("failure: fchown");
4254                 goto out;
4255         }
4256
4257         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
4258                 log_stderr("failure: fchmod");
4259                 goto out;
4260         }
4261
4262         /* Verify that the sid bits got raised. */
4263         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4264                 log_stderr("failure: is_setid");
4265                 goto out;
4266         }
4267
4268         safe_close(file1_fd);
4269
4270         /* Verify that we can't assume a uid and gid of a setid binary for
4271          * which we have no mapping in our user namespace.
4272          */
4273         pid = fork();
4274         if (pid < 0) {
4275                 log_stderr("failure: fork");
4276                 goto out;
4277         }
4278         if (pid == 0) {
4279                 int userns_fd;
4280                 char expected_euid[100];
4281                 char expected_egid[100];
4282                 static char *envp[4] = {
4283                         NULL,
4284                         NULL,
4285                         NULL,
4286                         NULL,
4287                 };
4288                 static char *argv[] = {
4289                         NULL,
4290                 };
4291
4292                 userns_fd = get_userns_fd(0, 10000, 10000);
4293                 if (userns_fd < 0)
4294                         die("failure: get_userns_fd");
4295
4296                 if (!switch_userns(userns_fd, 0, 0, false))
4297                         die("failure: switch_userns");
4298
4299                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4300                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4301                 envp[1] = expected_euid;
4302                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4303                 envp[2] = expected_egid;
4304
4305                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4306                         die("failure: expected_uid_gid");
4307
4308                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4309                 die("failure: sys_execveat");
4310
4311                 exit(EXIT_FAILURE);
4312         }
4313         if (wait_for_pid(pid)) {
4314                 log_stderr("failure: wait_for_pid");
4315                 goto out;
4316         }
4317
4318         fret = 0;
4319         log_debug("Ran test");
4320 out:
4321         safe_close(attr.userns_fd);
4322         safe_close(exec_fd);
4323         safe_close(file1_fd);
4324         safe_close(open_tree_fd);
4325
4326         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4327         sys_umount2(t_buf, MNT_DETACH);
4328         rm_r(t_mnt_fd, DIR1);
4329
4330         return fret;
4331 }
4332
4333 static int sticky_bit_unlink(void)
4334 {
4335         int fret = -1;
4336         int dir_fd = -EBADF;
4337         pid_t pid;
4338
4339         if (!caps_supported())
4340                 return 0;
4341
4342         /* create directory */
4343         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4344                 log_stderr("failure: mkdirat");
4345                 goto out;
4346         }
4347
4348         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4349         if (dir_fd < 0) {
4350                 log_stderr("failure: openat");
4351                 goto out;
4352         }
4353
4354         if (fchown(dir_fd, 0, 0)) {
4355                 log_stderr("failure: fchown");
4356                 goto out;
4357         }
4358
4359         if (fchmod(dir_fd, 0777)) {
4360                 log_stderr("failure: fchmod");
4361                 goto out;
4362         }
4363
4364         /* create regular file via mknod */
4365         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4366                 log_stderr("failure: mknodat");
4367                 goto out;
4368         }
4369         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4370                 log_stderr("failure: fchownat");
4371                 goto out;
4372         }
4373         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4374                 log_stderr("failure: fchmodat");
4375                 goto out;
4376         }
4377
4378         /* create regular file via mknod */
4379         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4380                 log_stderr("failure: mknodat");
4381                 goto out;
4382         }
4383         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4384                 log_stderr("failure: fchownat");
4385                 goto out;
4386         }
4387         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4388                 log_stderr("failure: fchmodat");
4389                 goto out;
4390         }
4391
4392         /* The sticky bit is not set so we must be able to delete files not
4393          * owned by us.
4394          */
4395         pid = fork();
4396         if (pid < 0) {
4397                 log_stderr("failure: fork");
4398                 goto out;
4399         }
4400         if (pid == 0) {
4401                 if (!switch_ids(1000, 1000))
4402                         die("failure: switch_ids");
4403
4404                 if (unlinkat(dir_fd, FILE1, 0))
4405                         die("failure: unlinkat");
4406
4407                 if (unlinkat(dir_fd, FILE2, 0))
4408                         die("failure: unlinkat");
4409
4410                 exit(EXIT_SUCCESS);
4411         }
4412         if (wait_for_pid(pid)) {
4413                 log_stderr("failure: wait_for_pid");
4414                 goto out;
4415         }
4416
4417         /* set sticky bit */
4418         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4419                 log_stderr("failure: fchmod");
4420                 goto out;
4421         }
4422
4423         /* validate sticky bit is set */
4424         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4425                 log_stderr("failure: is_sticky");
4426                 goto out;
4427         }
4428
4429         /* create regular file via mknod */
4430         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4431                 log_stderr("failure: mknodat");
4432                 goto out;
4433         }
4434         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4435                 log_stderr("failure: fchownat");
4436                 goto out;
4437         }
4438         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4439                 log_stderr("failure: fchmodat");
4440                 goto out;
4441         }
4442
4443         /* create regular file via mknod */
4444         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4445                 log_stderr("failure: mknodat");
4446                 goto out;
4447         }
4448         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4449                 log_stderr("failure: fchownat");
4450                 goto out;
4451         }
4452         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4453                 log_stderr("failure: fchmodat");
4454                 goto out;
4455         }
4456
4457         /* The sticky bit is set so we must not be able to delete files not
4458          * owned by us.
4459          */
4460         pid = fork();
4461         if (pid < 0) {
4462                 log_stderr("failure: fork");
4463                 goto out;
4464         }
4465         if (pid == 0) {
4466                 if (!switch_ids(1000, 1000))
4467                         die("failure: switch_ids");
4468
4469                 if (!unlinkat(dir_fd, FILE1, 0))
4470                         die("failure: unlinkat");
4471                 if (errno != EPERM)
4472                         die("failure: errno");
4473
4474                 if (!unlinkat(dir_fd, FILE2, 0))
4475                         die("failure: unlinkat");
4476                 if (errno != EPERM)
4477                         die("failure: errno");
4478
4479                 exit(EXIT_SUCCESS);
4480         }
4481         if (wait_for_pid(pid)) {
4482                 log_stderr("failure: wait_for_pid");
4483                 goto out;
4484         }
4485
4486         /* The sticky bit is set and we own the files so we must be able to
4487          * delete the files now.
4488          */
4489         pid = fork();
4490         if (pid < 0) {
4491                 log_stderr("failure: fork");
4492                 goto out;
4493         }
4494         if (pid == 0) {
4495                 /* change ownership */
4496                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
4497                         die("failure: fchownat");
4498                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
4499                         die("failure: expected_uid_gid");
4500                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
4501                         die("failure: fchownat");
4502                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
4503                         die("failure: expected_uid_gid");
4504
4505                 if (!switch_ids(1000, 1000))
4506                         die("failure: switch_ids");
4507
4508                 if (unlinkat(dir_fd, FILE1, 0))
4509                         die("failure: unlinkat");
4510
4511                 if (unlinkat(dir_fd, FILE2, 0))
4512                         die("failure: unlinkat");
4513
4514                 exit(EXIT_SUCCESS);
4515         }
4516         if (wait_for_pid(pid)) {
4517                 log_stderr("failure: wait_for_pid");
4518                 goto out;
4519         }
4520
4521         /* change uid to unprivileged user */
4522         if (fchown(dir_fd, 1000, -1)) {
4523                 log_stderr("failure: fchown");
4524                 goto out;
4525         }
4526         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4527                 log_stderr("failure: fchmod");
4528                 goto out;
4529         }
4530         /* validate sticky bit is set */
4531         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4532                 log_stderr("failure: is_sticky");
4533                 goto out;
4534         }
4535
4536         /* create regular file via mknod */
4537         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4538                 log_stderr("failure: mknodat");
4539                 goto out;
4540         }
4541         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4542                 log_stderr("failure: fchownat");
4543                 goto out;
4544         }
4545         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4546                 log_stderr("failure: fchmodat");
4547                 goto out;
4548         }
4549
4550         /* create regular file via mknod */
4551         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4552                 log_stderr("failure: mknodat");
4553                 goto out;
4554         }
4555         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4556                 log_stderr("failure: fchownat");
4557                 goto out;
4558         }
4559         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4560                 log_stderr("failure: fchmodat");
4561                 goto out;
4562         }
4563
4564         /* The sticky bit is set and we own the directory so we must be able to
4565          * delete the files now.
4566          */
4567         pid = fork();
4568         if (pid < 0) {
4569                 log_stderr("failure: fork");
4570                 goto out;
4571         }
4572         if (pid == 0) {
4573                 if (!switch_ids(1000, 1000))
4574                         die("failure: switch_ids");
4575
4576                 if (unlinkat(dir_fd, FILE1, 0))
4577                         die("failure: unlinkat");
4578
4579                 if (unlinkat(dir_fd, FILE2, 0))
4580                         die("failure: unlinkat");
4581
4582                 exit(EXIT_SUCCESS);
4583         }
4584         if (wait_for_pid(pid)) {
4585                 log_stderr("failure: wait_for_pid");
4586                 goto out;
4587         }
4588
4589         fret = 0;
4590         log_debug("Ran test");
4591 out:
4592         safe_close(dir_fd);
4593
4594         return fret;
4595 }
4596
4597 static int sticky_bit_unlink_idmapped_mounts(void)
4598 {
4599         int fret = -1;
4600         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4601         struct mount_attr attr = {
4602                 .attr_set = MOUNT_ATTR_IDMAP,
4603         };
4604         pid_t pid;
4605
4606         if (!caps_supported())
4607                 return 0;
4608
4609         /* create directory */
4610         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4611                 log_stderr("failure: mkdirat");
4612                 goto out;
4613         }
4614
4615         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4616         if (dir_fd < 0) {
4617                 log_stderr("failure: openat");
4618                 goto out;
4619         }
4620         if (fchown(dir_fd, 10000, 10000)) {
4621                 log_stderr("failure: fchown");
4622                 goto out;
4623         }
4624         if (fchmod(dir_fd, 0777)) {
4625                 log_stderr("failure: fchmod");
4626                 goto out;
4627         }
4628
4629         /* create regular file via mknod */
4630         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4631                 log_stderr("failure: mknodat");
4632                 goto out;
4633         }
4634         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4635                 log_stderr("failure: fchownat");
4636                 goto out;
4637         }
4638         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4639                 log_stderr("failure: fchmodat");
4640                 goto out;
4641         }
4642
4643         /* create regular file via mknod */
4644         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4645                 log_stderr("failure: mknodat");
4646                 goto out;
4647         }
4648         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4649                 log_stderr("failure: fchownat");
4650                 goto out;
4651         }
4652         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4653                 log_stderr("failure: fchmodat");
4654                 goto out;
4655         }
4656
4657         /* Changing mount properties on a detached mount. */
4658         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
4659         if (attr.userns_fd < 0) {
4660                 log_stderr("failure: get_userns_fd");
4661                 goto out;
4662         }
4663
4664         open_tree_fd = sys_open_tree(dir_fd, "",
4665                                      AT_EMPTY_PATH |
4666                                      AT_NO_AUTOMOUNT |
4667                                      AT_SYMLINK_NOFOLLOW |
4668                                      OPEN_TREE_CLOEXEC |
4669                                      OPEN_TREE_CLONE);
4670         if (open_tree_fd < 0) {
4671                 log_stderr("failure: sys_open_tree");
4672                 goto out;
4673         }
4674
4675         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4676                 log_stderr("failure: sys_mount_setattr");
4677                 goto out;
4678         }
4679
4680         /* The sticky bit is not set so we must be able to delete files not
4681          * owned by us.
4682          */
4683         pid = fork();
4684         if (pid < 0) {
4685                 log_stderr("failure: fork");
4686                 goto out;
4687         }
4688         if (pid == 0) {
4689                 if (!switch_ids(1000, 1000))
4690                         die("failure: switch_ids");
4691
4692                 if (unlinkat(open_tree_fd, FILE1, 0))
4693                         die("failure: unlinkat");
4694
4695                 if (unlinkat(open_tree_fd, FILE2, 0))
4696                         die("failure: unlinkat");
4697
4698                 exit(EXIT_SUCCESS);
4699         }
4700         if (wait_for_pid(pid)) {
4701                 log_stderr("failure: wait_for_pid");
4702                 goto out;
4703         }
4704
4705         /* set sticky bit */
4706         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4707                 log_stderr("failure: fchmod");
4708                 goto out;
4709         }
4710
4711         /* validate sticky bit is set */
4712         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4713                 log_stderr("failure: is_sticky");
4714                 goto out;
4715         }
4716
4717         /* create regular file via mknod */
4718         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4719                 log_stderr("failure: mknodat");
4720                 goto out;
4721         }
4722         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4723                 log_stderr("failure: fchownat");
4724                 goto out;
4725         }
4726         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4727                 log_stderr("failure: fchmodat");
4728                 goto out;
4729         }
4730
4731         /* create regular file via mknod */
4732         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4733                 log_stderr("failure: mknodat");
4734                 goto out;
4735         }
4736         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4737                 log_stderr("failure: fchownat");
4738                 goto out;
4739         }
4740         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4741                 log_stderr("failure: fchmodat");
4742                 goto out;
4743         }
4744
4745         /* The sticky bit is set so we must not be able to delete files not
4746          * owned by us.
4747          */
4748         pid = fork();
4749         if (pid < 0) {
4750                 log_stderr("failure: fork");
4751                 goto out;
4752         }
4753         if (pid == 0) {
4754                 if (!switch_ids(1000, 1000))
4755                         die("failure: switch_ids");
4756
4757                 if (!unlinkat(open_tree_fd, FILE1, 0))
4758                         die("failure: unlinkat");
4759                 if (errno != EPERM)
4760                         die("failure: errno");
4761
4762                 if (!unlinkat(open_tree_fd, FILE2, 0))
4763                         die("failure: unlinkat");
4764                 if (errno != EPERM)
4765                         die("failure: errno");
4766
4767                 exit(EXIT_SUCCESS);
4768         }
4769         if (wait_for_pid(pid)) {
4770                 log_stderr("failure: wait_for_pid");
4771                 goto out;
4772         }
4773
4774         /* The sticky bit is set and we own the files so we must be able to
4775          * delete the files now.
4776          */
4777         pid = fork();
4778         if (pid < 0) {
4779                 log_stderr("failure: fork");
4780                 goto out;
4781         }
4782         if (pid == 0) {
4783                 /* change ownership */
4784                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
4785                         die("failure: fchownat");
4786                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
4787                         die("failure: expected_uid_gid");
4788                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
4789                         die("failure: fchownat");
4790                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
4791                         die("failure: expected_uid_gid");
4792
4793                 if (!switch_ids(1000, 1000))
4794                         die("failure: switch_ids");
4795
4796                 if (unlinkat(open_tree_fd, FILE1, 0))
4797                         die("failure: unlinkat");
4798
4799                 if (unlinkat(open_tree_fd, FILE2, 0))
4800                         die("failure: unlinkat");
4801
4802                 exit(EXIT_SUCCESS);
4803         }
4804         if (wait_for_pid(pid)) {
4805                 log_stderr("failure: wait_for_pid");
4806                 goto out;
4807         }
4808
4809         /* change uid to unprivileged user */
4810         if (fchown(dir_fd, 11000, -1)) {
4811                 log_stderr("failure: fchown");
4812                 goto out;
4813         }
4814         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4815                 log_stderr("failure: fchmod");
4816                 goto out;
4817         }
4818         /* validate sticky bit is set */
4819         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4820                 log_stderr("failure: is_sticky");
4821                 goto out;
4822         }
4823
4824         /* create regular file via mknod */
4825         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4826                 log_stderr("failure: mknodat");
4827                 goto out;
4828         }
4829         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4830                 log_stderr("failure: fchownat");
4831                 goto out;
4832         }
4833         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4834                 log_stderr("failure: fchmodat");
4835                 goto out;
4836         }
4837
4838         /* create regular file via mknod */
4839         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4840                 log_stderr("failure: mknodat");
4841                 goto out;
4842         }
4843         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4844                 log_stderr("failure: fchownat");
4845                 goto out;
4846         }
4847         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4848                 log_stderr("failure: fchmodat");
4849                 goto out;
4850         }
4851
4852         /* The sticky bit is set and we own the directory so we must be able to
4853          * delete the files now.
4854          */
4855         pid = fork();
4856         if (pid < 0) {
4857                 log_stderr("failure: fork");
4858                 goto out;
4859         }
4860         if (pid == 0) {
4861                 if (!switch_ids(1000, 1000))
4862                         die("failure: switch_ids");
4863
4864                 if (unlinkat(open_tree_fd, FILE1, 0))
4865                         die("failure: unlinkat");
4866
4867                 if (unlinkat(open_tree_fd, FILE2, 0))
4868                         die("failure: unlinkat");
4869
4870                 exit(EXIT_SUCCESS);
4871         }
4872         if (wait_for_pid(pid)) {
4873                 log_stderr("failure: wait_for_pid");
4874                 goto out;
4875         }
4876
4877         fret = 0;
4878         log_debug("Ran test");
4879 out:
4880         safe_close(attr.userns_fd);
4881         safe_close(dir_fd);
4882         safe_close(open_tree_fd);
4883
4884         return fret;
4885 }
4886
4887 /* Validate that the sticky bit behaves correctly on idmapped mounts for unlink
4888  * operations in a user namespace.
4889  */
4890 static int sticky_bit_unlink_idmapped_mounts_in_userns(void)
4891 {
4892         int fret = -1;
4893         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4894         struct mount_attr attr = {
4895                 .attr_set = MOUNT_ATTR_IDMAP,
4896         };
4897         pid_t pid;
4898
4899         if (!caps_supported())
4900                 return 0;
4901
4902         /* create directory */
4903         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4904                 log_stderr("failure: mkdirat");
4905                 goto out;
4906         }
4907
4908         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4909         if (dir_fd < 0) {
4910                 log_stderr("failure: openat");
4911                 goto out;
4912         }
4913         if (fchown(dir_fd, 0, 0)) {
4914                 log_stderr("failure: fchown");
4915                 goto out;
4916         }
4917         if (fchmod(dir_fd, 0777)) {
4918                 log_stderr("failure: fchmod");
4919                 goto out;
4920         }
4921
4922         /* create regular file via mknod */
4923         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4924                 log_stderr("failure: mknodat");
4925                 goto out;
4926         }
4927         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4928                 log_stderr("failure: fchownat");
4929                 goto out;
4930         }
4931         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4932                 log_stderr("failure: fchmodat");
4933                 goto out;
4934         }
4935
4936         /* create regular file via mknod */
4937         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4938                 log_stderr("failure: mknodat");
4939                 goto out;
4940         }
4941         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4942                 log_stderr("failure: fchownat");
4943                 goto out;
4944         }
4945         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4946                 log_stderr("failure: fchmodat");
4947                 goto out;
4948         }
4949
4950         /* Changing mount properties on a detached mount. */
4951         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
4952         if (attr.userns_fd < 0) {
4953                 log_stderr("failure: get_userns_fd");
4954                 goto out;
4955         }
4956
4957         open_tree_fd = sys_open_tree(dir_fd, "",
4958                                      AT_EMPTY_PATH |
4959                                      AT_NO_AUTOMOUNT |
4960                                      AT_SYMLINK_NOFOLLOW |
4961                                      OPEN_TREE_CLOEXEC |
4962                                      OPEN_TREE_CLONE);
4963         if (open_tree_fd < 0) {
4964                 log_stderr("failure: sys_open_tree");
4965                 goto out;
4966         }
4967
4968         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4969                 log_stderr("failure: sys_mount_setattr");
4970                 goto out;
4971         }
4972
4973         /* The sticky bit is not set so we must be able to delete files not
4974          * owned by us.
4975          */
4976         pid = fork();
4977         if (pid < 0) {
4978                 log_stderr("failure: fork");
4979                 goto out;
4980         }
4981         if (pid == 0) {
4982                 if (!caps_supported()) {
4983                         log_debug("skip: capability library not installed");
4984                         exit(EXIT_SUCCESS);
4985                 }
4986
4987                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
4988                         die("failure: switch_userns");
4989
4990                 if (unlinkat(dir_fd, FILE1, 0))
4991                         die("failure: unlinkat");
4992
4993                 if (unlinkat(dir_fd, FILE2, 0))
4994                         die("failure: unlinkat");
4995
4996                 exit(EXIT_SUCCESS);
4997         }
4998         if (wait_for_pid(pid)) {
4999                 log_stderr("failure: wait_for_pid");
5000                 goto out;
5001         }
5002
5003         /* set sticky bit */
5004         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5005                 log_stderr("failure: fchmod");
5006                 goto out;
5007         }
5008
5009         /* validate sticky bit is set */
5010         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5011                 log_stderr("failure: is_sticky");
5012                 goto out;
5013         }
5014
5015         /* create regular file via mknod */
5016         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5017                 log_stderr("failure: mknodat");
5018                 goto out;
5019         }
5020         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5021                 log_stderr("failure: fchownat");
5022                 goto out;
5023         }
5024         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5025                 log_stderr("failure: fchmodat");
5026                 goto out;
5027         }
5028
5029         /* create regular file via mknod */
5030         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5031                 log_stderr("failure: mknodat");
5032                 goto out;
5033         }
5034         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5035                 log_stderr("failure: fchownat");
5036                 goto out;
5037         }
5038         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5039                 log_stderr("failure: fchmodat");
5040                 goto out;
5041         }
5042
5043         /* The sticky bit is set so we must not be able to delete files not
5044          * owned by us.
5045          */
5046         pid = fork();
5047         if (pid < 0) {
5048                 log_stderr("failure: fork");
5049                 goto out;
5050         }
5051         if (pid == 0) {
5052                 if (!caps_supported()) {
5053                         log_debug("skip: capability library not installed");
5054                         exit(EXIT_SUCCESS);
5055                 }
5056
5057                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5058                         die("failure: switch_userns");
5059
5060                 if (!unlinkat(dir_fd, FILE1, 0))
5061                         die("failure: unlinkat");
5062                 if (errno != EPERM)
5063                         die("failure: errno");
5064
5065                 if (!unlinkat(dir_fd, FILE2, 0))
5066                         die("failure: unlinkat");
5067                 if (errno != EPERM)
5068                         die("failure: errno");
5069
5070                 if (!unlinkat(open_tree_fd, FILE1, 0))
5071                         die("failure: unlinkat");
5072                 if (errno != EPERM)
5073                         die("failure: errno");
5074
5075                 if (!unlinkat(open_tree_fd, FILE2, 0))
5076                         die("failure: unlinkat");
5077                 if (errno != EPERM)
5078                         die("failure: errno");
5079
5080                 exit(EXIT_SUCCESS);
5081         }
5082         if (wait_for_pid(pid)) {
5083                 log_stderr("failure: wait_for_pid");
5084                 goto out;
5085         }
5086
5087         /* The sticky bit is set and we own the files so we must be able to
5088          * delete the files now.
5089          */
5090         pid = fork();
5091         if (pid < 0) {
5092                 log_stderr("failure: fork");
5093                 goto out;
5094         }
5095         if (pid == 0) {
5096                 /* change ownership */
5097                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5098                         die("failure: fchownat");
5099                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5100                         die("failure: expected_uid_gid");
5101                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5102                         die("failure: fchownat");
5103                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5104                         die("failure: expected_uid_gid");
5105
5106                 if (!caps_supported()) {
5107                         log_debug("skip: capability library not installed");
5108                         exit(EXIT_SUCCESS);
5109                 }
5110
5111                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5112                         die("failure: switch_userns");
5113
5114                 if (!unlinkat(dir_fd, FILE1, 0))
5115                         die("failure: unlinkat");
5116                 if (errno != EPERM)
5117                         die("failure: errno");
5118
5119                 if (!unlinkat(dir_fd, FILE2, 0))
5120                         die("failure: unlinkat");
5121                 if (errno != EPERM)
5122                         die("failure: errno");
5123
5124                 if (unlinkat(open_tree_fd, FILE1, 0))
5125                         die("failure: unlinkat");
5126
5127                 if (unlinkat(open_tree_fd, FILE2, 0))
5128                         die("failure: unlinkat");
5129
5130                 exit(EXIT_SUCCESS);
5131         }
5132         if (wait_for_pid(pid)) {
5133                 log_stderr("failure: wait_for_pid");
5134                 goto out;
5135         }
5136
5137         /* change uid to unprivileged user */
5138         if (fchown(dir_fd, 1000, -1)) {
5139                 log_stderr("failure: fchown");
5140                 goto out;
5141         }
5142         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5143                 log_stderr("failure: fchmod");
5144                 goto out;
5145         }
5146         /* validate sticky bit is set */
5147         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5148                 log_stderr("failure: is_sticky");
5149                 goto out;
5150         }
5151
5152         /* create regular file via mknod */
5153         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5154                 log_stderr("failure: mknodat");
5155                 goto out;
5156         }
5157         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5158                 log_stderr("failure: fchownat");
5159                 goto out;
5160         }
5161         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5162                 log_stderr("failure: fchmodat");
5163                 goto out;
5164         }
5165
5166         /* create regular file via mknod */
5167         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5168                 log_stderr("failure: mknodat");
5169                 goto out;
5170         }
5171         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5172                 log_stderr("failure: fchownat");
5173                 goto out;
5174         }
5175         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5176                 log_stderr("failure: fchmodat");
5177                 goto out;
5178         }
5179
5180         /* The sticky bit is set and we own the directory so we must be able to
5181          * delete the files now.
5182          */
5183         pid = fork();
5184         if (pid < 0) {
5185                 log_stderr("failure: fork");
5186                 goto out;
5187         }
5188         if (pid == 0) {
5189                 if (!caps_supported()) {
5190                         log_debug("skip: capability library not installed");
5191                         exit(EXIT_SUCCESS);
5192                 }
5193
5194                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5195                         die("failure: switch_userns");
5196
5197                 /* we don't own the directory from the original mount */
5198                 if (!unlinkat(dir_fd, FILE1, 0))
5199                         die("failure: unlinkat");
5200                 if (errno != EPERM)
5201                         die("failure: errno");
5202
5203                 if (!unlinkat(dir_fd, FILE2, 0))
5204                         die("failure: unlinkat");
5205                 if (errno != EPERM)
5206                         die("failure: errno");
5207
5208                 /* we own the file from the idmapped mount */
5209                 if (unlinkat(open_tree_fd, FILE1, 0))
5210                         die("failure: unlinkat");
5211                 if (unlinkat(open_tree_fd, FILE2, 0))
5212                         die("failure: unlinkat");
5213
5214                 exit(EXIT_SUCCESS);
5215         }
5216         if (wait_for_pid(pid)) {
5217                 log_stderr("failure: wait_for_pid");
5218                 goto out;
5219         }
5220
5221         fret = 0;
5222         log_debug("Ran test");
5223 out:
5224         safe_close(attr.userns_fd);
5225         safe_close(dir_fd);
5226         safe_close(open_tree_fd);
5227
5228         return fret;
5229 }
5230
5231 static int sticky_bit_rename(void)
5232 {
5233         int fret = -1;
5234         int dir_fd = -EBADF;
5235         pid_t pid;
5236
5237         if (!caps_supported())
5238                 return 0;
5239
5240         /* create directory */
5241         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5242                 log_stderr("failure: mkdirat");
5243                 goto out;
5244         }
5245
5246         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5247         if (dir_fd < 0) {
5248                 log_stderr("failure: openat");
5249                 goto out;
5250         }
5251         if (fchown(dir_fd, 0, 0)) {
5252                 log_stderr("failure: fchown");
5253                 goto out;
5254         }
5255         if (fchmod(dir_fd, 0777)) {
5256                 log_stderr("failure: fchmod");
5257                 goto out;
5258         }
5259
5260         /* create regular file via mknod */
5261         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5262                 log_stderr("failure: mknodat");
5263                 goto out;
5264         }
5265         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5266                 log_stderr("failure: fchownat");
5267                 goto out;
5268         }
5269         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5270                 log_stderr("failure: fchmodat");
5271                 goto out;
5272         }
5273
5274         /* create regular file via mknod */
5275         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5276                 log_stderr("failure: mknodat");
5277                 goto out;
5278         }
5279         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5280                 log_stderr("failure: fchownat");
5281                 goto out;
5282         }
5283         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5284                 log_stderr("failure: fchmodat");
5285                 goto out;
5286         }
5287
5288         /* The sticky bit is not set so we must be able to delete files not
5289          * owned by us.
5290          */
5291         pid = fork();
5292         if (pid < 0) {
5293                 log_stderr("failure: fork");
5294                 goto out;
5295         }
5296         if (pid == 0) {
5297                 if (!switch_ids(1000, 1000))
5298                         die("failure: switch_ids");
5299
5300                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5301                         die("failure: renameat");
5302
5303                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5304                         die("failure: renameat");
5305
5306                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5307                         die("failure: renameat");
5308
5309                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5310                         die("failure: renameat");
5311
5312                 exit(EXIT_SUCCESS);
5313         }
5314         if (wait_for_pid(pid)) {
5315                 log_stderr("failure: wait_for_pid");
5316                 goto out;
5317         }
5318
5319         /* set sticky bit */
5320         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5321                 log_stderr("failure: fchmod");
5322                 goto out;
5323         }
5324
5325         /* validate sticky bit is set */
5326         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5327                 log_stderr("failure: is_sticky");
5328                 goto out;
5329         }
5330
5331         /* The sticky bit is set so we must not be able to delete files not
5332          * owned by us.
5333          */
5334         pid = fork();
5335         if (pid < 0) {
5336                 log_stderr("failure: fork");
5337                 goto out;
5338         }
5339         if (pid == 0) {
5340                 if (!switch_ids(1000, 1000))
5341                         die("failure: switch_ids");
5342
5343                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5344                         die("failure: renameat");
5345                 if (errno != EPERM)
5346                         die("failure: errno");
5347
5348                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5349                         die("failure: renameat");
5350                 if (errno != EPERM)
5351                         die("failure: errno");
5352
5353                 exit(EXIT_SUCCESS);
5354         }
5355         if (wait_for_pid(pid)) {
5356                 log_stderr("failure: wait_for_pid");
5357                 goto out;
5358         }
5359
5360         /* The sticky bit is set and we own the files so we must be able to
5361          * delete the files now.
5362          */
5363         pid = fork();
5364         if (pid < 0) {
5365                 log_stderr("failure: fork");
5366                 goto out;
5367         }
5368         if (pid == 0) {
5369                 /* change ownership */
5370                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5371                         die("failure: fchownat");
5372                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5373                         die("failure: expected_uid_gid");
5374                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5375                         die("failure: fchownat");
5376                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5377                         die("failure: expected_uid_gid");
5378
5379                 if (!switch_ids(1000, 1000))
5380                         die("failure: switch_ids");
5381
5382                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5383                         die("failure: renameat");
5384
5385                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5386                         die("failure: renameat");
5387
5388                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5389                         die("failure: renameat");
5390
5391                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5392                         die("failure: renameat");
5393
5394                 exit(EXIT_SUCCESS);
5395         }
5396         if (wait_for_pid(pid)) {
5397                 log_stderr("failure: wait_for_pid");
5398                 goto out;
5399         }
5400
5401         /* change uid to unprivileged user */
5402         if (fchown(dir_fd, 1000, -1)) {
5403                 log_stderr("failure: fchown");
5404                 goto out;
5405         }
5406         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5407                 log_stderr("failure: fchmod");
5408                 goto out;
5409         }
5410         /* validate sticky bit is set */
5411         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5412                 log_stderr("failure: is_sticky");
5413                 goto out;
5414         }
5415
5416
5417         /* The sticky bit is set and we own the directory so we must be able to
5418          * delete the files now.
5419          */
5420         pid = fork();
5421         if (pid < 0) {
5422                 log_stderr("failure: fork");
5423                 goto out;
5424         }
5425         if (pid == 0) {
5426                 if (!switch_ids(1000, 1000))
5427                         die("failure: switch_ids");
5428
5429                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5430                         die("failure: renameat");
5431
5432                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5433                         die("failure: renameat");
5434
5435                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5436                         die("failure: renameat");
5437
5438                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5439                         die("failure: renameat");
5440
5441                 exit(EXIT_SUCCESS);
5442         }
5443         if (wait_for_pid(pid)) {
5444                 log_stderr("failure: wait_for_pid");
5445                 goto out;
5446         }
5447
5448         fret = 0;
5449         log_debug("Ran test");
5450 out:
5451         safe_close(dir_fd);
5452
5453         return fret;
5454 }
5455
5456 static int sticky_bit_rename_idmapped_mounts(void)
5457 {
5458         int fret = -1;
5459         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5460         struct mount_attr attr = {
5461                 .attr_set = MOUNT_ATTR_IDMAP,
5462         };
5463         pid_t pid;
5464
5465         if (!caps_supported())
5466                 return 0;
5467
5468         /* create directory */
5469         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5470                 log_stderr("failure: mkdirat");
5471                 goto out;
5472         }
5473
5474         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5475         if (dir_fd < 0) {
5476                 log_stderr("failure: openat");
5477                 goto out;
5478         }
5479
5480         if (fchown(dir_fd, 10000, 10000)) {
5481                 log_stderr("failure: fchown");
5482                 goto out;
5483         }
5484
5485         if (fchmod(dir_fd, 0777)) {
5486                 log_stderr("failure: fchmod");
5487                 goto out;
5488         }
5489
5490         /* create regular file via mknod */
5491         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5492                 log_stderr("failure: mknodat");
5493                 goto out;
5494         }
5495         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
5496                 log_stderr("failure: fchownat");
5497                 goto out;
5498         }
5499         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5500                 log_stderr("failure: fchmodat");
5501                 goto out;
5502         }
5503
5504         /* create regular file via mknod */
5505         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5506                 log_stderr("failure: mknodat");
5507                 goto out;
5508         }
5509         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
5510                 log_stderr("failure: fchownat");
5511                 goto out;
5512         }
5513         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5514                 log_stderr("failure: fchmodat");
5515                 goto out;
5516         }
5517
5518         /* Changing mount properties on a detached mount. */
5519         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
5520         if (attr.userns_fd < 0) {
5521                 log_stderr("failure: get_userns_fd");
5522                 goto out;
5523         }
5524
5525         open_tree_fd = sys_open_tree(dir_fd, "",
5526                                      AT_EMPTY_PATH |
5527                                      AT_NO_AUTOMOUNT |
5528                                      AT_SYMLINK_NOFOLLOW |
5529                                      OPEN_TREE_CLOEXEC |
5530                                      OPEN_TREE_CLONE);
5531         if (open_tree_fd < 0) {
5532                 log_stderr("failure: sys_open_tree");
5533                 goto out;
5534         }
5535
5536         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5537                 log_stderr("failure: sys_mount_setattr");
5538                 goto out;
5539         }
5540
5541         /* The sticky bit is not set so we must be able to delete files not
5542          * owned by us.
5543          */
5544         pid = fork();
5545         if (pid < 0) {
5546                 log_stderr("failure: fork");
5547                 goto out;
5548         }
5549         if (pid == 0) {
5550                 if (!switch_ids(1000, 1000))
5551                         die("failure: switch_ids");
5552
5553                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5554                         die("failure: renameat");
5555
5556                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5557                         die("failure: renameat");
5558
5559                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5560                         die("failure: renameat");
5561
5562                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5563                         die("failure: renameat");
5564
5565                 exit(EXIT_SUCCESS);
5566         }
5567         if (wait_for_pid(pid)) {
5568                 log_stderr("failure: wait_for_pid");
5569                 goto out;
5570         }
5571
5572         /* set sticky bit */
5573         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5574                 log_stderr("failure: fchmod");
5575                 goto out;
5576         }
5577
5578         /* validate sticky bit is set */
5579         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5580                 log_stderr("failure: is_sticky");
5581                 goto out;
5582         }
5583
5584         /* The sticky bit is set so we must not be able to delete files not
5585          * owned by us.
5586          */
5587         pid = fork();
5588         if (pid < 0) {
5589                 log_stderr("failure: fork");
5590                 goto out;
5591         }
5592         if (pid == 0) {
5593                 if (!switch_ids(1000, 1000))
5594                         die("failure: switch_ids");
5595
5596                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5597                         die("failure: renameat");
5598                 if (errno != EPERM)
5599                         die("failure: errno");
5600
5601                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5602                         die("failure: renameat");
5603                 if (errno != EPERM)
5604                         die("failure: errno");
5605
5606                 exit(EXIT_SUCCESS);
5607         }
5608         if (wait_for_pid(pid)) {
5609                 log_stderr("failure: wait_for_pid");
5610                 goto out;
5611         }
5612
5613         /* The sticky bit is set and we own the files so we must be able to
5614          * delete the files now.
5615          */
5616         pid = fork();
5617         if (pid < 0) {
5618                 log_stderr("failure: fork");
5619                 goto out;
5620         }
5621         if (pid == 0) {
5622                 /* change ownership */
5623                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
5624                         die("failure: fchownat");
5625                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
5626                         die("failure: expected_uid_gid");
5627                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
5628                         die("failure: fchownat");
5629                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
5630                         die("failure: expected_uid_gid");
5631
5632                 if (!switch_ids(1000, 1000))
5633                         die("failure: switch_ids");
5634
5635                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5636                         die("failure: renameat");
5637
5638                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5639                         die("failure: renameat");
5640
5641                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5642                         die("failure: renameat");
5643
5644                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5645                         die("failure: renameat");
5646
5647                 exit(EXIT_SUCCESS);
5648         }
5649         if (wait_for_pid(pid)) {
5650                 log_stderr("failure: wait_for_pid");
5651                 goto out;
5652         }
5653
5654         /* change uid to unprivileged user */
5655         if (fchown(dir_fd, 11000, -1)) {
5656                 log_stderr("failure: fchown");
5657                 goto out;
5658         }
5659         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5660                 log_stderr("failure: fchmod");
5661                 goto out;
5662         }
5663         /* validate sticky bit is set */
5664         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5665                 log_stderr("failure: is_sticky");
5666                 goto out;
5667         }
5668
5669         /* The sticky bit is set and we own the directory so we must be able to
5670          * delete the files now.
5671          */
5672         pid = fork();
5673         if (pid < 0) {
5674                 log_stderr("failure: fork");
5675                 goto out;
5676         }
5677         if (pid == 0) {
5678                 if (!switch_ids(1000, 1000))
5679                         die("failure: switch_ids");
5680
5681                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5682                         die("failure: renameat");
5683
5684                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5685                         die("failure: renameat");
5686
5687                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5688                         die("failure: renameat");
5689
5690                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5691                         die("failure: renameat");
5692
5693                 exit(EXIT_SUCCESS);
5694         }
5695         if (wait_for_pid(pid)) {
5696                 log_stderr("failure: wait_for_pid");
5697                 goto out;
5698         }
5699
5700         fret = 0;
5701         log_debug("Ran test");
5702 out:
5703         safe_close(attr.userns_fd);
5704         safe_close(dir_fd);
5705         safe_close(open_tree_fd);
5706
5707         return fret;
5708 }
5709
5710 /* Validate that the sticky bit behaves correctly on idmapped mounts for rename
5711  * operations in a user namespace.
5712  */
5713 static int sticky_bit_rename_idmapped_mounts_in_userns(void)
5714 {
5715         int fret = -1;
5716         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5717         struct mount_attr attr = {
5718                 .attr_set = MOUNT_ATTR_IDMAP,
5719         };
5720         pid_t pid;
5721
5722         if (!caps_supported())
5723                 return 0;
5724
5725         /* create directory */
5726         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5727                 log_stderr("failure: mkdirat");
5728                 goto out;
5729         }
5730
5731         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5732         if (dir_fd < 0) {
5733                 log_stderr("failure: openat");
5734                 goto out;
5735         }
5736         if (fchown(dir_fd, 0, 0)) {
5737                 log_stderr("failure: fchown");
5738                 goto out;
5739         }
5740         if (fchmod(dir_fd, 0777)) {
5741                 log_stderr("failure: fchmod");
5742                 goto out;
5743         }
5744
5745         /* create regular file via mknod */
5746         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5747                 log_stderr("failure: mknodat");
5748                 goto out;
5749         }
5750         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5751                 log_stderr("failure: fchownat");
5752                 goto out;
5753         }
5754         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5755                 log_stderr("failure: fchmodat");
5756                 goto out;
5757         }
5758
5759         /* create regular file via mknod */
5760         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5761                 log_stderr("failure: mknodat");
5762                 goto out;
5763         }
5764         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5765                 log_stderr("failure: fchownat");
5766                 goto out;
5767         }
5768         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5769                 log_stderr("failure: fchmodat");
5770                 goto out;
5771         }
5772
5773         /* Changing mount properties on a detached mount. */
5774         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
5775         if (attr.userns_fd < 0) {
5776                 log_stderr("failure: get_userns_fd");
5777                 goto out;
5778         }
5779
5780         open_tree_fd = sys_open_tree(dir_fd, "",
5781                                      AT_EMPTY_PATH |
5782                                      AT_NO_AUTOMOUNT |
5783                                      AT_SYMLINK_NOFOLLOW |
5784                                      OPEN_TREE_CLOEXEC |
5785                                      OPEN_TREE_CLONE);
5786         if (open_tree_fd < 0) {
5787                 log_stderr("failure: sys_open_tree");
5788                 goto out;
5789         }
5790
5791         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5792                 log_stderr("failure: sys_mount_setattr");
5793                 goto out;
5794         }
5795
5796         /* The sticky bit is not set so we must be able to delete files not
5797          * owned by us.
5798          */
5799         pid = fork();
5800         if (pid < 0) {
5801                 log_stderr("failure: fork");
5802                 goto out;
5803         }
5804         if (pid == 0) {
5805                 if (!caps_supported()) {
5806                         log_debug("skip: capability library not installed");
5807                         exit(EXIT_SUCCESS);
5808                 }
5809
5810                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5811                         die("failure: switch_userns");
5812
5813                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5814                         die("failure: renameat");
5815
5816                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5817                         die("failure: renameat");
5818
5819                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5820                         die("failure: renameat");
5821
5822                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5823                         die("failure: renameat");
5824
5825                 exit(EXIT_SUCCESS);
5826         }
5827         if (wait_for_pid(pid)) {
5828                 log_stderr("failure: wait_for_pid");
5829                 goto out;
5830         }
5831
5832         /* set sticky bit */
5833         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5834                 log_stderr("failure: fchmod");
5835                 goto out;
5836         }
5837
5838         /* validate sticky bit is set */
5839         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5840                 log_stderr("failure: is_sticky");
5841                 goto out;
5842         }
5843
5844         /* The sticky bit is set so we must not be able to delete files not
5845          * owned by us.
5846          */
5847         pid = fork();
5848         if (pid < 0) {
5849                 log_stderr("failure: fork");
5850                 goto out;
5851         }
5852         if (pid == 0) {
5853                 if (!caps_supported()) {
5854                         log_debug("skip: capability library not installed");
5855                         exit(EXIT_SUCCESS);
5856                 }
5857
5858                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5859                         die("failure: switch_userns");
5860
5861                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5862                         die("failure: renameat");
5863                 if (errno != EPERM)
5864                         die("failure: errno");
5865
5866                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5867                         die("failure: renameat");
5868                 if (errno != EPERM)
5869                         die("failure: errno");
5870
5871                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5872                         die("failure: renameat");
5873                 if (errno != EPERM)
5874                         die("failure: errno");
5875
5876                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5877                         die("failure: renameat");
5878                 if (errno != EPERM)
5879                         die("failure: errno");
5880
5881                 exit(EXIT_SUCCESS);
5882         }
5883         if (wait_for_pid(pid)) {
5884                 log_stderr("failure: wait_for_pid");
5885                 goto out;
5886         }
5887
5888         /* The sticky bit is set and we own the files so we must be able to
5889          * delete the files now.
5890          */
5891         pid = fork();
5892         if (pid < 0) {
5893                 log_stderr("failure: fork");
5894                 goto out;
5895         }
5896         if (pid == 0) {
5897                 /* change ownership */
5898                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5899                         die("failure: fchownat");
5900                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5901                         die("failure: expected_uid_gid");
5902                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5903                         die("failure: fchownat");
5904                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5905                         die("failure: expected_uid_gid");
5906
5907                 if (!caps_supported()) {
5908                         log_debug("skip: capability library not installed");
5909                         exit(EXIT_SUCCESS);
5910                 }
5911
5912                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5913                         die("failure: switch_userns");
5914
5915                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5916                         die("failure: renameat");
5917                 if (errno != EPERM)
5918                         die("failure: errno");
5919
5920                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5921                         die("failure: renameat");
5922                 if (errno != EPERM)
5923                         die("failure: errno");
5924
5925                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5926                         die("failure: renameat");
5927
5928                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5929                         die("failure: renameat");
5930
5931                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5932                         die("failure: renameat");
5933
5934                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5935                         die("failure: renameat");
5936
5937                 exit(EXIT_SUCCESS);
5938         }
5939         if (wait_for_pid(pid)) {
5940                 log_stderr("failure: wait_for_pid");
5941                 goto out;
5942         }
5943
5944         /* change uid to unprivileged user */
5945         if (fchown(dir_fd, 1000, -1)) {
5946                 log_stderr("failure: fchown");
5947                 goto out;
5948         }
5949         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5950                 log_stderr("failure: fchmod");
5951                 goto out;
5952         }
5953         /* validate sticky bit is set */
5954         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5955                 log_stderr("failure: is_sticky");
5956                 goto out;
5957         }
5958
5959         /* The sticky bit is set and we own the directory so we must be able to
5960          * delete the files now.
5961          */
5962         pid = fork();
5963         if (pid < 0) {
5964                 log_stderr("failure: fork");
5965                 goto out;
5966         }
5967         if (pid == 0) {
5968                 if (!caps_supported()) {
5969                         log_debug("skip: capability library not installed");
5970                         exit(EXIT_SUCCESS);
5971                 }
5972
5973                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5974                         die("failure: switch_userns");
5975
5976                 /* we don't own the directory from the original mount */
5977                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5978                         die("failure: renameat");
5979                 if (errno != EPERM)
5980                         die("failure: errno");
5981
5982                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5983                         die("failure: renameat");
5984                 if (errno != EPERM)
5985                         die("failure: errno");
5986
5987                 /* we own the file from the idmapped mount */
5988                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5989                         die("failure: renameat");
5990
5991                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5992                         die("failure: renameat");
5993
5994                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5995                         die("failure: renameat");
5996
5997                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5998                         die("failure: renameat");
5999
6000                 exit(EXIT_SUCCESS);
6001         }
6002         if (wait_for_pid(pid)) {
6003                 log_stderr("failure: wait_for_pid");
6004                 goto out;
6005         }
6006
6007         fret = 0;
6008         log_debug("Ran test");
6009 out:
6010         safe_close(open_tree_fd);
6011         safe_close(attr.userns_fd);
6012         safe_close(dir_fd);
6013
6014         return fret;
6015 }
6016
6017 /* Validate that protected symlinks work correctly. */
6018 static int protected_symlinks(void)
6019 {
6020         int fret = -1;
6021         int dir_fd = -EBADF, fd = -EBADF;
6022         pid_t pid;
6023
6024         if (!protected_symlinks_enabled())
6025                 return 0;
6026
6027         if (!caps_supported())
6028                 return 0;
6029
6030         /* create directory */
6031         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6032                 log_stderr("failure: mkdirat");
6033                 goto out;
6034         }
6035
6036         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6037         if (dir_fd < 0) {
6038                 log_stderr("failure: openat");
6039                 goto out;
6040         }
6041         if (fchown(dir_fd, 0, 0)) {
6042                 log_stderr("failure: fchown");
6043                 goto out;
6044         }
6045         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6046                 log_stderr("failure: fchmod");
6047                 goto out;
6048         }
6049         /* validate sticky bit is set */
6050         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6051                 log_stderr("failure: is_sticky");
6052                 goto out;
6053         }
6054
6055         /* create regular file via mknod */
6056         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6057                 log_stderr("failure: mknodat");
6058                 goto out;
6059         }
6060         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6061                 log_stderr("failure: fchownat");
6062                 goto out;
6063         }
6064         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6065                 log_stderr("failure: fchmodat");
6066                 goto out;
6067         }
6068
6069         /* create symlinks */
6070         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6071                 log_stderr("failure: symlinkat");
6072                 goto out;
6073         }
6074         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6075                 log_stderr("failure: fchownat");
6076                 goto out;
6077         }
6078         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6079                 log_stderr("failure: expected_uid_gid");
6080                 goto out;
6081         }
6082         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6083                 log_stderr("failure: expected_uid_gid");
6084                 goto out;
6085         }
6086
6087         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6088                 log_stderr("failure: symlinkat");
6089                 goto out;
6090         }
6091         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6092                 log_stderr("failure: fchownat");
6093                 goto out;
6094         }
6095         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6096                 log_stderr("failure: expected_uid_gid");
6097                 goto out;
6098         }
6099         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6100                 log_stderr("failure: expected_uid_gid");
6101                 goto out;
6102         }
6103
6104         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6105                 log_stderr("failure: symlinkat");
6106                 goto out;
6107         }
6108         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6109                 log_stderr("failure: fchownat");
6110                 goto out;
6111         }
6112         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6113                 log_stderr("failure: expected_uid_gid");
6114                 goto out;
6115         }
6116         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6117                 log_stderr("failure: expected_uid_gid");
6118                 goto out;
6119         }
6120
6121         /* validate file can be directly read */
6122         fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6123         if (fd < 0) {
6124                 log_stderr("failure: openat");
6125                 goto out;
6126         }
6127         safe_close(fd);
6128
6129         /* validate file can be read through own symlink */
6130         fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6131         if (fd < 0) {
6132                 log_stderr("failure: openat");
6133                 goto out;
6134         }
6135         safe_close(fd);
6136
6137         pid = fork();
6138         if (pid < 0) {
6139                 log_stderr("failure: fork");
6140                 goto out;
6141         }
6142         if (pid == 0) {
6143                 if (!switch_ids(1000, 1000))
6144                         die("failure: switch_ids");
6145
6146                 /* validate file can be directly read */
6147                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6148                 if (fd < 0)
6149                         die("failure: openat");
6150                 safe_close(fd);
6151
6152                 /* validate file can be read through own symlink */
6153                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6154                 if (fd < 0)
6155                         die("failure: openat");
6156                 safe_close(fd);
6157
6158                 /* validate file can be read through root symlink */
6159                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6160                 if (fd < 0)
6161                         die("failure: openat");
6162                 safe_close(fd);
6163
6164                 /* validate file can't be read through other users symlink */
6165                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6166                 if (fd >= 0)
6167                         die("failure: openat");
6168                 if (errno != EACCES)
6169                         die("failure: errno");
6170
6171                 exit(EXIT_SUCCESS);
6172         }
6173         if (wait_for_pid(pid)) {
6174                 log_stderr("failure: wait_for_pid");
6175                 goto out;
6176         }
6177
6178         pid = fork();
6179         if (pid < 0) {
6180                 log_stderr("failure: fork");
6181                 goto out;
6182         }
6183         if (pid == 0) {
6184                 if (!switch_ids(2000, 2000))
6185                         die("failure: switch_ids");
6186
6187                 /* validate file can be directly read */
6188                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6189                 if (fd < 0)
6190                         die("failure: openat");
6191                 safe_close(fd);
6192
6193                 /* validate file can be read through own symlink */
6194                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6195                 if (fd < 0)
6196                         die("failure: openat");
6197                 safe_close(fd);
6198
6199                 /* validate file can be read through root symlink */
6200                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6201                 if (fd < 0)
6202                         die("failure: openat");
6203                 safe_close(fd);
6204
6205                 /* validate file can't be read through other users symlink */
6206                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6207                 if (fd >= 0)
6208                         die("failure: openat");
6209                 if (errno != EACCES)
6210                         die("failure: errno");
6211
6212                 exit(EXIT_SUCCESS);
6213         }
6214         if (wait_for_pid(pid)) {
6215                 log_stderr("failure: wait_for_pid");
6216                 goto out;
6217         }
6218
6219         fret = 0;
6220         log_debug("Ran test");
6221 out:
6222         safe_close(fd);
6223         safe_close(dir_fd);
6224
6225         return fret;
6226 }
6227
6228 /* Validate that protected symlinks work correctly on idmapped mounts. */
6229 static int protected_symlinks_idmapped_mounts(void)
6230 {
6231         int fret = -1;
6232         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6233         struct mount_attr attr = {
6234                 .attr_set = MOUNT_ATTR_IDMAP,
6235         };
6236         pid_t pid;
6237
6238         if (!protected_symlinks_enabled())
6239                 return 0;
6240
6241         if (!caps_supported())
6242                 return 0;
6243
6244         /* create directory */
6245         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6246                 log_stderr("failure: mkdirat");
6247                 goto out;
6248         }
6249
6250         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6251         if (dir_fd < 0) {
6252                 log_stderr("failure: openat");
6253                 goto out;
6254         }
6255         if (fchown(dir_fd, 10000, 10000)) {
6256                 log_stderr("failure: fchown");
6257                 goto out;
6258         }
6259         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6260                 log_stderr("failure: fchmod");
6261                 goto out;
6262         }
6263         /* validate sticky bit is set */
6264         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6265                 log_stderr("failure: is_sticky");
6266                 goto out;
6267         }
6268
6269         /* create regular file via mknod */
6270         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6271                 log_stderr("failure: mknodat");
6272                 goto out;
6273         }
6274         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
6275                 log_stderr("failure: fchownat");
6276                 goto out;
6277         }
6278         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6279                 log_stderr("failure: fchmodat");
6280                 goto out;
6281         }
6282
6283         /* create symlinks */
6284         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6285                 log_stderr("failure: symlinkat");
6286                 goto out;
6287         }
6288         if (fchownat(dir_fd, SYMLINK_USER1, 10000, 10000, AT_SYMLINK_NOFOLLOW)) {
6289                 log_stderr("failure: fchownat");
6290                 goto out;
6291         }
6292         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
6293                 log_stderr("failure: expected_uid_gid");
6294                 goto out;
6295         }
6296         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6297                 log_stderr("failure: expected_uid_gid");
6298                 goto out;
6299         }
6300
6301         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6302                 log_stderr("failure: symlinkat");
6303                 goto out;
6304         }
6305         if (fchownat(dir_fd, SYMLINK_USER2, 11000, 11000, AT_SYMLINK_NOFOLLOW)) {
6306                 log_stderr("failure: fchownat");
6307                 goto out;
6308         }
6309         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 11000, 11000)) {
6310                 log_stderr("failure: expected_uid_gid");
6311                 goto out;
6312         }
6313         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6314                 log_stderr("failure: expected_uid_gid");
6315                 goto out;
6316         }
6317
6318         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6319                 log_stderr("failure: symlinkat");
6320                 goto out;
6321         }
6322         if (fchownat(dir_fd, SYMLINK_USER3, 12000, 12000, AT_SYMLINK_NOFOLLOW)) {
6323                 log_stderr("failure: fchownat");
6324                 goto out;
6325         }
6326         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
6327                 log_stderr("failure: expected_uid_gid");
6328                 goto out;
6329         }
6330         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6331                 log_stderr("failure: expected_uid_gid");
6332                 goto out;
6333         }
6334
6335         /* Changing mount properties on a detached mount. */
6336         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
6337         if (attr.userns_fd < 0) {
6338                 log_stderr("failure: get_userns_fd");
6339                 goto out;
6340         }
6341
6342         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6343                                      AT_EMPTY_PATH |
6344                                      AT_NO_AUTOMOUNT |
6345                                      AT_SYMLINK_NOFOLLOW |
6346                                      OPEN_TREE_CLOEXEC |
6347                                      OPEN_TREE_CLONE);
6348         if (open_tree_fd < 0) {
6349                 log_stderr("failure: open_tree_fd");
6350                 goto out;
6351         }
6352
6353         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6354                 log_stderr("failure: sys_mount_setattr");
6355                 goto out;
6356         }
6357
6358         /* validate file can be directly read */
6359         fd = openat(open_tree_fd, DIR1 "/"  FILE1, O_RDONLY | O_CLOEXEC, 0);
6360         if (fd < 0) {
6361                 log_stderr("failure: openat");
6362                 goto out;
6363         }
6364         safe_close(fd);
6365
6366         /* validate file can be read through own symlink */
6367         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6368         if (fd < 0) {
6369                 log_stderr("failure: openat");
6370                 goto out;
6371         }
6372         safe_close(fd);
6373
6374         pid = fork();
6375         if (pid < 0) {
6376                 log_stderr("failure: fork");
6377                 goto out;
6378         }
6379         if (pid == 0) {
6380                 if (!switch_ids(1000, 1000))
6381                         die("failure: switch_ids");
6382
6383                 /* validate file can be directly read */
6384                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6385                 if (fd < 0)
6386                         die("failure: openat");
6387                 safe_close(fd);
6388
6389                 /* validate file can be read through own symlink */
6390                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6391                 if (fd < 0)
6392                         die("failure: openat");
6393                 safe_close(fd);
6394
6395                 /* validate file can be read through root symlink */
6396                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6397                 if (fd < 0)
6398                         die("failure: openat");
6399                 safe_close(fd);
6400
6401                 /* validate file can't be read through other users symlink */
6402                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6403                 if (fd >= 0)
6404                         die("failure: openat");
6405                 if (errno != EACCES)
6406                         die("failure: errno");
6407
6408                 exit(EXIT_SUCCESS);
6409         }
6410         if (wait_for_pid(pid)) {
6411                 log_stderr("failure: wait_for_pid");
6412                 goto out;
6413         }
6414
6415         pid = fork();
6416         if (pid < 0) {
6417                 log_stderr("failure: fork");
6418                 goto out;
6419         }
6420         if (pid == 0) {
6421                 if (!switch_ids(2000, 2000))
6422                         die("failure: switch_ids");
6423
6424                 /* validate file can be directly read */
6425                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6426                 if (fd < 0)
6427                         die("failure: openat");
6428                 safe_close(fd);
6429
6430                 /* validate file can be read through own symlink */
6431                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6432                 if (fd < 0)
6433                         die("failure: openat");
6434                 safe_close(fd);
6435
6436                 /* validate file can be read through root symlink */
6437                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6438                 if (fd < 0)
6439                         die("failure: openat");
6440                 safe_close(fd);
6441
6442                 /* validate file can't be read through other users symlink */
6443                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6444                 if (fd >= 0)
6445                         die("failure: openat");
6446                 if (errno != EACCES)
6447                         die("failure: errno");
6448
6449                 exit(EXIT_SUCCESS);
6450         }
6451         if (wait_for_pid(pid)) {
6452                 log_stderr("failure: wait_for_pid");
6453                 goto out;
6454         }
6455
6456         fret = 0;
6457         log_debug("Ran test");
6458 out:
6459         safe_close(attr.userns_fd);
6460         safe_close(fd);
6461         safe_close(dir_fd);
6462         safe_close(open_tree_fd);
6463
6464         return fret;
6465 }
6466
6467 /* Validate that protected symlinks work correctly on idmapped mounts inside a
6468  * user namespace.
6469  */
6470 static int protected_symlinks_idmapped_mounts_in_userns(void)
6471 {
6472         int fret = -1;
6473         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6474         struct mount_attr attr = {
6475                 .attr_set = MOUNT_ATTR_IDMAP,
6476         };
6477         pid_t pid;
6478
6479         if (!protected_symlinks_enabled())
6480                 return 0;
6481
6482         if (!caps_supported())
6483                 return 0;
6484
6485         /* create directory */
6486         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6487                 log_stderr("failure: mkdirat");
6488                 goto out;
6489         }
6490
6491         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6492         if (dir_fd < 0) {
6493                 log_stderr("failure: openat");
6494                 goto out;
6495         }
6496         if (fchown(dir_fd, 0, 0)) {
6497                 log_stderr("failure: fchown");
6498                 goto out;
6499         }
6500         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6501                 log_stderr("failure: fchmod");
6502                 goto out;
6503         }
6504         /* validate sticky bit is set */
6505         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6506                 log_stderr("failure: is_sticky");
6507                 goto out;
6508         }
6509
6510         /* create regular file via mknod */
6511         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6512                 log_stderr("failure: mknodat");
6513                 goto out;
6514         }
6515         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6516                 log_stderr("failure: fchownat");
6517                 goto out;
6518         }
6519         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6520                 log_stderr("failure: fchmodat");
6521                 goto out;
6522         }
6523
6524         /* create symlinks */
6525         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6526                 log_stderr("failure: symlinkat");
6527                 goto out;
6528         }
6529         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6530                 log_stderr("failure: fchownat");
6531                 goto out;
6532         }
6533         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6534                 log_stderr("failure: expected_uid_gid");
6535                 goto out;
6536         }
6537         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6538                 log_stderr("failure: expected_uid_gid");
6539                 goto out;
6540         }
6541
6542         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6543                 log_stderr("failure: symlinkat");
6544                 goto out;
6545         }
6546         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6547                 log_stderr("failure: fchownat");
6548                 goto out;
6549         }
6550         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6551                 log_stderr("failure: expected_uid_gid");
6552                 goto out;
6553         }
6554         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6555                 log_stderr("failure: expected_uid_gid");
6556                 goto out;
6557         }
6558
6559         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6560                 log_stderr("failure: symlinkat");
6561                 goto out;
6562         }
6563         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6564                 log_stderr("failure: fchownat");
6565                 goto out;
6566         }
6567         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6568                 log_stderr("failure: expected_uid_gid");
6569                 goto out;
6570         }
6571         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6572                 log_stderr("failure: expected_uid_gid");
6573                 goto out;
6574         }
6575
6576         /* Changing mount properties on a detached mount. */
6577         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
6578         if (attr.userns_fd < 0) {
6579                 log_stderr("failure: get_userns_fd");
6580                 goto out;
6581         }
6582
6583         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6584                                      AT_EMPTY_PATH |
6585                                      AT_NO_AUTOMOUNT |
6586                                      AT_SYMLINK_NOFOLLOW |
6587                                      OPEN_TREE_CLOEXEC |
6588                                      OPEN_TREE_CLONE);
6589         if (open_tree_fd < 0) {
6590                 log_stderr("failure: sys_open_tree");
6591                 goto out;
6592         }
6593
6594         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6595                 log_stderr("failure: sys_mount_setattr");
6596                 goto out;
6597         }
6598
6599         /* validate file can be directly read */
6600         fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6601         if (fd < 0) {
6602                 log_stderr("failure: openat");
6603                 goto out;
6604         }
6605         safe_close(fd);
6606
6607         /* validate file can be read through own symlink */
6608         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6609         if (fd < 0) {
6610                 log_stderr("failure: openat");
6611                 goto out;
6612         }
6613         safe_close(fd);
6614
6615         pid = fork();
6616         if (pid < 0) {
6617                 log_stderr("failure: fork");
6618                 goto out;
6619         }
6620         if (pid == 0) {
6621                 if (!caps_supported()) {
6622                         log_debug("skip: capability library not installed");
6623                         exit(EXIT_SUCCESS);
6624                 }
6625
6626                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
6627                         die("failure: switch_userns");
6628
6629                 /* validate file can be directly read */
6630                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6631                 if (fd < 0)
6632                         die("failure: openat");
6633                 safe_close(fd);
6634
6635                 /* validate file can be read through own symlink */
6636                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6637                 if (fd < 0)
6638                         die("failure: openat");
6639                 safe_close(fd);
6640
6641                 /* validate file can be read through root symlink */
6642                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6643                 if (fd < 0)
6644                         die("failure: openat");
6645                 safe_close(fd);
6646
6647                 /* validate file can't be read through other users symlink */
6648                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6649                 if (fd >= 0)
6650                         die("failure: openat");
6651                 if (errno != EACCES)
6652                         die("failure: errno");
6653
6654                 exit(EXIT_SUCCESS);
6655         }
6656         if (wait_for_pid(pid)) {
6657                 log_stderr("failure: wait_for_pid");
6658                 goto out;
6659         }
6660
6661         pid = fork();
6662         if (pid < 0) {
6663                 log_stderr("failure: fork");
6664                 goto out;
6665         }
6666         if (pid == 0) {
6667                 if (!caps_supported()) {
6668                         log_debug("skip: capability library not installed");
6669                         exit(EXIT_SUCCESS);
6670                 }
6671
6672                 if (!switch_userns(attr.userns_fd, 2000, 2000, true))
6673                         die("failure: switch_userns");
6674
6675                 /* validate file can be directly read */
6676                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6677                 if (fd < 0)
6678                         die("failure: openat");
6679                 safe_close(fd);
6680
6681                 /* validate file can be read through own symlink */
6682                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6683                 if (fd < 0)
6684                         die("failure: openat");
6685                 safe_close(fd);
6686
6687                 /* validate file can be read through root symlink */
6688                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6689                 if (fd < 0)
6690                         die("failure: openat");
6691                 safe_close(fd);
6692
6693                 /* validate file can't be read through other users symlink */
6694                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6695                 if (fd >= 0)
6696                         die("failure: openat");
6697                 if (errno != EACCES)
6698                         die("failure: errno");
6699
6700                 exit(EXIT_SUCCESS);
6701         }
6702         if (wait_for_pid(pid)) {
6703                 log_stderr("failure: wait_for_pid");
6704                 goto out;
6705         }
6706
6707         fret = 0;
6708         log_debug("Ran test");
6709 out:
6710         safe_close(dir_fd);
6711         safe_close(open_tree_fd);
6712         safe_close(attr.userns_fd);
6713
6714         return fret;
6715 }
6716
6717 static int acls(void)
6718 {
6719         int fret = -1;
6720         int dir1_fd = -EBADF, open_tree_fd = -EBADF;
6721         struct mount_attr attr = {
6722                 .attr_set = MOUNT_ATTR_IDMAP,
6723         };
6724         pid_t pid;
6725
6726         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
6727                 log_stderr("failure: mkdirat");
6728                 goto out;
6729         }
6730         if (fchmodat(t_dir1_fd, DIR1, 0777, 0)) {
6731                 log_stderr("failure: fchmodat");
6732                 goto out;
6733         }
6734
6735         if (mkdirat(t_dir1_fd, DIR2, 0777)) {
6736                 log_stderr("failure: mkdirat");
6737                 goto out;
6738         }
6739         if (fchmodat(t_dir1_fd, DIR2, 0777, 0)) {
6740                 log_stderr("failure: fchmodat");
6741                 goto out;
6742         }
6743
6744         /* Changing mount properties on a detached mount. */
6745         attr.userns_fd = get_userns_fd(100010, 100020, 5);
6746         if (attr.userns_fd < 0) {
6747                 log_stderr("failure: get_userns_fd");
6748                 goto out;
6749         }
6750
6751         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
6752                                      AT_NO_AUTOMOUNT |
6753                                      AT_SYMLINK_NOFOLLOW |
6754                                      OPEN_TREE_CLOEXEC |
6755                                      OPEN_TREE_CLONE);
6756         if (open_tree_fd < 0) {
6757                 log_stderr("failure: sys_open_tree");
6758                 goto out;
6759         }
6760
6761         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6762                 log_stderr("failure: sys_mount_setattr");
6763                 goto out;
6764         }
6765
6766         if (sys_move_mount(open_tree_fd, "", t_dir1_fd, DIR2, MOVE_MOUNT_F_EMPTY_PATH)) {
6767                 log_stderr("failure: sys_move_mount");
6768                 goto out;
6769         }
6770
6771         dir1_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6772         if (dir1_fd < 0) {
6773                 log_stderr("failure: openat");
6774                 goto out;
6775         }
6776
6777         if (mkdirat(dir1_fd, DIR3, 0000)) {
6778                 log_stderr("failure: mkdirat");
6779                 goto out;
6780         }
6781         if (fchown(dir1_fd, 100010, 100010)) {
6782                 log_stderr("failure: fchown");
6783                 goto out;
6784         }
6785         if (fchmod(dir1_fd, 0777)) {
6786                 log_stderr("failure: fchmod");
6787                 goto out;
6788         }
6789
6790         snprintf(t_buf, sizeof(t_buf), "setfacl -m u:100010:rwx %s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6791         if (system(t_buf)) {
6792                 log_stderr("failure: system");
6793                 goto out;
6794         }
6795
6796         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100010:rwx", t_mountpoint, T_DIR1, DIR1, DIR3);
6797         if (system(t_buf)) {
6798                 log_stderr("failure: system");
6799                 goto out;
6800         }
6801
6802         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100020:rwx", t_mountpoint, T_DIR1, DIR2, DIR3);
6803         if (system(t_buf)) {
6804                 log_stderr("failure: system");
6805                 goto out;
6806         }
6807
6808         pid = fork();
6809         if (pid < 0) {
6810                 log_stderr("failure: fork");
6811                 goto out;
6812         }
6813         if (pid == 0) {
6814                 if (!caps_supported()) {
6815                         log_debug("skip: capability library not installed");
6816                         exit(EXIT_SUCCESS);
6817                 }
6818
6819                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6820                         die("failure: switch_userns");
6821
6822                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6823                          t_mountpoint, T_DIR1, DIR1, DIR3, 4294967295LU);
6824                 if (system(t_buf))
6825                         die("failure: system");
6826
6827                 exit(EXIT_SUCCESS);
6828         }
6829         if (wait_for_pid(pid)) {
6830                 log_stderr("failure: wait_for_pid");
6831                 goto out;
6832         }
6833
6834         pid = fork();
6835         if (pid < 0) {
6836                 log_stderr("failure: fork");
6837                 goto out;
6838         }
6839         if (pid == 0) {
6840                 if (!caps_supported()) {
6841                         log_debug("skip: capability library not installed");
6842                         exit(EXIT_SUCCESS);
6843                 }
6844
6845                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6846                         die("failure: switch_userns");
6847
6848                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6849                          t_mountpoint, T_DIR1, DIR2, DIR3, 100010LU);
6850                 if (system(t_buf))
6851                         die("failure: system");
6852
6853                 exit(EXIT_SUCCESS);
6854         }
6855         if (wait_for_pid(pid)) {
6856                 log_stderr("failure: wait_for_pid");
6857                 goto out;
6858         }
6859
6860         /* Now, dir is owned by someone else in the user namespace, but we can
6861          * still read it because of acls.
6862          */
6863         if (fchown(dir1_fd, 100012, 100012)) {
6864                 log_stderr("failure: fchown");
6865                 goto out;
6866         }
6867
6868         pid = fork();
6869         if (pid < 0) {
6870                 log_stderr("failure: fork");
6871                 goto out;
6872         }
6873         if (pid == 0) {
6874                 int fd;
6875
6876                 if (!caps_supported()) {
6877                         log_debug("skip: capability library not installed");
6878                         exit(EXIT_SUCCESS);
6879                 }
6880
6881                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6882                         die("failure: switch_userns");
6883
6884                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6885                 if (fd < 0)
6886                         die("failure: openat");
6887
6888                 exit(EXIT_SUCCESS);
6889         }
6890         if (wait_for_pid(pid)) {
6891                 log_stderr("failure: wait_for_pid");
6892                 goto out;
6893         }
6894
6895         /* if we delete the acls, the ls should fail because it's 700. */
6896         snprintf(t_buf, sizeof(t_buf), "%s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6897         if (removexattr(t_buf, "system.posix_acl_access")) {
6898                 log_stderr("failure: removexattr");
6899                 goto out;
6900         }
6901
6902         pid = fork();
6903         if (pid < 0) {
6904                 log_stderr("failure: fork");
6905                 goto out;
6906         }
6907         if (pid == 0) {
6908                 int fd;
6909
6910                 if (!caps_supported()) {
6911                         log_debug("skip: capability library not installed");
6912                         exit(EXIT_SUCCESS);
6913                 }
6914
6915                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6916                         die("failure: switch_userns");
6917
6918                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6919                 if (fd >= 0)
6920                         die("failure: openat");
6921
6922                 exit(EXIT_SUCCESS);
6923         }
6924         if (wait_for_pid(pid)) {
6925                 log_stderr("failure: wait_for_pid");
6926                 goto out;
6927         }
6928
6929         snprintf(t_buf, sizeof(t_buf), "%s/" T_DIR1 "/" DIR2, t_mountpoint);
6930         sys_umount2(t_buf, MNT_DETACH);
6931
6932         fret = 0;
6933         log_debug("Ran test");
6934 out:
6935         safe_close(attr.userns_fd);
6936         safe_close(dir1_fd);
6937         safe_close(open_tree_fd);
6938
6939         return fret;
6940 }
6941
6942 #ifdef HAVE_LIBURING_H
6943 static int io_uring_openat_with_creds(struct io_uring *ring, int dfd, const char *path, int cred_id,
6944                                       bool with_link, int *ret_cqe)
6945 {
6946         struct io_uring_cqe *cqe;
6947         struct io_uring_sqe *sqe;
6948         int ret, i, to_submit = 1;
6949
6950         if (with_link) {
6951                 sqe = io_uring_get_sqe(ring);
6952                 if (!sqe)
6953                         return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6954                 io_uring_prep_nop(sqe);
6955                 sqe->flags |= IOSQE_IO_LINK;
6956                 sqe->user_data = 1;
6957                 to_submit++;
6958         }
6959
6960         sqe = io_uring_get_sqe(ring);
6961         if (!sqe)
6962                 return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6963         io_uring_prep_openat(sqe, dfd, path, O_RDONLY | O_CLOEXEC, 0);
6964         sqe->user_data = 2;
6965
6966         if (cred_id != -1)
6967                 sqe->personality = cred_id;
6968
6969         ret = io_uring_submit(ring);
6970         if (ret != to_submit) {
6971                 log_stderr("failure: io_uring_submit");
6972                 goto out;
6973         }
6974
6975         for (i = 0; i < to_submit; i++) {
6976                 ret = io_uring_wait_cqe(ring, &cqe);
6977                 if (ret < 0) {
6978                         log_stderr("failure: io_uring_wait_cqe");
6979                         goto out;
6980                 }
6981
6982                 ret = cqe->res;
6983                 /*
6984                  * Make sure caller can identify that this is a proper io_uring
6985                  * failure and not some earlier error.
6986                  */
6987                 if (ret_cqe)
6988                         *ret_cqe = ret;
6989                 io_uring_cqe_seen(ring, cqe);
6990         }
6991         log_debug("Ran test");
6992 out:
6993         return ret;
6994 }
6995
6996 static int io_uring(void)
6997 {
6998         int fret = -1;
6999         int file1_fd = -EBADF;
7000         struct io_uring *ring;
7001         int cred_id, ret, ret_cqe;
7002         pid_t pid;
7003
7004         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7005                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7006         if (!ring)
7007                 return log_errno(-1, "failure: io_uring_queue_init");
7008
7009         ret = io_uring_queue_init(8, ring, 0);
7010         if (ret) {
7011                 log_stderr("failure: io_uring_queue_init");
7012                 goto out_unmap;
7013         }
7014
7015         ret = io_uring_register_personality(ring);
7016         if (ret < 0) {
7017                 fret = 0;
7018                 goto out_unmap; /* personalities not supported */
7019         }
7020         cred_id = ret;
7021
7022         /* create file only owner can open */
7023         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7024         if (file1_fd < 0) {
7025                 log_stderr("failure: openat");
7026                 goto out;
7027         }
7028         if (fchown(file1_fd, 0, 0)) {
7029                 log_stderr("failure: fchown");
7030                 goto out;
7031         }
7032         if (fchmod(file1_fd, 0600)) {
7033                 log_stderr("failure: fchmod");
7034                 goto out;
7035         }
7036         safe_close(file1_fd);
7037
7038         pid = fork();
7039         if (pid < 0) {
7040                 log_stderr("failure: fork");
7041                 goto out;
7042         }
7043         if (pid == 0) {
7044                 /* Verify we can open it with our current credentials. */
7045                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7046                                                       -1, false, NULL);
7047                 if (file1_fd < 0)
7048                         die("failure: io_uring_open_file");
7049
7050                 exit(EXIT_SUCCESS);
7051         }
7052         if (wait_for_pid(pid)) {
7053                 log_stderr("failure: wait_for_pid");
7054                 goto out;
7055         }
7056
7057         pid = fork();
7058         if (pid < 0) {
7059                 log_stderr("failure: fork");
7060                 goto out;
7061         }
7062         if (pid == 0) {
7063                 if (!switch_ids(1000, 1000))
7064                         die("failure: switch_ids");
7065
7066                 /* Verify we can't open it with our current credentials. */
7067                 ret_cqe = 0;
7068                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7069                                                       -1, false, &ret_cqe);
7070                 if (file1_fd >= 0)
7071                         die("failure: io_uring_open_file");
7072                 if (ret_cqe == 0)
7073                         die("failure: non-open() related io_uring_open_file failure %d", ret_cqe);
7074                 if (ret_cqe != -EACCES)
7075                         die("failure: errno(%d)", abs(ret_cqe));
7076
7077                 exit(EXIT_SUCCESS);
7078         }
7079         if (wait_for_pid(pid)) {
7080                 log_stderr("failure: wait_for_pid");
7081                 goto out;
7082         }
7083
7084         pid = fork();
7085         if (pid < 0) {
7086                 log_stderr("failure: fork");
7087                 goto out;
7088         }
7089         if (pid == 0) {
7090                 if (!switch_ids(1000, 1000))
7091                         die("failure: switch_ids");
7092
7093                 /* Verify we can open it with the registered credentials. */
7094                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7095                                                       cred_id, false, NULL);
7096                 if (file1_fd < 0)
7097                         die("failure: io_uring_open_file");
7098
7099                 /* Verify we can open it with the registered credentials and as
7100                  * a link.
7101                  */
7102                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7103                                                       cred_id, true, NULL);
7104                 if (file1_fd < 0)
7105                         die("failure: io_uring_open_file");
7106
7107                 exit(EXIT_SUCCESS);
7108         }
7109         if (wait_for_pid(pid)) {
7110                 log_stderr("failure: wait_for_pid");
7111                 goto out;
7112         }
7113
7114         fret = 0;
7115         log_debug("Ran test");
7116 out:
7117         ret = io_uring_unregister_personality(ring, cred_id);
7118         if (ret)
7119                 log_stderr("failure: io_uring_unregister_personality");
7120
7121 out_unmap:
7122         munmap(ring, sizeof(struct io_uring));
7123
7124         safe_close(file1_fd);
7125
7126         return fret;
7127 }
7128
7129 static int io_uring_userns(void)
7130 {
7131         int fret = -1;
7132         int file1_fd = -EBADF, userns_fd = -EBADF;
7133         struct io_uring *ring;
7134         int cred_id, ret, ret_cqe;
7135         pid_t pid;
7136
7137         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7138                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7139         if (!ring)
7140                 return log_errno(-1, "failure: io_uring_queue_init");
7141
7142         ret = io_uring_queue_init(8, ring, 0);
7143         if (ret) {
7144                 log_stderr("failure: io_uring_queue_init");
7145                 goto out_unmap;
7146         }
7147
7148         ret = io_uring_register_personality(ring);
7149         if (ret < 0) {
7150                 fret = 0;
7151                 goto out_unmap; /* personalities not supported */
7152         }
7153         cred_id = ret;
7154
7155         /* create file only owner can open */
7156         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7157         if (file1_fd < 0) {
7158                 log_stderr("failure: openat");
7159                 goto out;
7160         }
7161         if (fchown(file1_fd, 0, 0)) {
7162                 log_stderr("failure: fchown");
7163                 goto out;
7164         }
7165         if (fchmod(file1_fd, 0600)) {
7166                 log_stderr("failure: fchmod");
7167                 goto out;
7168         }
7169         safe_close(file1_fd);
7170
7171         userns_fd = get_userns_fd(0, 10000, 10000);
7172         if (userns_fd < 0) {
7173                 log_stderr("failure: get_userns_fd");
7174                 goto out;
7175         }
7176
7177         pid = fork();
7178         if (pid < 0) {
7179                 log_stderr("failure: fork");
7180                 goto out;
7181         }
7182         if (pid == 0) {
7183                 /* Verify we can open it with our current credentials. */
7184                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7185                                                       -1, false, NULL);
7186                 if (file1_fd < 0)
7187                         die("failure: io_uring_open_file");
7188
7189                 exit(EXIT_SUCCESS);
7190         }
7191         if (wait_for_pid(pid)) {
7192                 log_stderr("failure: wait_for_pid");
7193                 goto out;
7194         }
7195
7196         pid = fork();
7197         if (pid < 0) {
7198                 log_stderr("failure: fork");
7199                 goto out;
7200         }
7201         if (pid == 0) {
7202                 if (!switch_userns(userns_fd, 0, 0, false))
7203                         die("failure: switch_userns");
7204
7205                 /* Verify we can't open it with our current credentials. */
7206                 ret_cqe = 0;
7207                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7208                                                       -1, false, &ret_cqe);
7209                 if (file1_fd >= 0)
7210                         die("failure: io_uring_open_file");
7211                 if (ret_cqe == 0)
7212                         die("failure: non-open() related io_uring_open_file failure");
7213                 if (ret_cqe != -EACCES)
7214                         die("failure: errno(%d)", abs(ret_cqe));
7215
7216                 exit(EXIT_SUCCESS);
7217         }
7218         if (wait_for_pid(pid)) {
7219                 log_stderr("failure: wait_for_pid");
7220                 goto out;
7221         }
7222
7223         pid = fork();
7224         if (pid < 0) {
7225                 log_stderr("failure: fork");
7226                 goto out;
7227         }
7228         if (pid == 0) {
7229                 if (!switch_userns(userns_fd, 0, 0, false))
7230                         die("failure: switch_userns");
7231
7232                 /* Verify we can open it with the registered credentials. */
7233                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7234                                                       cred_id, false, NULL);
7235                 if (file1_fd < 0)
7236                         die("failure: io_uring_open_file");
7237
7238                 /* Verify we can open it with the registered credentials and as
7239                  * a link.
7240                  */
7241                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7242                                                       cred_id, true, NULL);
7243                 if (file1_fd < 0)
7244                         die("failure: io_uring_open_file");
7245
7246                 exit(EXIT_SUCCESS);
7247         }
7248         if (wait_for_pid(pid)) {
7249                 log_stderr("failure: wait_for_pid");
7250                 goto out;
7251         }
7252
7253         fret = 0;
7254         log_debug("Ran test");
7255 out:
7256         ret = io_uring_unregister_personality(ring, cred_id);
7257         if (ret)
7258                 log_stderr("failure: io_uring_unregister_personality");
7259
7260 out_unmap:
7261         munmap(ring, sizeof(struct io_uring));
7262
7263         safe_close(file1_fd);
7264         safe_close(userns_fd);
7265
7266         return fret;
7267 }
7268
7269 static int io_uring_idmapped(void)
7270 {
7271         int fret = -1;
7272         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7273         struct io_uring *ring;
7274         struct mount_attr attr = {
7275                 .attr_set = MOUNT_ATTR_IDMAP,
7276         };
7277         int cred_id, ret;
7278         pid_t pid;
7279
7280         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7281                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7282         if (!ring)
7283                 return log_errno(-1, "failure: io_uring_queue_init");
7284
7285         ret = io_uring_queue_init(8, ring, 0);
7286         if (ret) {
7287                 log_stderr("failure: io_uring_queue_init");
7288                 goto out_unmap;
7289         }
7290
7291         ret = io_uring_register_personality(ring);
7292         if (ret < 0) {
7293                 fret = 0;
7294                 goto out_unmap; /* personalities not supported */
7295         }
7296         cred_id = ret;
7297
7298         /* create file only owner can open */
7299         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7300         if (file1_fd < 0) {
7301                 log_stderr("failure: openat");
7302                 goto out;
7303         }
7304         if (fchown(file1_fd, 0, 0)) {
7305                 log_stderr("failure: fchown");
7306                 goto out;
7307         }
7308         if (fchmod(file1_fd, 0600)) {
7309                 log_stderr("failure: fchmod");
7310                 goto out;
7311         }
7312         safe_close(file1_fd);
7313
7314         /* Changing mount properties on a detached mount. */
7315         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7316         if (attr.userns_fd < 0)
7317                 return log_errno(-1, "failure: create user namespace");
7318
7319         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7320                                      AT_EMPTY_PATH |
7321                                      AT_NO_AUTOMOUNT |
7322                                      AT_SYMLINK_NOFOLLOW |
7323                                      OPEN_TREE_CLOEXEC |
7324                                      OPEN_TREE_CLONE);
7325         if (open_tree_fd < 0)
7326                 return log_errno(-1, "failure: create detached mount");
7327
7328         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7329                 return log_errno(-1, "failure: set mount attributes");
7330
7331         pid = fork();
7332         if (pid < 0) {
7333                 log_stderr("failure: fork");
7334                 goto out;
7335         }
7336         if (pid == 0) {
7337                 if (!switch_ids(10000, 10000))
7338                         die("failure: switch_ids");
7339
7340                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7341                                                       -1, false, NULL);
7342                 if (file1_fd < 0)
7343                         die("failure: io_uring_open_file");
7344
7345                 exit(EXIT_SUCCESS);
7346         }
7347         if (wait_for_pid(pid)) {
7348                 log_stderr("failure: wait_for_pid");
7349                 goto out;
7350         }
7351
7352         pid = fork();
7353         if (pid < 0) {
7354                 log_stderr("failure: fork");
7355                 goto out;
7356         }
7357         if (pid == 0) {
7358                 if (!switch_ids(10001, 10001))
7359                         die("failure: switch_ids");
7360
7361                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7362                                                       cred_id, false, NULL);
7363                 if (file1_fd < 0)
7364                         die("failure: io_uring_open_file");
7365
7366                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7367                                                       cred_id, true, NULL);
7368                 if (file1_fd < 0)
7369                         die("failure: io_uring_open_file");
7370
7371                 exit(EXIT_SUCCESS);
7372         }
7373         if (wait_for_pid(pid)) {
7374                 log_stderr("failure: wait_for_pid");
7375                 goto out;
7376         }
7377
7378         fret = 0;
7379         log_debug("Ran test");
7380 out:
7381         ret = io_uring_unregister_personality(ring, cred_id);
7382         if (ret)
7383                 log_stderr("failure: io_uring_unregister_personality");
7384
7385 out_unmap:
7386         munmap(ring, sizeof(struct io_uring));
7387
7388         safe_close(attr.userns_fd);
7389         safe_close(file1_fd);
7390         safe_close(open_tree_fd);
7391
7392         return fret;
7393 }
7394
7395 /*
7396  * Create an idmapped mount where the we leave the owner of the file unmapped.
7397  * In no circumstances, even with recorded credentials can it be allowed to
7398  * open the file.
7399  */
7400 static int io_uring_idmapped_unmapped(void)
7401 {
7402         int fret = -1;
7403         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7404         struct io_uring *ring;
7405         struct mount_attr attr = {
7406                 .attr_set = MOUNT_ATTR_IDMAP,
7407         };
7408         int cred_id, ret, ret_cqe;
7409         pid_t pid;
7410
7411         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7412                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7413         if (!ring)
7414                 return log_errno(-1, "failure: io_uring_queue_init");
7415
7416         ret = io_uring_queue_init(8, ring, 0);
7417         if (ret) {
7418                 log_stderr("failure: io_uring_queue_init");
7419                 goto out_unmap;
7420         }
7421
7422         ret = io_uring_register_personality(ring);
7423         if (ret < 0) {
7424                 fret = 0;
7425                 goto out_unmap; /* personalities not supported */
7426         }
7427         cred_id = ret;
7428
7429         /* create file only owner can open */
7430         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7431         if (file1_fd < 0) {
7432                 log_stderr("failure: openat");
7433                 goto out;
7434         }
7435         if (fchown(file1_fd, 0, 0)) {
7436                 log_stderr("failure: fchown");
7437                 goto out;
7438         }
7439         if (fchmod(file1_fd, 0600)) {
7440                 log_stderr("failure: fchmod");
7441                 goto out;
7442         }
7443         safe_close(file1_fd);
7444
7445         /* Changing mount properties on a detached mount. */
7446         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7447         if (attr.userns_fd < 0)
7448                 return log_errno(-1, "failure: create user namespace");
7449
7450         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7451                                      AT_EMPTY_PATH |
7452                                      AT_NO_AUTOMOUNT |
7453                                      AT_SYMLINK_NOFOLLOW |
7454                                      OPEN_TREE_CLOEXEC |
7455                                      OPEN_TREE_CLONE);
7456         if (open_tree_fd < 0)
7457                 return log_errno(-1, "failure: create detached mount");
7458
7459         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7460                 return log_errno(-1, "failure: set mount attributes");
7461
7462         pid = fork();
7463         if (pid < 0) {
7464                 log_stderr("failure: fork");
7465                 goto out;
7466         }
7467         if (pid == 0) {
7468                 if (!switch_ids(10000, 10000))
7469                         die("failure: switch_ids");
7470
7471                 ret_cqe = 0;
7472                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7473                                                       cred_id, false, &ret_cqe);
7474                 if (file1_fd >= 0)
7475                         die("failure: io_uring_open_file");
7476                 if (ret_cqe == 0)
7477                         die("failure: non-open() related io_uring_open_file failure");
7478                 if (ret_cqe != -EACCES)
7479                         die("failure: errno(%d)", abs(ret_cqe));
7480
7481                 ret_cqe = 0;
7482                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7483                                                       cred_id, true, &ret_cqe);
7484                 if (file1_fd >= 0)
7485                         die("failure: io_uring_open_file");
7486                 if (ret_cqe == 0)
7487                         die("failure: non-open() related io_uring_open_file failure");
7488                 if (ret_cqe != -EACCES)
7489                         die("failure: errno(%d)", abs(ret_cqe));
7490
7491                 exit(EXIT_SUCCESS);
7492         }
7493         if (wait_for_pid(pid)) {
7494                 log_stderr("failure: wait_for_pid");
7495                 goto out;
7496         }
7497
7498         fret = 0;
7499         log_debug("Ran test");
7500 out:
7501         ret = io_uring_unregister_personality(ring, cred_id);
7502         if (ret)
7503                 log_stderr("failure: io_uring_unregister_personality");
7504
7505 out_unmap:
7506         munmap(ring, sizeof(struct io_uring));
7507
7508         safe_close(attr.userns_fd);
7509         safe_close(file1_fd);
7510         safe_close(open_tree_fd);
7511
7512         return fret;
7513 }
7514
7515 static int io_uring_idmapped_userns(void)
7516 {
7517         int fret = -1;
7518         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7519         struct io_uring *ring;
7520         struct mount_attr attr = {
7521                 .attr_set = MOUNT_ATTR_IDMAP,
7522         };
7523         int cred_id, ret, ret_cqe;
7524         pid_t pid;
7525
7526         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7527                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7528         if (!ring)
7529                 return log_errno(-1, "failure: io_uring_queue_init");
7530
7531         ret = io_uring_queue_init(8, ring, 0);
7532         if (ret) {
7533                 log_stderr("failure: io_uring_queue_init");
7534                 goto out_unmap;
7535         }
7536
7537         ret = io_uring_register_personality(ring);
7538         if (ret < 0) {
7539                 fret = 0;
7540                 goto out_unmap; /* personalities not supported */
7541         }
7542         cred_id = ret;
7543
7544         /* create file only owner can open */
7545         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7546         if (file1_fd < 0) {
7547                 log_stderr("failure: openat");
7548                 goto out;
7549         }
7550         if (fchown(file1_fd, 0, 0)) {
7551                 log_stderr("failure: fchown");
7552                 goto out;
7553         }
7554         if (fchmod(file1_fd, 0600)) {
7555                 log_stderr("failure: fchmod");
7556                 goto out;
7557         }
7558         safe_close(file1_fd);
7559
7560         /* Changing mount properties on a detached mount. */
7561         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7562         if (attr.userns_fd < 0)
7563                 return log_errno(-1, "failure: create user namespace");
7564
7565         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7566                                      AT_EMPTY_PATH |
7567                                      AT_NO_AUTOMOUNT |
7568                                      AT_SYMLINK_NOFOLLOW |
7569                                      OPEN_TREE_CLOEXEC |
7570                                      OPEN_TREE_CLONE);
7571         if (open_tree_fd < 0)
7572                 return log_errno(-1, "failure: create detached mount");
7573
7574         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7575                 return log_errno(-1, "failure: set mount attributes");
7576
7577         pid = fork();
7578         if (pid < 0) {
7579                 log_stderr("failure: fork");
7580                 goto out;
7581         }
7582         if (pid == 0) {
7583                 if (!switch_userns(attr.userns_fd, 0, 0, false))
7584                         die("failure: switch_userns");
7585
7586                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7587                                                       -1, false, NULL);
7588                 if (file1_fd < 0)
7589                         die("failure: io_uring_open_file");
7590
7591                 exit(EXIT_SUCCESS);
7592         }
7593         if (wait_for_pid(pid)) {
7594                 log_stderr("failure: wait_for_pid");
7595                 goto out;
7596         }
7597
7598         pid = fork();
7599         if (pid < 0) {
7600                 log_stderr("failure: fork");
7601                 goto out;
7602         }
7603         if (pid == 0) {
7604                 if (!caps_supported()) {
7605                         log_debug("skip: capability library not installed");
7606                         exit(EXIT_SUCCESS);
7607                 }
7608
7609                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
7610                         die("failure: switch_userns");
7611
7612                 ret_cqe = 0;
7613                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7614                                                       -1, false, &ret_cqe);
7615                 if (file1_fd >= 0)
7616                         die("failure: io_uring_open_file");
7617                 if (ret_cqe == 0)
7618                         die("failure: non-open() related io_uring_open_file failure");
7619                 if (ret_cqe != -EACCES)
7620                         die("failure: errno(%d)", abs(ret_cqe));
7621
7622                 ret_cqe = 0;
7623                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7624                                                       -1, true, &ret_cqe);
7625                 if (file1_fd >= 0)
7626                         die("failure: io_uring_open_file");
7627                 if (ret_cqe == 0)
7628                         die("failure: non-open() related io_uring_open_file failure");
7629                 if (ret_cqe != -EACCES)
7630                         die("failure: errno(%d)", abs(ret_cqe));
7631
7632                 ret_cqe = 0;
7633                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7634                                                       -1, false, &ret_cqe);
7635                 if (file1_fd >= 0)
7636                         die("failure: io_uring_open_file");
7637                 if (ret_cqe == 0)
7638                         die("failure: non-open() related io_uring_open_file failure");
7639                 if (ret_cqe != -EACCES)
7640                         die("failure: errno(%d)", abs(ret_cqe));
7641
7642                 ret_cqe = 0;
7643                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7644                                                       -1, true, &ret_cqe);
7645                 if (file1_fd >= 0)
7646                         die("failure: io_uring_open_file");
7647                 if (ret_cqe == 0)
7648                         die("failure: non-open() related io_uring_open_file failure");
7649                 if (ret_cqe != -EACCES)
7650                         die("failure: errno(%d)", abs(ret_cqe));
7651
7652                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7653                                                       cred_id, false, NULL);
7654                 if (file1_fd < 0)
7655                         die("failure: io_uring_open_file");
7656
7657                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7658                                                       cred_id, true, NULL);
7659                 if (file1_fd < 0)
7660                         die("failure: io_uring_open_file");
7661
7662                 exit(EXIT_SUCCESS);
7663         }
7664         if (wait_for_pid(pid)) {
7665                 log_stderr("failure: wait_for_pid");
7666                 goto out;
7667         }
7668
7669         fret = 0;
7670         log_debug("Ran test");
7671 out:
7672         ret = io_uring_unregister_personality(ring, cred_id);
7673         if (ret)
7674                 log_stderr("failure: io_uring_unregister_personality");
7675
7676 out_unmap:
7677         munmap(ring, sizeof(struct io_uring));
7678
7679         safe_close(attr.userns_fd);
7680         safe_close(file1_fd);
7681         safe_close(open_tree_fd);
7682
7683         return fret;
7684 }
7685
7686 static int io_uring_idmapped_unmapped_userns(void)
7687 {
7688         int fret = -1;
7689         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7690         struct io_uring *ring;
7691         struct mount_attr attr = {
7692                 .attr_set = MOUNT_ATTR_IDMAP,
7693         };
7694         int cred_id, ret, ret_cqe;
7695         pid_t pid;
7696
7697         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7698                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7699         if (!ring)
7700                 return log_errno(-1, "failure: io_uring_queue_init");
7701
7702         ret = io_uring_queue_init(8, ring, 0);
7703         if (ret) {
7704                 log_stderr("failure: io_uring_queue_init");
7705                 goto out_unmap;
7706         }
7707
7708         ret = io_uring_register_personality(ring);
7709         if (ret < 0) {
7710                 fret = 0;
7711                 goto out_unmap; /* personalities not supported */
7712         }
7713         cred_id = ret;
7714
7715         /* create file only owner can open */
7716         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7717         if (file1_fd < 0) {
7718                 log_stderr("failure: openat");
7719                 goto out;
7720         }
7721         if (fchown(file1_fd, 0, 0)) {
7722                 log_stderr("failure: fchown");
7723                 goto out;
7724         }
7725         if (fchmod(file1_fd, 0600)) {
7726                 log_stderr("failure: fchmod");
7727                 goto out;
7728         }
7729         safe_close(file1_fd);
7730
7731         /* Changing mount properties on a detached mount. */
7732         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7733         if (attr.userns_fd < 0)
7734                 return log_errno(-1, "failure: create user namespace");
7735
7736         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7737                                      AT_EMPTY_PATH |
7738                                      AT_NO_AUTOMOUNT |
7739                                      AT_SYMLINK_NOFOLLOW |
7740                                      OPEN_TREE_CLOEXEC |
7741                                      OPEN_TREE_CLONE);
7742         if (open_tree_fd < 0)
7743                 return log_errno(-1, "failure: create detached mount");
7744
7745         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7746                 return log_errno(-1, "failure: set mount attributes");
7747
7748         pid = fork();
7749         if (pid < 0) {
7750                 log_stderr("failure: fork");
7751                 goto out;
7752         }
7753         if (pid == 0) {
7754                 if (!caps_supported()) {
7755                         log_debug("skip: capability library not installed");
7756                         exit(EXIT_SUCCESS);
7757                 }
7758
7759                 if (!switch_userns(attr.userns_fd, 10000, 10000, true))
7760                         die("failure: switch_ids");
7761
7762                 ret_cqe = 0;
7763                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7764                                                       cred_id, false, &ret_cqe);
7765                 if (file1_fd >= 0)
7766                         die("failure: io_uring_open_file");
7767                 if (ret_cqe == 0)
7768                         die("failure: non-open() related io_uring_open_file failure");
7769                 if (ret_cqe != -EACCES)
7770                         die("failure: errno(%d)", abs(ret_cqe));
7771
7772                 ret_cqe = 0;
7773                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7774                                                       cred_id, true, &ret_cqe);
7775                 if (file1_fd >= 0)
7776                         die("failure: io_uring_open_file");
7777                 if (ret_cqe == 0)
7778                         die("failure: non-open() related io_uring_open_file failure");
7779                 if (ret_cqe != -EACCES)
7780                         die("failure: errno(%d)", abs(ret_cqe));
7781
7782                 exit(EXIT_SUCCESS);
7783         }
7784         if (wait_for_pid(pid)) {
7785                 log_stderr("failure: wait_for_pid");
7786                 goto out;
7787         }
7788
7789         fret = 0;
7790         log_debug("Ran test");
7791 out:
7792         ret = io_uring_unregister_personality(ring, cred_id);
7793         if (ret)
7794                 log_stderr("failure: io_uring_unregister_personality");
7795
7796 out_unmap:
7797         munmap(ring, sizeof(struct io_uring));
7798
7799         safe_close(attr.userns_fd);
7800         safe_close(file1_fd);
7801         safe_close(open_tree_fd);
7802
7803         return fret;
7804 }
7805 #endif /* HAVE_LIBURING_H */
7806
/* The following tests are concerned with setgid inheritance. These can be
 * filesystem type specific. For xfs, if a new file or directory is created
 * within a setgid directory and irix_sgid_inherit is set, then the setgid bit
 * is inherited only if the caller is in the group of the directory.
 */
7812 static int setgid_create(void)
7813 {
7814         int fret = -1;
7815         int file1_fd = -EBADF;
7816         pid_t pid;
7817
7818         if (!caps_supported())
7819                 return 0;
7820
7821         if (fchmod(t_dir1_fd, S_IRUSR |
7822                               S_IWUSR |
7823                               S_IRGRP |
7824                               S_IWGRP |
7825                               S_IROTH |
7826                               S_IWOTH |
7827                               S_IXUSR |
7828                               S_IXGRP |
7829                               S_IXOTH |
7830                               S_ISGID), 0) {
7831                 log_stderr("failure: fchmod");
7832                 goto out;
7833         }
7834
7835         /* Verify that the setgid bit got raised. */
7836         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7837                 log_stderr("failure: is_setgid");
7838                 goto out;
7839         }
7840
7841         pid = fork();
7842         if (pid < 0) {
7843                 log_stderr("failure: fork");
7844                 goto out;
7845         }
7846         if (pid == 0) {
7847                 /* create regular file via open() */
7848                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7849                 if (file1_fd < 0)
7850                         die("failure: create");
7851
7852                 /* We're capable_wrt_inode_uidgid() and also our fsgid matches
7853                  * the directories gid.
7854                  */
7855                 if (!is_setgid(t_dir1_fd, FILE1, 0))
7856                         die("failure: is_setgid");
7857
7858                 /* create directory */
7859                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7860                         die("failure: create");
7861
7862                 /* Directories always inherit the setgid bit. */
7863                 if (!is_setgid(t_dir1_fd, DIR1, 0))
7864                         die("failure: is_setgid");
7865
7866                 if (unlinkat(t_dir1_fd, FILE1, 0))
7867                         die("failure: delete");
7868
7869                 if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR))
7870                         die("failure: delete");
7871
7872                 exit(EXIT_SUCCESS);
7873         }
7874         if (wait_for_pid(pid))
7875                 goto out;
7876
7877         pid = fork();
7878         if (pid < 0) {
7879                 log_stderr("failure: fork");
7880                 goto out;
7881         }
7882         if (pid == 0) {
7883                 if (!switch_ids(0, 10000))
7884                         die("failure: switch_ids");
7885
7886                 if (!caps_down())
7887                         die("failure: caps_down");
7888
7889                 /* create regular file via open() */
7890                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7891                 if (file1_fd < 0)
7892                         die("failure: create");
7893
7894                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
7895                  * bit needs to be stripped.
7896                  */
7897                 if (is_setgid(t_dir1_fd, FILE1, 0))
7898                         die("failure: is_setgid");
7899
7900                 /* create directory */
7901                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7902                         die("failure: create");
7903
7904                 if (xfs_irix_sgid_inherit_enabled()) {
7905                         /* We're not in_group_p(). */
7906                         if (is_setgid(t_dir1_fd, DIR1, 0))
7907                                 die("failure: is_setgid");
7908                 } else {
7909                         /* Directories always inherit the setgid bit. */
7910                         if (!is_setgid(t_dir1_fd, DIR1, 0))
7911                                 die("failure: is_setgid");
7912                 }
7913
7914                 exit(EXIT_SUCCESS);
7915         }
7916         if (wait_for_pid(pid))
7917                 goto out;
7918
7919         fret = 0;
7920         log_debug("Ran test");
7921 out:
7922         safe_close(file1_fd);
7923
7924         return fret;
7925 }
7926
7927 static int setgid_create_idmapped(void)
7928 {
7929         int fret = -1;
7930         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7931         struct mount_attr attr = {
7932                 .attr_set = MOUNT_ATTR_IDMAP,
7933         };
7934         pid_t pid;
7935
7936         if (!caps_supported())
7937                 return 0;
7938
7939         if (fchmod(t_dir1_fd, S_IRUSR |
7940                               S_IWUSR |
7941                               S_IRGRP |
7942                               S_IWGRP |
7943                               S_IROTH |
7944                               S_IWOTH |
7945                               S_IXUSR |
7946                               S_IXGRP |
7947                               S_IXOTH |
7948                               S_ISGID), 0) {
7949                 log_stderr("failure: fchmod");
7950                 goto out;
7951         }
7952
7953         /* Verify that the sid bits got raised. */
7954         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7955                 log_stderr("failure: is_setgid");
7956                 goto out;
7957         }
7958
7959         /* Changing mount properties on a detached mount. */
7960         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7961         if (attr.userns_fd < 0) {
7962                 log_stderr("failure: get_userns_fd");
7963                 goto out;
7964         }
7965
7966         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7967                                      AT_EMPTY_PATH |
7968                                      AT_NO_AUTOMOUNT |
7969                                      AT_SYMLINK_NOFOLLOW |
7970                                      OPEN_TREE_CLOEXEC |
7971                                      OPEN_TREE_CLONE);
7972         if (open_tree_fd < 0) {
7973                 log_stderr("failure: sys_open_tree");
7974                 goto out;
7975         }
7976
7977         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
7978                 log_stderr("failure: sys_mount_setattr");
7979                 goto out;
7980         }
7981
7982         pid = fork();
7983         if (pid < 0) {
7984                 log_stderr("failure: fork");
7985                 goto out;
7986         }
7987         if (pid == 0) {
7988                 if (!switch_ids(10000, 11000))
7989                         die("failure: switch fsids");
7990
7991                 /* create regular file via open() */
7992                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7993                 if (file1_fd < 0)
7994                         die("failure: create");
7995
7996                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
7997                  * bit needs to be stripped.
7998                  */
7999                 if (is_setgid(open_tree_fd, FILE1, 0))
8000                         die("failure: is_setgid");
8001
8002                 /* create directory */
8003                 if (mkdirat(open_tree_fd, DIR1, 0000))
8004                         die("failure: create");
8005
8006                 if (xfs_irix_sgid_inherit_enabled()) {
8007                         /* We're not in_group_p(). */
8008                         if (is_setgid(open_tree_fd, DIR1, 0))
8009                                 die("failure: is_setgid");
8010                 } else {
8011                         /* Directories always inherit the setgid bit. */
8012                         if (!is_setgid(open_tree_fd, DIR1, 0))
8013                                 die("failure: is_setgid");
8014                 }
8015
8016                 exit(EXIT_SUCCESS);
8017         }
8018         if (wait_for_pid(pid))
8019                 goto out;
8020
8021         fret = 0;
8022         log_debug("Ran test");
8023 out:
8024         safe_close(attr.userns_fd);
8025         safe_close(file1_fd);
8026         safe_close(open_tree_fd);
8027
8028         return fret;
8029 }
8030
8031 static int setgid_create_idmapped_in_userns(void)
8032 {
8033         int fret = -1;
8034         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8035         struct mount_attr attr = {
8036                 .attr_set = MOUNT_ATTR_IDMAP,
8037         };
8038         pid_t pid;
8039
8040         if (!caps_supported())
8041                 return 0;
8042
8043         if (fchmod(t_dir1_fd, S_IRUSR |
8044                               S_IWUSR |
8045                               S_IRGRP |
8046                               S_IWGRP |
8047                               S_IROTH |
8048                               S_IWOTH |
8049                               S_IXUSR |
8050                               S_IXGRP |
8051                               S_IXOTH |
8052                               S_ISGID), 0) {
8053                 log_stderr("failure: fchmod");
8054                 goto out;
8055         }
8056
8057         /* Verify that the sid bits got raised. */
8058         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
8059                 log_stderr("failure: is_setgid");
8060                 goto out;
8061         }
8062
8063         /* Changing mount properties on a detached mount. */
8064         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8065         if (attr.userns_fd < 0) {
8066                 log_stderr("failure: get_userns_fd");
8067                 goto out;
8068         }
8069
8070         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8071                                      AT_EMPTY_PATH |
8072                                      AT_NO_AUTOMOUNT |
8073                                      AT_SYMLINK_NOFOLLOW |
8074                                      OPEN_TREE_CLOEXEC |
8075                                      OPEN_TREE_CLONE);
8076         if (open_tree_fd < 0) {
8077                 log_stderr("failure: sys_open_tree");
8078                 goto out;
8079         }
8080
8081         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8082                 log_stderr("failure: sys_mount_setattr");
8083                 goto out;
8084         }
8085
8086         pid = fork();
8087         if (pid < 0) {
8088                 log_stderr("failure: fork");
8089                 goto out;
8090         }
8091         if (pid == 0) {
8092                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8093                         die("failure: switch_userns");
8094
8095                 /* create regular file via open() */
8096                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8097                 if (file1_fd < 0)
8098                         die("failure: create");
8099
8100                 /* We're in_group_p() and capable_wrt_inode_uidgid() so setgid
8101                  * bit needs to be set.
8102                  */
8103                 if (!is_setgid(open_tree_fd, FILE1, 0))
8104                         die("failure: is_setgid");
8105
8106                 /* create directory */
8107                 if (mkdirat(open_tree_fd, DIR1, 0000))
8108                         die("failure: create");
8109
8110                 /* Directories always inherit the setgid bit. */
8111                 if (!is_setgid(open_tree_fd, DIR1, 0))
8112                         die("failure: is_setgid");
8113
8114                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8115                         die("failure: check ownership");
8116
8117                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8118                         die("failure: check ownership");
8119
8120                 if (unlinkat(open_tree_fd, FILE1, 0))
8121                         die("failure: delete");
8122
8123                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8124                         die("failure: delete");
8125
8126                 exit(EXIT_SUCCESS);
8127         }
8128         if (wait_for_pid(pid))
8129                 goto out;
8130
8131         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8132                 log_stderr("failure: fchownat");
8133                 goto out;
8134         }
8135
8136         pid = fork();
8137         if (pid < 0) {
8138                 log_stderr("failure: fork");
8139                 goto out;
8140         }
8141         if (pid == 0) {
8142                 if (!caps_supported()) {
8143                         log_debug("skip: capability library not installed");
8144                         exit(EXIT_SUCCESS);
8145                 }
8146
8147                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8148                         die("failure: switch_userns");
8149
8150                 /* create regular file via open() */
8151                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8152                 if (file1_fd < 0)
8153                         die("failure: create");
8154
8155                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8156                  * bit needs to be stripped.
8157                  */
8158                 if (is_setgid(open_tree_fd, FILE1, 0))
8159                         die("failure: is_setgid");
8160
8161                 /* create directory */
8162                 if (mkdirat(open_tree_fd, DIR1, 0000))
8163                         die("failure: create");
8164
8165                 if (xfs_irix_sgid_inherit_enabled()) {
8166                         /* We're not in_group_p(). */
8167                         if (is_setgid(open_tree_fd, DIR1, 0))
8168                                 die("failure: is_setgid");
8169                 } else {
8170                         /* Directories always inherit the setgid bit. */
8171                         if (!is_setgid(open_tree_fd, DIR1, 0))
8172                                 die("failure: is_setgid");
8173                 }
8174
8175                 /* Files and directories created in setgid directories inherit
8176                  * the i_gid of the parent directory.
8177                  */
8178                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8179                         die("failure: check ownership");
8180
8181                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 1000))
8182                         die("failure: check ownership");
8183
8184                 if (unlinkat(open_tree_fd, FILE1, 0))
8185                         die("failure: delete");
8186
8187                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8188                         die("failure: delete");
8189
8190                 exit(EXIT_SUCCESS);
8191         }
8192         if (wait_for_pid(pid))
8193                 goto out;
8194
8195         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8196                 log_stderr("failure: fchownat");
8197                 goto out;
8198         }
8199
8200         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8201                 log_stderr("failure: fchownat");
8202                 goto out;
8203         }
8204
8205         pid = fork();
8206         if (pid < 0) {
8207                 log_stderr("failure: fork");
8208                 goto out;
8209         }
8210         if (pid == 0) {
8211                 if (!caps_supported()) {
8212                         log_debug("skip: capability library not installed");
8213                         exit(EXIT_SUCCESS);
8214                 }
8215
8216                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8217                         die("failure: switch_userns");
8218
8219                 /* create regular file via open() */
8220                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8221                 if (file1_fd < 0)
8222                         die("failure: create");
8223
8224                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8225                  * bit needs to be stripped.
8226                  */
8227                 if (is_setgid(open_tree_fd, FILE1, 0))
8228                         die("failure: is_setgid");
8229
8230                 /* create directory */
8231                 if (mkdirat(open_tree_fd, DIR1, 0000))
8232                         die("failure: create");
8233
8234                 /* Directories always inherit the setgid bit. */
8235                 if (xfs_irix_sgid_inherit_enabled()) {
8236                         /* We're not in_group_p(). */
8237                         if (is_setgid(open_tree_fd, DIR1, 0))
8238                                 die("failure: is_setgid");
8239                 } else {
8240                         /* Directories always inherit the setgid bit. */
8241                         if (!is_setgid(open_tree_fd, DIR1, 0))
8242                                 die("failure: is_setgid");
8243                 }
8244
8245                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8246                         die("failure: check ownership");
8247
8248                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8249                         die("failure: check ownership");
8250
8251                 exit(EXIT_SUCCESS);
8252         }
8253         if (wait_for_pid(pid))
8254                 goto out;
8255
8256         fret = 0;
8257         log_debug("Ran test");
8258 out:
8259         safe_close(attr.userns_fd);
8260         safe_close(file1_fd);
8261         safe_close(open_tree_fd);
8262
8263         return fret;
8264 }
8265
8266 #define PTR_TO_INT(p) ((int)((intptr_t)(p)))
8267 #define INT_TO_PTR(u) ((void *)((intptr_t)(u)))
8268
8269 static void *idmapped_mount_create_cb(void *data)
8270 {
8271         int fret = EXIT_FAILURE, open_tree_fd = PTR_TO_INT(data);
8272         struct mount_attr attr = {
8273                 .attr_set = MOUNT_ATTR_IDMAP,
8274         };
8275
8276         /* Changing mount properties on a detached mount. */
8277         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8278         if (attr.userns_fd < 0) {
8279                 log_stderr("failure: get_userns_fd");
8280                 goto out;
8281         }
8282
8283         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8284                 log_stderr("failure: sys_mount_setattr");
8285                 goto out;
8286         }
8287
8288         fret = EXIT_SUCCESS;
8289
8290 out:
8291         safe_close(attr.userns_fd);
8292         pthread_exit(INT_TO_PTR(fret));
8293 }
8294
/* This tries to verify that we never see an inconsistent ownership on-disk and
 * can't write invalid ids to disk. To do this we create a race between
 * idmapping a mount and creating files on it.
 * Note: it is perfectly fine to see overflowuid and overflowgid as owner if we
 * create files through the open_tree_fd before the mount is idmapped but look
 * at the files after the mount has been idmapped. In this test, however, it
 * can never be the case that we see overflowuid and overflowgid when we access
 * the file through a non-idmapped mount (in the initial user namespace).
 */
8304 static void *idmapped_mount_operations_cb(void *data)
8305 {
8306         int file1_fd = -EBADF, file2_fd = -EBADF, dir1_fd = -EBADF,
8307             dir1_fd2 = -EBADF, fret = EXIT_FAILURE,
8308             open_tree_fd = PTR_TO_INT(data);
8309
8310         if (!switch_fsids(10000, 10000)) {
8311                 log_stderr("failure: switch fsids");
8312                 goto out;
8313         }
8314
8315         file1_fd = openat(open_tree_fd, FILE1,
8316                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8317         if (file1_fd < 0) {
8318                 log_stderr("failure: openat");
8319                 goto out;
8320         }
8321
8322         file2_fd = openat(open_tree_fd, FILE2,
8323                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8324         if (file2_fd < 0) {
8325                 log_stderr("failure: openat");
8326                 goto out;
8327         }
8328
8329         if (mkdirat(open_tree_fd, DIR1, 0777)) {
8330                 log_stderr("failure: mkdirat");
8331                 goto out;
8332         }
8333
8334         dir1_fd = openat(open_tree_fd, DIR1,
8335                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8336         if (dir1_fd < 0) {
8337                 log_stderr("failure: openat");
8338                 goto out;
8339         }
8340
8341         if (!__expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0, false) &&
8342             !__expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000, false) &&
8343             !__expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid, false)) {
8344                 log_stderr("failure: expected_uid_gid");
8345                 goto out;
8346         }
8347
8348         if (!__expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0, false) &&
8349             !__expected_uid_gid(open_tree_fd, FILE2, 0, 10000, 10000, false) &&
8350             !__expected_uid_gid(open_tree_fd, FILE2, 0, t_overflowuid, t_overflowgid, false)) {
8351                 log_stderr("failure: expected_uid_gid");
8352                 goto out;
8353         }
8354
8355         if (!__expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0, false) &&
8356             !__expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000, false) &&
8357             !__expected_uid_gid(open_tree_fd, DIR1, 0, t_overflowuid, t_overflowgid, false)) {
8358                 log_stderr("failure: expected_uid_gid");
8359                 goto out;
8360         }
8361
8362         if (!__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 0, 0, false) &&
8363             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 10000, 10000, false) &&
8364             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, t_overflowuid, t_overflowgid, false)) {
8365                 log_stderr("failure: expected_uid_gid");
8366                 goto out;
8367         }
8368
8369         dir1_fd2 = openat(t_dir1_fd, DIR1,
8370                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8371         if (dir1_fd2 < 0) {
8372                 log_stderr("failure: openat");
8373                 goto out;
8374         }
8375
8376         if (!__expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0, false) &&
8377             !__expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000, false)) {
8378                 log_stderr("failure: expected_uid_gid");
8379                 goto out;
8380         }
8381
8382         if (!__expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0, false) &&
8383             !__expected_uid_gid(t_dir1_fd, FILE2, 0, 10000, 10000, false)) {
8384                 log_stderr("failure: expected_uid_gid");
8385                 goto out;
8386         }
8387
8388         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8389             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8390                 log_stderr("failure: expected_uid_gid");
8391                 goto out;
8392         }
8393
8394         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8395             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8396                 log_stderr("failure: expected_uid_gid");
8397                 goto out;
8398         }
8399
8400         if (!__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 0, 0, false) &&
8401             !__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 10000, 10000, false)) {
8402                 log_stderr("failure: expected_uid_gid");
8403                 goto out;
8404         }
8405
8406         fret = EXIT_SUCCESS;
8407
8408 out:
8409         safe_close(file1_fd);
8410         safe_close(file2_fd);
8411         safe_close(dir1_fd);
8412         safe_close(dir1_fd2);
8413
8414         pthread_exit(INT_TO_PTR(fret));
8415 }
8416
8417 static int threaded_idmapped_mount_interactions(void)
8418 {
8419         int i;
8420         int fret = -1;
8421         pid_t pid;
8422         pthread_attr_t thread_attr;
8423         pthread_t threads[2];
8424
8425         pthread_attr_init(&thread_attr);
8426
8427         for (i = 0; i < 1000; i++) {
8428                 int ret1 = 0, ret2 = 0, tret1 = 0, tret2 = 0;
8429
8430                 pid = fork();
8431                 if (pid < 0) {
8432                         log_stderr("failure: fork");
8433                         goto out;
8434                 }
8435                 if (pid == 0) {
8436                         int open_tree_fd = -EBADF;
8437
8438                         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8439                                                      AT_EMPTY_PATH |
8440                                                      AT_NO_AUTOMOUNT |
8441                                                      AT_SYMLINK_NOFOLLOW |
8442                                                      OPEN_TREE_CLOEXEC |
8443                                                      OPEN_TREE_CLONE);
8444                         if (open_tree_fd < 0)
8445                                 die("failure: sys_open_tree");
8446
8447                         if (pthread_create(&threads[0], &thread_attr,
8448                                            idmapped_mount_create_cb,
8449                                            INT_TO_PTR(open_tree_fd)))
8450                                 die("failure: pthread_create");
8451
8452                         if (pthread_create(&threads[1], &thread_attr,
8453                                            idmapped_mount_operations_cb,
8454                                            INT_TO_PTR(open_tree_fd)))
8455                                 die("failure: pthread_create");
8456
8457                         ret1 = pthread_join(threads[0], INT_TO_PTR(tret1));
8458                         ret2 = pthread_join(threads[1], INT_TO_PTR(tret2));
8459
8460                         if (ret1) {
8461                                 errno = ret1;
8462                                 die("failure: pthread_join");
8463                         }
8464
8465                         if (ret2) {
8466                                 errno = ret2;
8467                                 die("failure: pthread_join");
8468                         }
8469
8470                         if (tret1 || tret2)
8471                                 exit(EXIT_FAILURE);
8472
8473                         exit(EXIT_SUCCESS);
8474
8475                 }
8476
8477                 if (wait_for_pid(pid)) {
8478                         log_stderr("failure: iteration %d", i);
8479                         goto out;
8480                 }
8481
8482                 rm_r(t_dir1_fd, ".");
8483
8484         }
8485
8486         fret = 0;
8487         log_debug("Ran test");
8488
8489 out:
8490         return fret;
8491 }
8492
8493 static int setattr_truncate(void)
8494 {
8495         int fret = -1;
8496         int file1_fd = -EBADF;
8497
8498         /* create regular file via open() */
8499         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8500         if (file1_fd < 0) {
8501                 log_stderr("failure: create");
8502                 goto out;
8503         }
8504
8505         if (ftruncate(file1_fd, 10000)) {
8506                 log_stderr("failure: ftruncate");
8507                 goto out;
8508         }
8509
8510         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8511                 log_stderr("failure: check ownership");
8512                 goto out;
8513         }
8514
8515         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 10000)) {
8516                 log_stderr("failure: expected_file_size");
8517                 goto out;
8518         }
8519
8520         if (ftruncate(file1_fd, 0)) {
8521                 log_stderr("failure: ftruncate");
8522                 goto out;
8523         }
8524
8525         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8526                 log_stderr("failure: check ownership");
8527                 goto out;
8528         }
8529
8530         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 0)) {
8531                 log_stderr("failure: expected_file_size");
8532                 goto out;
8533         }
8534
8535         if (unlinkat(t_dir1_fd, FILE1, 0)) {
8536                 log_stderr("failure: remove");
8537                 goto out;
8538         }
8539
8540         fret = 0;
8541         log_debug("Ran test");
8542 out:
8543         safe_close(file1_fd);
8544
8545         return fret;
8546 }
8547
8548 static int setattr_truncate_idmapped(void)
8549 {
8550         int fret = -1;
8551         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8552         pid_t pid;
8553         struct mount_attr attr = {
8554                 .attr_set = MOUNT_ATTR_IDMAP,
8555         };
8556
8557         /* Changing mount properties on a detached mount. */
8558         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8559         if (attr.userns_fd < 0) {
8560                 log_stderr("failure: get_userns_fd");
8561                 goto out;
8562         }
8563
8564         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8565                                      AT_EMPTY_PATH |
8566                                      AT_NO_AUTOMOUNT |
8567                                      AT_SYMLINK_NOFOLLOW |
8568                                      OPEN_TREE_CLOEXEC |
8569                                      OPEN_TREE_CLONE);
8570         if (open_tree_fd < 0) {
8571                 log_stderr("failure: sys_open_tree");
8572                 goto out;
8573         }
8574
8575         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8576                 log_stderr("failure: sys_mount_setattr");
8577                 goto out;
8578         }
8579
8580         pid = fork();
8581         if (pid < 0) {
8582                 log_stderr("failure: fork");
8583                 goto out;
8584         }
8585         if (pid == 0) {
8586                 if (!switch_ids(10000, 10000))
8587                         die("failure: switch_ids");
8588
8589                 /* create regular file via open() */
8590                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8591                 if (file1_fd < 0)
8592                         die("failure: create");
8593
8594                 if (ftruncate(file1_fd, 10000))
8595                         die("failure: ftruncate");
8596
8597                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8598                         die("failure: check ownership");
8599
8600                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8601                         die("failure: expected_file_size");
8602
8603                 if (ftruncate(file1_fd, 0))
8604                         die("failure: ftruncate");
8605
8606                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8607                         die("failure: check ownership");
8608
8609                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8610                         die("failure: expected_file_size");
8611
8612                 exit(EXIT_SUCCESS);
8613         }
8614         if (wait_for_pid(pid))
8615                 goto out;
8616
8617         pid = fork();
8618         if (pid < 0) {
8619                 log_stderr("failure: fork");
8620                 goto out;
8621         }
8622         if (pid == 0) {
8623                 int file1_fd2 = -EBADF;
8624
8625                 /* create regular file via open() */
8626                 file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8627                 if (file1_fd2 < 0)
8628                         die("failure: create");
8629
8630                 if (ftruncate(file1_fd2, 10000))
8631                         die("failure: ftruncate");
8632
8633                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8634                         die("failure: check ownership");
8635
8636                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8637                         die("failure: expected_file_size");
8638
8639                 if (ftruncate(file1_fd2, 0))
8640                         die("failure: ftruncate");
8641
8642                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8643                         die("failure: check ownership");
8644
8645                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8646                         die("failure: expected_file_size");
8647
8648                 exit(EXIT_SUCCESS);
8649         }
8650         if (wait_for_pid(pid))
8651                 goto out;
8652
8653         fret = 0;
8654         log_debug("Ran test");
8655 out:
8656         safe_close(file1_fd);
8657         safe_close(open_tree_fd);
8658
8659         return fret;
8660 }
8661
8662 static int setattr_truncate_idmapped_in_userns(void)
8663 {
8664         int fret = -1;
8665         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8666         struct mount_attr attr = {
8667                 .attr_set = MOUNT_ATTR_IDMAP,
8668         };
8669         pid_t pid;
8670
8671         /* Changing mount properties on a detached mount. */
8672         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8673         if (attr.userns_fd < 0) {
8674                 log_stderr("failure: get_userns_fd");
8675                 goto out;
8676         }
8677
8678         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8679                                      AT_EMPTY_PATH |
8680                                      AT_NO_AUTOMOUNT |
8681                                      AT_SYMLINK_NOFOLLOW |
8682                                      OPEN_TREE_CLOEXEC |
8683                                      OPEN_TREE_CLONE);
8684         if (open_tree_fd < 0) {
8685                 log_stderr("failure: sys_open_tree");
8686                 goto out;
8687         }
8688
8689         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8690                 log_stderr("failure: sys_mount_setattr");
8691                 goto out;
8692         }
8693
8694         pid = fork();
8695         if (pid < 0) {
8696                 log_stderr("failure: fork");
8697                 goto out;
8698         }
8699         if (pid == 0) {
8700                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8701                         die("failure: switch_userns");
8702
8703                 /* create regular file via open() */
8704                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8705                 if (file1_fd < 0)
8706                         die("failure: create");
8707
8708                 if (ftruncate(file1_fd, 10000))
8709                         die("failure: ftruncate");
8710
8711                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8712                         die("failure: check ownership");
8713
8714                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8715                         die("failure: expected_file_size");
8716
8717                 if (ftruncate(file1_fd, 0))
8718                         die("failure: ftruncate");
8719
8720                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8721                         die("failure: check ownership");
8722
8723                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8724                         die("failure: expected_file_size");
8725
8726                 if (unlinkat(open_tree_fd, FILE1, 0))
8727                         die("failure: delete");
8728
8729                 exit(EXIT_SUCCESS);
8730         }
8731         if (wait_for_pid(pid))
8732                 goto out;
8733
8734         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8735                 log_stderr("failure: fchownat");
8736                 goto out;
8737         }
8738
8739         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8740                 log_stderr("failure: fchownat");
8741                 goto out;
8742         }
8743
8744         pid = fork();
8745         if (pid < 0) {
8746                 log_stderr("failure: fork");
8747                 goto out;
8748         }
8749         if (pid == 0) {
8750                 if (!caps_supported()) {
8751                         log_debug("skip: capability library not installed");
8752                         exit(EXIT_SUCCESS);
8753                 }
8754
8755                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8756                         die("failure: switch_userns");
8757
8758                 /* create regular file via open() */
8759                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8760                 if (file1_fd < 0)
8761                         die("failure: create");
8762
8763                 if (ftruncate(file1_fd, 10000))
8764                         die("failure: ftruncate");
8765
8766                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8767                         die("failure: check ownership");
8768
8769                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8770                         die("failure: expected_file_size");
8771
8772                 if (ftruncate(file1_fd, 0))
8773                         die("failure: ftruncate");
8774
8775                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8776                         die("failure: check ownership");
8777
8778                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8779                         die("failure: expected_file_size");
8780
8781                 if (unlinkat(open_tree_fd, FILE1, 0))
8782                         die("failure: delete");
8783
8784                 exit(EXIT_SUCCESS);
8785         }
8786         if (wait_for_pid(pid))
8787                 goto out;
8788
8789         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8790                 log_stderr("failure: fchownat");
8791                 goto out;
8792         }
8793
8794         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8795                 log_stderr("failure: fchownat");
8796                 goto out;
8797         }
8798
8799         pid = fork();
8800         if (pid < 0) {
8801                 log_stderr("failure: fork");
8802                 goto out;
8803         }
8804         if (pid == 0) {
8805                 if (!caps_supported()) {
8806                         log_debug("skip: capability library not installed");
8807                         exit(EXIT_SUCCESS);
8808                 }
8809
8810                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8811                         die("failure: switch_userns");
8812
8813                 /* create regular file via open() */
8814                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8815                 if (file1_fd < 0)
8816                         die("failure: create");
8817
8818                 if (ftruncate(file1_fd, 10000))
8819                         die("failure: ftruncate");
8820
8821                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8822                         die("failure: check ownership");
8823
8824                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8825                         die("failure: expected_file_size");
8826
8827                 if (ftruncate(file1_fd, 0))
8828                         die("failure: ftruncate");
8829
8830                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8831                         die("failure: check ownership");
8832
8833                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8834                         die("failure: expected_file_size");
8835
8836                 if (unlinkat(open_tree_fd, FILE1, 0))
8837                         die("failure: delete");
8838
8839                 exit(EXIT_SUCCESS);
8840         }
8841         if (wait_for_pid(pid))
8842                 goto out;
8843
8844         fret = 0;
8845         log_debug("Ran test");
8846 out:
8847         safe_close(attr.userns_fd);
8848         safe_close(file1_fd);
8849         safe_close(open_tree_fd);
8850
8851         return fret;
8852 }
8853
8854 static int nested_userns(void)
8855 {
8856         int fret = -1;
8857         int ret;
8858         pid_t pid;
8859         unsigned int id;
8860         struct list *it, *next;
8861         struct userns_hierarchy hierarchy[] = {
8862                 { .level = 1, .fd_userns = -EBADF, },
8863                 { .level = 2, .fd_userns = -EBADF, },
8864                 { .level = 3, .fd_userns = -EBADF, },
8865                 { .level = 4, .fd_userns = -EBADF, },
8866                 /* Dummy entry that marks the end. */
8867                 { .level = MAX_USERNS_LEVEL, .fd_userns = -EBADF, },
8868         };
8869         struct mount_attr attr_level1 = {
8870                 .attr_set       = MOUNT_ATTR_IDMAP,
8871                 .userns_fd      = -EBADF,
8872         };
8873         struct mount_attr attr_level2 = {
8874                 .attr_set       = MOUNT_ATTR_IDMAP,
8875                 .userns_fd      = -EBADF,
8876         };
8877         struct mount_attr attr_level3 = {
8878                 .attr_set       = MOUNT_ATTR_IDMAP,
8879                 .userns_fd      = -EBADF,
8880         };
8881         struct mount_attr attr_level4 = {
8882                 .attr_set       = MOUNT_ATTR_IDMAP,
8883                 .userns_fd      = -EBADF,
8884         };
8885         int fd_dir1 = -EBADF,
8886             fd_open_tree_level1 = -EBADF,
8887             fd_open_tree_level2 = -EBADF,
8888             fd_open_tree_level3 = -EBADF,
8889             fd_open_tree_level4 = -EBADF;
8890         const unsigned int id_file_range = 10000;
8891
8892         list_init(&hierarchy[0].id_map);
8893         list_init(&hierarchy[1].id_map);
8894         list_init(&hierarchy[2].id_map);
8895         list_init(&hierarchy[3].id_map);
8896
8897         /*
8898          * Give a large map to the outermost user namespace so we can create
8899          * comfortable nested maps.
8900          */
8901         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_UID);
8902         if (ret) {
8903                 log_stderr("failure: adding uidmap for userns at level 1");
8904                 goto out;
8905         }
8906
8907         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_GID);
8908         if (ret) {
8909                 log_stderr("failure: adding gidmap for userns at level 1");
8910                 goto out;
8911         }
8912
8913         /* This is uid:0->2000000:100000000 in init userns. */
8914         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_UID);
8915         if (ret) {
8916                 log_stderr("failure: adding uidmap for userns at level 2");
8917                 goto out;
8918         }
8919
8920         /* This is gid:0->2000000:100000000 in init userns. */
8921         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_GID);
8922         if (ret) {
8923                 log_stderr("failure: adding gidmap for userns at level 2");
8924                 goto out;
8925         }
8926
8927         /* This is uid:0->3000000:999 in init userns. */
8928         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_UID);
8929         if (ret) {
8930                 log_stderr("failure: adding uidmap for userns at level 3");
8931                 goto out;
8932         }
8933
8934         /* This is gid:0->3000000:999 in the init userns. */
8935         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_GID);
8936         if (ret) {
8937                 log_stderr("failure: adding gidmap for userns at level 3");
8938                 goto out;
8939         }
8940
8941         /* id 999 will remain unmapped. */
8942
8943         /* This is uid:1000->2001000:1 in init userns. */
8944         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_UID);
8945         if (ret) {
8946                 log_stderr("failure: adding uidmap for userns at level 3");
8947                 goto out;
8948         }
8949
8950         /* This is gid:1000->2001000:1 in init userns. */
8951         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_GID);
8952         if (ret) {
8953                 log_stderr("failure: adding gidmap for userns at level 3");
8954                 goto out;
8955         }
8956
8957         /* This is uid:1001->3001001:10000 in init userns. */
8958         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_UID);
8959         if (ret) {
8960                 log_stderr("failure: adding uidmap for userns at level 3");
8961                 goto out;
8962         }
8963
8964         /* This is gid:1001->3001001:10000 in init userns. */
8965         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_GID);
8966         if (ret) {
8967                 log_stderr("failure: adding gidmap for userns at level 3");
8968                 goto out;
8969         }
8970
8971         /* Don't write a mapping in the 4th userns. */
8972         list_empty(&hierarchy[4].id_map);
8973
8974         /* Create the actual userns hierarchy. */
8975         ret = create_userns_hierarchy(hierarchy);
8976         if (ret) {
8977                 log_stderr("failure: create userns hierarchy");
8978                 goto out;
8979         }
8980
8981         attr_level1.userns_fd = hierarchy[0].fd_userns;
8982         attr_level2.userns_fd = hierarchy[1].fd_userns;
8983         attr_level3.userns_fd = hierarchy[2].fd_userns;
8984         attr_level4.userns_fd = hierarchy[3].fd_userns;
8985
8986         /*
8987          * Create one directory where we create files for each uid/gid within
8988          * the first userns.
8989          */
8990         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
8991                 log_stderr("failure: mkdirat");
8992                 goto out;
8993         }
8994
8995         fd_dir1 = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
8996         if (fd_dir1 < 0) {
8997                 log_stderr("failure: openat");
8998                 goto out;
8999         }
9000
9001         for (id = 0; id <= id_file_range; id++) {
9002                 char file[256];
9003
9004                 snprintf(file, sizeof(file), DIR1 "/" FILE1 "_%u", id);
9005
9006                 if (mknodat(t_dir1_fd, file, S_IFREG | 0644, 0)) {
9007                         log_stderr("failure: create %s", file);
9008                         goto out;
9009                 }
9010
9011                 if (fchownat(t_dir1_fd, file, id, id, AT_SYMLINK_NOFOLLOW)) {
9012                         log_stderr("failure: fchownat %s", file);
9013                         goto out;
9014                 }
9015
9016                 if (!expected_uid_gid(t_dir1_fd, file, 0, id, id)) {
9017                         log_stderr("failure: check ownership %s", file);
9018                         goto out;
9019                 }
9020         }
9021
9022         /* Create detached mounts for all the user namespaces. */
9023         fd_open_tree_level1 = sys_open_tree(t_dir1_fd, DIR1,
9024                                             AT_NO_AUTOMOUNT |
9025                                             AT_SYMLINK_NOFOLLOW |
9026                                             OPEN_TREE_CLOEXEC |
9027                                             OPEN_TREE_CLONE);
9028         if (fd_open_tree_level1 < 0) {
9029                 log_stderr("failure: sys_open_tree");
9030                 goto out;
9031         }
9032
9033         fd_open_tree_level2 = sys_open_tree(t_dir1_fd, DIR1,
9034                                             AT_NO_AUTOMOUNT |
9035                                             AT_SYMLINK_NOFOLLOW |
9036                                             OPEN_TREE_CLOEXEC |
9037                                             OPEN_TREE_CLONE);
9038         if (fd_open_tree_level2 < 0) {
9039                 log_stderr("failure: sys_open_tree");
9040                 goto out;
9041         }
9042
9043         fd_open_tree_level3 = sys_open_tree(t_dir1_fd, DIR1,
9044                                             AT_NO_AUTOMOUNT |
9045                                             AT_SYMLINK_NOFOLLOW |
9046                                             OPEN_TREE_CLOEXEC |
9047                                             OPEN_TREE_CLONE);
9048         if (fd_open_tree_level3 < 0) {
9049                 log_stderr("failure: sys_open_tree");
9050                 goto out;
9051         }
9052
9053         fd_open_tree_level4 = sys_open_tree(t_dir1_fd, DIR1,
9054                                             AT_NO_AUTOMOUNT |
9055                                             AT_SYMLINK_NOFOLLOW |
9056                                             OPEN_TREE_CLOEXEC |
9057                                             OPEN_TREE_CLONE);
9058         if (fd_open_tree_level4 < 0) {
9059                 log_stderr("failure: sys_open_tree");
9060                 goto out;
9061         }
9062
9063         /* Turn detached mounts into detached idmapped mounts. */
9064         if (sys_mount_setattr(fd_open_tree_level1, "", AT_EMPTY_PATH,
9065                               &attr_level1, sizeof(attr_level1))) {
9066                 log_stderr("failure: sys_mount_setattr");
9067                 goto out;
9068         }
9069
9070         if (sys_mount_setattr(fd_open_tree_level2, "", AT_EMPTY_PATH,
9071                               &attr_level2, sizeof(attr_level2))) {
9072                 log_stderr("failure: sys_mount_setattr");
9073                 goto out;
9074         }
9075
9076         if (sys_mount_setattr(fd_open_tree_level3, "", AT_EMPTY_PATH,
9077                               &attr_level3, sizeof(attr_level3))) {
9078                 log_stderr("failure: sys_mount_setattr");
9079                 goto out;
9080         }
9081
9082         if (sys_mount_setattr(fd_open_tree_level4, "", AT_EMPTY_PATH,
9083                               &attr_level4, sizeof(attr_level4))) {
9084                 log_stderr("failure: sys_mount_setattr");
9085                 goto out;
9086         }
9087
9088         /* Verify that ownership looks correct for callers in the init userns. */
9089         for (id = 0; id <= id_file_range; id++) {
9090                 bool bret;
9091                 unsigned int id_level1, id_level2, id_level3;
9092                 char file[256];
9093
9094                 snprintf(file, sizeof(file), FILE1 "_%u", id);
9095
9096                 id_level1 = id + 1000000;
9097                 if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1)) {
9098                         log_stderr("failure: check ownership %s", file);
9099                         goto out;
9100                 }
9101
9102                 id_level2 = id + 2000000;
9103                 if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2)) {
9104                         log_stderr("failure: check ownership %s", file);
9105                         goto out;
9106                 }
9107
9108                 if (id == 999) {
9109                         /* This id is unmapped. */
9110                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9111                 } else if (id == 1000) {
9112                         id_level3 = id + 2000000; /* We punched a hole in the map at 1000. */
9113                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9114                 } else {
9115                         id_level3 = id + 3000000; /* Rest is business as usual. */
9116                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9117                 }
9118                 if (!bret) {
9119                         log_stderr("failure: check ownership %s", file);
9120                         goto out;
9121                 }
9122
9123                 if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid)) {
9124                         log_stderr("failure: check ownership %s", file);
9125                         goto out;
9126                 }
9127         }
9128
9129         /* Verify that ownership looks correct for callers in the first userns. */
9130         pid = fork();
9131         if (pid < 0) {
9132                 log_stderr("failure: fork");
9133                 goto out;
9134         }
9135         if (pid == 0) {
9136                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9137                         die("failure: switch_userns");
9138
9139                 for (id = 0; id <= id_file_range; id++) {
9140                         bool bret;
9141                         unsigned int id_level1, id_level2, id_level3;
9142                         char file[256];
9143
9144                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9145
9146                         id_level1 = id;
9147                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9148                                 die("failure: check ownership %s", file);
9149
9150                         id_level2 = id + 1000000;
9151                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9152                                 die("failure: check ownership %s", file);
9153
9154                         if (id == 999) {
9155                                 /* This id is unmapped. */
9156                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9157                         } else if (id == 1000) {
9158                                 id_level3 = id + 1000000; /* We punched a hole in the map at 1000. */
9159                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9160                         } else {
9161                                 id_level3 = id + 2000000; /* Rest is business as usual. */
9162                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9163                         }
9164                         if (!bret)
9165                                 die("failure: check ownership %s", file);
9166
9167                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9168                                 die("failure: check ownership %s", file);
9169                 }
9170
9171                 exit(EXIT_SUCCESS);
9172         }
9173         if (wait_for_pid(pid))
9174                 goto out;
9175
9176         /* Verify that ownership looks correct for callers in the second userns. */
9177         pid = fork();
9178         if (pid < 0) {
9179                 log_stderr("failure: fork");
9180                 goto out;
9181         }
9182         if (pid == 0) {
9183                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9184                         die("failure: switch_userns");
9185
9186                 for (id = 0; id <= id_file_range; id++) {
9187                         bool bret;
9188                         unsigned int id_level2, id_level3;
9189                         char file[256];
9190
9191                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9192
9193                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9194                                 die("failure: check ownership %s", file);
9195
9196                         id_level2 = id;
9197                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9198                                 die("failure: check ownership %s", file);
9199
9200                         if (id == 999) {
9201                                 /* This id is unmapped. */
9202                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9203                         } else if (id == 1000) {
9204                                 id_level3 = id; /* We punched a hole in the map at 1000. */
9205                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9206                         } else {
9207                                 id_level3 = id + 1000000; /* Rest is business as usual. */
9208                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9209                         }
9210                         if (!bret)
9211                                 die("failure: check ownership %s", file);
9212
9213                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9214                                 die("failure: check ownership %s", file);
9215                 }
9216
9217                 exit(EXIT_SUCCESS);
9218         }
9219         if (wait_for_pid(pid))
9220                 goto out;
9221
9222         /* Verify that ownership looks correct for callers in the third userns. */
9223         pid = fork();
9224         if (pid < 0) {
9225                 log_stderr("failure: fork");
9226                 goto out;
9227         }
9228         if (pid == 0) {
9229                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9230                         die("failure: switch_userns");
9231
9232                 for (id = 0; id <= id_file_range; id++) {
9233                         bool bret;
9234                         unsigned int id_level2, id_level3;
9235                         char file[256];
9236
9237                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9238
9239                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9240                                 die("failure: check ownership %s", file);
9241
9242                         if (id == 1000) {
9243                                 /*
9244                                  * The idmapping of the third userns has a hole
9245                                  * at uid/gid 1000. That means:
9246                                  * - 1000->userns_0(2000000) // init userns
9247                                  * - 1000->userns_1(2000000) // level 1
9248                                  * - 1000->userns_2(1000000) // level 2
9249                                  * - 1000->userns_3(1000)    // level 3 (because level 3 has a hole)
9250                                  */
9251                                 id_level2 = id;
9252                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2);
9253                         } else {
9254                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid);
9255                         }
9256                         if (!bret)
9257                                 die("failure: check ownership %s", file);
9258
9259
9260                         if (id == 999) {
9261                                 /* This id is unmapped. */
9262                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9263                         } else {
9264                                 id_level3 = id; /* Rest is business as usual. */
9265                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9266                         }
9267                         if (!bret)
9268                                 die("failure: check ownership %s", file);
9269
9270                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9271                                 die("failure: check ownership %s", file);
9272                 }
9273
9274                 exit(EXIT_SUCCESS);
9275         }
9276         if (wait_for_pid(pid))
9277                 goto out;
9278
9279         /* Verify that ownership looks correct for callers in the fourth userns. */
9280         pid = fork();
9281         if (pid < 0) {
9282                 log_stderr("failure: fork");
9283                 goto out;
9284         }
9285         if (pid == 0) {
9286                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9287                         die("failure: switch_userns");
9288
9289                 for (id = 0; id <= id_file_range; id++) {
9290                         char file[256];
9291
9292                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9293
9294                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9295                                 die("failure: check ownership %s", file);
9296
9297                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9298                                 die("failure: check ownership %s", file);
9299
9300                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9301                                 die("failure: check ownership %s", file);
9302
9303                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9304                                 die("failure: check ownership %s", file);
9305                 }
9306
9307                 exit(EXIT_SUCCESS);
9308         }
9309         if (wait_for_pid(pid))
9310                 goto out;
9311
9312         /* Verify that chown works correctly for callers in the first userns. */
9313         pid = fork();
9314         if (pid < 0) {
9315                 log_stderr("failure: fork");
9316                 goto out;
9317         }
9318         if (pid == 0) {
9319                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9320                         die("failure: switch_userns");
9321
9322                 for (id = 0; id <= id_file_range; id++) {
9323                         bool bret;
9324                         unsigned int id_level1, id_level2, id_level3, id_new;
9325                         char file[256];
9326
9327                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9328
9329                         id_new = id + 1;
9330                         if (fchownat(fd_open_tree_level1, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9331                                 die("failure: fchownat %s", file);
9332
9333                         id_level1 = id_new;
9334                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9335                                 die("failure: check ownership %s", file);
9336
9337                         id_level2 = id_new + 1000000;
9338                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9339                                 die("failure: check ownership %s", file);
9340
9341                         if (id_new == 999) {
9342                                 /* This id is unmapped. */
9343                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9344                         } else if (id_new == 1000) {
9345                                 id_level3 = id_new + 1000000; /* We punched a hole in the map at 1000. */
9346                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9347                         } else {
9348                                 id_level3 = id_new + 2000000; /* Rest is business as usual. */
9349                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9350                         }
9351                         if (!bret)
9352                                 die("failure: check ownership %s", file);
9353
9354                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9355                                 die("failure: check ownership %s", file);
9356
9357                         /* Revert ownership. */
9358                         if (fchownat(fd_open_tree_level1, file, id, id, AT_SYMLINK_NOFOLLOW))
9359                                 die("failure: fchownat %s", file);
9360                 }
9361
9362                 exit(EXIT_SUCCESS);
9363         }
9364         if (wait_for_pid(pid))
9365                 goto out;
9366
9367         /* Verify that chown works correctly for callers in the second userns. */
9368         pid = fork();
9369         if (pid < 0) {
9370                 log_stderr("failure: fork");
9371                 goto out;
9372         }
9373         if (pid == 0) {
9374                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9375                         die("failure: switch_userns");
9376
9377                 for (id = 0; id <= id_file_range; id++) {
9378                         bool bret;
9379                         unsigned int id_level2, id_level3, id_new;
9380                         char file[256];
9381
9382                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9383
9384                         id_new = id + 1;
9385                         if (fchownat(fd_open_tree_level2, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9386                                 die("failure: fchownat %s", file);
9387
9388                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9389                                 die("failure: check ownership %s", file);
9390
9391                         id_level2 = id_new;
9392                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9393                                 die("failure: check ownership %s", file);
9394
9395                         if (id_new == 999) {
9396                                 /* This id is unmapped. */
9397                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9398                         } else if (id_new == 1000) {
9399                                 id_level3 = id_new; /* We punched a hole in the map at 1000. */
9400                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9401                         } else {
9402                                 id_level3 = id_new + 1000000; /* Rest is business as usual. */
9403                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9404                         }
9405                         if (!bret)
9406                                 die("failure: check ownership %s", file);
9407
9408                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9409                                 die("failure: check ownership %s", file);
9410
9411                         /* Revert ownership. */
9412                         if (fchownat(fd_open_tree_level2, file, id, id, AT_SYMLINK_NOFOLLOW))
9413                                 die("failure: fchownat %s", file);
9414                 }
9415
9416                 exit(EXIT_SUCCESS);
9417         }
9418         if (wait_for_pid(pid))
9419                 goto out;
9420
9421         /* Verify that chown works correctly for callers in the third userns. */
9422         pid = fork();
9423         if (pid < 0) {
9424                 log_stderr("failure: fork");
9425                 goto out;
9426         }
9427         if (pid == 0) {
9428                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9429                         die("failure: switch_userns");
9430
9431                 for (id = 0; id <= id_file_range; id++) {
9432                         unsigned int id_new;
9433                         char file[256];
9434
9435                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9436
9437                         id_new = id + 1;
9438                         if (id_new == 999 || id_new == 1000) {
9439                                 /*
9440                                  * We can't change ownership as we can't
9441                                  * chown from or to an unmapped id.
9442                                  */
9443                                 if (!fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9444                                         die("failure: fchownat %s", file);
9445                         } else {
9446                                 if (fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9447                                         die("failure: fchownat %s", file);
9448                         }
9449
9450                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9451                                 die("failure: check ownership %s", file);
9452
9453                         /* There's no id 1000 anymore as we changed ownership for id 1000 to 1001 above. */
9454                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9455                                 die("failure: check ownership %s", file);
9456
9457                         if (id_new == 999) {
9458                                 /*
9459                                  * We did not change ownership as we can't
9460                                  * chown to an unmapped id.
9461                                  */
9462                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id, id))
9463                                         die("failure: check ownership %s", file);
9464                         } else if (id_new == 1000) {
9465                                 /*
9466                                  * We did not change ownership as we can't
9467                                  * chown from an unmapped id.
9468                                  */
9469                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9470                                         die("failure: check ownership %s", file);
9471                         } else {
9472                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id_new, id_new))
9473                                         die("failure: check ownership %s", file);
9474                         }
9475
9476                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9477                                 die("failure: check ownership %s", file);
9478
9479                         /* Revert ownership. */
9480                         if (id_new != 999 && id_new != 1000) {
9481                                 if (fchownat(fd_open_tree_level3, file, id, id, AT_SYMLINK_NOFOLLOW))
9482                                         die("failure: fchownat %s", file);
9483                         }
9484                 }
9485
9486                 exit(EXIT_SUCCESS);
9487         }
9488         if (wait_for_pid(pid))
9489                 goto out;
9490
9491         /* Verify that chown works correctly for callers in the fourth userns. */
9492         pid = fork();
9493         if (pid < 0) {
9494                 log_stderr("failure: fork");
9495                 goto out;
9496         }
9497         if (pid == 0) {
9498                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9499                         die("failure: switch_userns");
9500
9501                 for (id = 0; id <= id_file_range; id++) {
9502                         char file[256];
9503                         unsigned long id_new;
9504
9505                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9506
9507                         id_new = id + 1;
9508                         if (!fchownat(fd_open_tree_level4, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9509                                 die("failure: fchownat %s", file);
9510
9511                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9512                                 die("failure: check ownership %s", file);
9513
9514                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9515                                 die("failure: check ownership %s", file);
9516
9517                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9518                                 die("failure: check ownership %s", file);
9519
9520                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9521                                 die("failure: check ownership %s", file);
9522
9523                 }
9524
9525                 exit(EXIT_SUCCESS);
9526         }
9527         if (wait_for_pid(pid))
9528                 goto out;
9529
9530         fret = 0;
9531         log_debug("Ran test");
9532
9533 out:
9534         list_for_each_safe(it, &hierarchy[0].id_map, next) {
9535                 list_del(it);
9536                 free(it->elem);
9537                 free(it);
9538         }
9539
9540         list_for_each_safe(it, &hierarchy[1].id_map, next) {
9541                 list_del(it);
9542                 free(it->elem);
9543                 free(it);
9544         }
9545
9546         list_for_each_safe(it, &hierarchy[2].id_map, next) {
9547                 list_del(it);
9548                 free(it->elem);
9549                 free(it);
9550         }
9551
9552         safe_close(hierarchy[0].fd_userns);
9553         safe_close(hierarchy[1].fd_userns);
9554         safe_close(hierarchy[2].fd_userns);
9555         safe_close(fd_dir1);
9556         safe_close(fd_open_tree_level1);
9557         safe_close(fd_open_tree_level2);
9558         safe_close(fd_open_tree_level3);
9559         safe_close(fd_open_tree_level4);
9560         return fret;
9561 }
9562
/*
 * Local fallback for struct btrfs_ioctl_vol_args. It is only compiled when
 * HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS is not defined (presumably set by the build
 * system when the system headers already provide the struct - TODO confirm).
 */
#ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS

/* Largest name length (excluding the terminating NUL) that fits in the struct. */
#ifndef BTRFS_PATH_NAME_MAX
#define BTRFS_PATH_NAME_MAX 4087
#endif

/* Argument block passed to BTRFS_IOC_SNAP_DESTROY. */
struct btrfs_ioctl_vol_args {
	__s64 fd;
	/* NUL-terminated name; the helpers in this file reject longer names. */
	char name[BTRFS_PATH_NAME_MAX + 1];
};
#endif
9574
/*
 * Local fallback for struct btrfs_qgroup_limit, compiled only when
 * HAVE_STRUCT_BTRFS_QGROUP_LIMIT is not defined. It is embedded by value in
 * struct btrfs_qgroup_inherit, so its size must be known here.
 */
#ifndef HAVE_STRUCT_BTRFS_QGROUP_LIMIT
struct btrfs_qgroup_limit {
	__u64 flags;
	__u64 max_rfer;
	__u64 max_excl;
	__u64 rsv_rfer;
	__u64 rsv_excl;
};
#endif
9584
/*
 * Local fallback for struct btrfs_qgroup_inherit, compiled only when
 * HAVE_STRUCT_BTRFS_QGROUP_INHERIT is not defined. struct
 * btrfs_ioctl_vol_args_v2 refers to it through a pointer member.
 */
#ifndef HAVE_STRUCT_BTRFS_QGROUP_INHERIT
struct btrfs_qgroup_inherit {
	__u64 flags;
	__u64 num_qgroups;
	__u64 num_ref_copies;
	__u64 num_excl_copies;
	struct btrfs_qgroup_limit lim;
	/* Zero-length trailing array (GNU extension); contributes no size. */
	__u64 qgroups[0];
};
#endif
9595
/*
 * Local definition of struct btrfs_ioctl_vol_args_v2. It is used when the
 * system headers either lack the struct entirely or provide a version without
 * the subvolid member. In the latter case the include block at the top of the
 * file renames the header's struct to override_btrfs_ioctl_vol_args_v2 while
 * <linux/btrfs.h> is included, so this definition does not clash with it.
 */
#if !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2) || !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID)

/* Largest name length (excluding the terminating NUL) that fits in the struct. */
#ifndef BTRFS_SUBVOL_NAME_MAX
#define BTRFS_SUBVOL_NAME_MAX 4039
#endif

/*
 * Argument block for the *_V2 subvolume ioctls used in this file
 * (BTRFS_IOC_SNAP_DESTROY_V2, BTRFS_IOC_SNAP_CREATE_V2 and
 * BTRFS_IOC_SUBVOL_CREATE_V2).
 */
struct btrfs_ioctl_vol_args_v2 {
	__s64 fd;
	__u64 transid;
	__u64 flags;
	union {
		struct {
			__u64 size;
			struct btrfs_qgroup_inherit *qgroup_inherit;
		};
		__u64 unused[4];
	};
	/*
	 * The target is identified either by name or by id. The helpers in
	 * this file set BTRFS_SUBVOL_SPEC_BY_ID in flags when they fill in
	 * subvolid and otherwise fill in a NUL-terminated name.
	 */
	union {
		char name[BTRFS_SUBVOL_NAME_MAX + 1];
		__u64 devid;
		__u64 subvolid;
	};
};
#endif
9620
/*
 * Local fallback for struct btrfs_ioctl_ino_lookup_args, compiled only when
 * HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS is not defined.
 */
#ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS

/* Size in bytes of the name buffer in struct btrfs_ioctl_ino_lookup_args. */
#ifndef BTRFS_INO_LOOKUP_PATH_MAX
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
#endif
/* Argument block for BTRFS_IOC_INO_LOOKUP (declared _IOWR, so also an output). */
struct btrfs_ioctl_ino_lookup_args {
	/* btrfs_get_subvolume_id() passes 0 here and reads the value back. */
	__u64 treeid;
	__u64 objectid;
	char name[BTRFS_INO_LOOKUP_PATH_MAX];
};
#endif
9632
/*
 * Local fallback for struct btrfs_ioctl_ino_lookup_user_args, compiled only
 * when HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS is not defined.
 */
#ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS

/* Largest name length (excluding the terminating NUL) held by the name member. */
#ifndef BTRFS_VOL_NAME_MAX
#define BTRFS_VOL_NAME_MAX 255
#endif

/* Size of the path member: what is left of 4080 bytes after the name buffer. */
#ifndef BTRFS_INO_LOOKUP_USER_PATH_MAX
#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
#endif

/* Argument block for BTRFS_IOC_INO_LOOKUP_USER (declared _IOWR, so also an output). */
struct btrfs_ioctl_ino_lookup_user_args {
	__u64 dirid;
	__u64 treeid;
	char name[BTRFS_VOL_NAME_MAX + 1];
	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
};
#endif
9650
/*
 * Local fallback for struct btrfs_ioctl_get_subvol_rootref_args, compiled only
 * when HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS is not defined.
 */
#ifndef HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS

/* Number of rootref slots in one argument block. */
#ifndef BTRFS_MAX_ROOTREF_BUFFER_NUM
#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
#endif

/* Argument block for BTRFS_IOC_GET_SUBVOL_ROOTREF (declared _IOWR, so also an output). */
struct btrfs_ioctl_get_subvol_rootref_args {
	__u64 min_treeid;
	struct {
		__u64 treeid;
		__u64 dirid;
	} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
	/* Presumably the number of valid rootref[] entries - TODO confirm. */
	__u8 num_items;
	/* Explicit padding after the single-byte counter. */
	__u8 align[7];
};
#endif
9667
/*
 * Fallback definitions for the btrfs ioctl request codes and constants used by
 * the helpers in this file. Each one sits behind its own #ifndef, so a
 * definition that is already in scope (e.g. from the system headers) wins.
 */

/* ioctl "type" value shared by every btrfs request code defined below. */
#ifndef BTRFS_IOCTL_MAGIC
#define BTRFS_IOCTL_MAGIC 0x94
#endif

/* Name based delete; issued by btrfs_delete_subvolume(). */
#ifndef BTRFS_IOC_SNAP_DESTROY
#define BTRFS_IOC_SNAP_DESTROY \
	_IOW(BTRFS_IOCTL_MAGIC, 15, struct btrfs_ioctl_vol_args)
#endif

/* Delete taking the v2 argument block; issued by btrfs_delete_subvolume_id(). */
#ifndef BTRFS_IOC_SNAP_DESTROY_V2
#define BTRFS_IOC_SNAP_DESTROY_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 63, struct btrfs_ioctl_vol_args_v2)
#endif

/* Snapshot creation; issued by btrfs_create_snapshot(). */
#ifndef BTRFS_IOC_SNAP_CREATE_V2
#define BTRFS_IOC_SNAP_CREATE_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 23, struct btrfs_ioctl_vol_args_v2)
#endif

/* Subvolume creation; issued by btrfs_create_subvolume(). */
#ifndef BTRFS_IOC_SUBVOL_CREATE_V2
#define BTRFS_IOC_SUBVOL_CREATE_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 24, struct btrfs_ioctl_vol_args_v2)
#endif

/* Read the 64-bit subvolume flags word. */
#ifndef BTRFS_IOC_SUBVOL_GETFLAGS
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#endif

/* Write the 64-bit subvolume flags word. */
#ifndef BTRFS_IOC_SUBVOL_SETFLAGS
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#endif

/* Inode lookup; btrfs_get_subvolume_id() reads the treeid back from it. */
#ifndef BTRFS_IOC_INO_LOOKUP
#define BTRFS_IOC_INO_LOOKUP \
	_IOWR(BTRFS_IOCTL_MAGIC, 18, struct btrfs_ioctl_ino_lookup_args)
#endif

#ifndef BTRFS_IOC_INO_LOOKUP_USER
#define BTRFS_IOC_INO_LOOKUP_USER \
	_IOWR(BTRFS_IOCTL_MAGIC, 62, struct btrfs_ioctl_ino_lookup_user_args)
#endif

#ifndef BTRFS_IOC_GET_SUBVOL_ROOTREF
#define BTRFS_IOC_GET_SUBVOL_ROOTREF \
	_IOWR(BTRFS_IOCTL_MAGIC, 61, struct btrfs_ioctl_get_subvol_rootref_args)
#endif

/* Flag bit tested and toggled by the btrfs_{get,set}_subvolume_ro() helpers. */
#ifndef BTRFS_SUBVOL_RDONLY
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#endif

/* Flag bit set by btrfs_delete_subvolume_id() when it fills in subvolid. */
#ifndef BTRFS_SUBVOL_SPEC_BY_ID
#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
#endif

/* Object id that btrfs_get_subvolume_id() passes to BTRFS_IOC_INO_LOOKUP. */
#ifndef BTRFS_FIRST_FREE_OBJECTID
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#endif

9726
9727 static int btrfs_delete_subvolume(int parent_fd, const char *name)
9728 {
9729         struct btrfs_ioctl_vol_args args = {};
9730         size_t len;
9731         int ret;
9732
9733         len = strlen(name);
9734         if (len >= sizeof(args.name))
9735                 return -ENAMETOOLONG;
9736
9737         memcpy(args.name, name, len);
9738         args.name[len] = '\0';
9739
9740         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY, &args);
9741         if (ret < 0)
9742                 return -1;
9743
9744         return 0;
9745 }
9746
9747 static int btrfs_delete_subvolume_id(int parent_fd, uint64_t subvolid)
9748 {
9749         struct btrfs_ioctl_vol_args_v2 args = {};
9750         int ret;
9751
9752         args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
9753         args.subvolid = subvolid;
9754
9755         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY_V2, &args);
9756         if (ret < 0)
9757                 return -1;
9758
9759         return 0;
9760 }
9761
9762 static int btrfs_create_subvolume(int parent_fd, const char *name)
9763 {
9764         struct btrfs_ioctl_vol_args_v2 args = {};
9765         size_t len;
9766         int ret;
9767
9768         len = strlen(name);
9769         if (len >= sizeof(args.name))
9770                 return -ENAMETOOLONG;
9771
9772         memcpy(args.name, name, len);
9773         args.name[len] = '\0';
9774
9775         ret = ioctl(parent_fd, BTRFS_IOC_SUBVOL_CREATE_V2, &args);
9776         if (ret < 0)
9777                 return -1;
9778
9779         return 0;
9780 }
9781
9782 static int btrfs_create_snapshot(int fd, int parent_fd, const char *name,
9783                                  int flags)
9784 {
9785         struct btrfs_ioctl_vol_args_v2 args = {
9786                 .fd = fd,
9787         };
9788         size_t len;
9789         int ret;
9790
9791         if (flags & ~BTRFS_SUBVOL_RDONLY)
9792                 return -EINVAL;
9793
9794         len = strlen(name);
9795         if (len >= sizeof(args.name))
9796                 return -ENAMETOOLONG;
9797         memcpy(args.name, name, len);
9798         args.name[len] = '\0';
9799
9800         if (flags & BTRFS_SUBVOL_RDONLY)
9801                 args.flags |= BTRFS_SUBVOL_RDONLY;
9802         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_CREATE_V2, &args);
9803         if (ret < 0)
9804                 return -1;
9805
9806         return 0;
9807 }
9808
9809 static int btrfs_get_subvolume_ro(int fd, bool *read_only_ret)
9810 {
9811         uint64_t flags;
9812         int ret;
9813
9814         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9815         if (ret < 0)
9816                 return -1;
9817
9818         *read_only_ret = flags & BTRFS_SUBVOL_RDONLY;
9819         return 0;
9820 }
9821
9822 static int btrfs_set_subvolume_ro(int fd, bool read_only)
9823 {
9824         uint64_t flags;
9825         int ret;
9826
9827         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9828         if (ret < 0)
9829                 return -1;
9830
9831         if (read_only)
9832                 flags |= BTRFS_SUBVOL_RDONLY;
9833         else
9834                 flags &= ~BTRFS_SUBVOL_RDONLY;
9835
9836         ret = ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
9837         if (ret < 0)
9838                 return -1;
9839
9840         return 0;
9841 }
9842
9843 static int btrfs_get_subvolume_id(int fd, uint64_t *id_ret)
9844 {
9845         struct btrfs_ioctl_ino_lookup_args args = {
9846             .treeid = 0,
9847             .objectid = BTRFS_FIRST_FREE_OBJECTID,
9848         };
9849         int ret;
9850
9851         ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
9852         if (ret < 0)
9853                 return -1;
9854
9855         *id_ret = args.treeid;
9856
9857         return 0;
9858 }
9859
9860 /*
9861  * The following helpers are adapted from the btrfsutils library. We can't use
9862  * the library directly since we need full control over how the subvolume
9863  * iteration happens. We need to be able to check whether unprivileged
9864  * subvolume iteration is possible, i.e. whether BTRFS_IOC_INO_LOOKUP_USER is
9865  * available and also ensure that it is actually used when looking up paths.
9866  */
/* One entry of the iterator's search stack (struct btrfs_iter.search_stack). */
struct btrfs_stack {
	/* Tree id given to append_stack() when the entry was pushed. */
	uint64_t tree_id;
	/*
	 * Zeroed by append_stack(); presumably the buffer for
	 * BTRFS_IOC_GET_SUBVOL_ROOTREF results of this tree - TODO confirm.
	 */
	struct btrfs_ioctl_get_subvol_rootref_args rootref_args;
	/* Zeroed by append_stack(); presumably a cursor into rootref_args - TODO confirm. */
	size_t items_pos;
	/* Offset into the iterator's cur_path that belongs to this entry. */
	size_t path_len;
};
9873
/* State of a subvolume iteration started with btrfs_iterator_start(). */
struct btrfs_iter {
	/* Descriptor handed to btrfs_iterator_start(). */
	int fd;
	/*
	 * Descriptor the iterator currently works relative to. Starts out equal
	 * to fd and is replaced as stack entries are pushed and popped.
	 */
	int cur_fd;

	/* Heap array of stack entries; stack_len are in use, stack_capacity are allocated. */
	struct btrfs_stack *search_stack;
	size_t stack_len;
	size_t stack_capacity;

	/* Heap buffer holding the current path and its allocated size in bytes. */
	char *cur_path;
	size_t cur_path_capacity;
};
9885
9886 static struct btrfs_stack *top_stack_entry(struct btrfs_iter *iter)
9887 {
9888         return &iter->search_stack[iter->stack_len - 1];
9889 }
9890
9891 static int pop_stack(struct btrfs_iter *iter)
9892 {
9893         struct btrfs_stack *top, *parent;
9894         int fd, parent_fd;
9895         size_t i;
9896
9897         if (iter->stack_len == 1) {
9898                 iter->stack_len--;
9899                 return 0;
9900         }
9901
9902         top = top_stack_entry(iter);
9903         iter->stack_len--;
9904         parent = top_stack_entry(iter);
9905
9906         fd = iter->cur_fd;
9907         for (i = parent->path_len; i < top->path_len; i++) {
9908                 if (i == 0 || iter->cur_path[i] == '/') {
9909                         parent_fd = openat(fd, "..", O_RDONLY);
9910                         if (fd != iter->cur_fd)
9911                                 close(fd);
9912                         if (parent_fd == -1)
9913                                 return -1;
9914                         fd = parent_fd;
9915                 }
9916         }
9917         if (iter->cur_fd != iter->fd)
9918                 close(iter->cur_fd);
9919         iter->cur_fd = fd;
9920
9921         return 0;
9922 }
9923
9924 static int append_stack(struct btrfs_iter *iter, uint64_t tree_id, size_t path_len)
9925 {
9926         struct btrfs_stack *entry;
9927
9928         if (iter->stack_len >= iter->stack_capacity) {
9929                 size_t new_capacity = iter->stack_capacity * 2;
9930                 struct btrfs_stack *new_search_stack;
9931 #ifdef HAVE_REALLOCARRAY
9932                 new_search_stack = reallocarray(iter->search_stack, new_capacity,
9933                                                 sizeof(*iter->search_stack));
9934 #else
9935                 new_search_stack = realloc(iter->search_stack, new_capacity * sizeof(*iter->search_stack));
9936 #endif
9937                 if (!new_search_stack)
9938                         return -ENOMEM;
9939
9940                 iter->stack_capacity = new_capacity;
9941                 iter->search_stack = new_search_stack;
9942         }
9943
9944         entry = &iter->search_stack[iter->stack_len];
9945
9946         memset(entry, 0, sizeof(*entry));
9947         entry->path_len = path_len;
9948         entry->tree_id = tree_id;
9949
9950         if (iter->stack_len) {
9951                 struct btrfs_stack *top;
9952                 char *path;
9953                 int fd;
9954
9955                 top = top_stack_entry(iter);
9956                 path = &iter->cur_path[top->path_len];
9957                 if (*path == '/')
9958                         path++;
9959                 fd = openat(iter->cur_fd, path, O_RDONLY);
9960                 if (fd == -1)
9961                         return -errno;
9962
9963                 close(iter->cur_fd);
9964                 iter->cur_fd = fd;
9965         }
9966
9967         iter->stack_len++;
9968
9969         return 0;
9970 }
9971
9972 static int btrfs_iterator_start(int fd, uint64_t top, struct btrfs_iter **ret)
9973 {
9974         struct btrfs_iter *iter;
9975         int err;
9976
9977         iter = malloc(sizeof(*iter));
9978         if (!iter)
9979                 return -ENOMEM;
9980
9981         iter->fd = fd;
9982         iter->cur_fd = fd;
9983
9984         iter->stack_len = 0;
9985         iter->stack_capacity = 4;
9986         iter->search_stack = malloc(sizeof(*iter->search_stack) *
9987                                     iter->stack_capacity);
9988         if (!iter->search_stack) {
9989                 err = -ENOMEM;
9990                 goto out_iter;
9991         }
9992
9993         iter->cur_path_capacity = 256;
9994         iter->cur_path = malloc(iter->cur_path_capacity);
9995         if (!iter->cur_path) {
9996                 err = -ENOMEM;
9997                 goto out_search_stack;
9998         }
9999
10000         err = append_stack(iter, top, 0);
10001         if (err)
10002                 goto out_cur_path;
10003
10004         *ret = iter;
10005
10006         return 0;
10007
10008 out_cur_path:
10009         free(iter->cur_path);
10010 out_search_stack:
10011         free(iter->search_stack);
10012 out_iter:
10013         free(iter);
10014         return err;
10015 }
10016
10017 static void btrfs_iterator_end(struct btrfs_iter *iter)
10018 {
10019         if (iter) {
10020                 free(iter->cur_path);
10021                 free(iter->search_stack);
10022                 if (iter->cur_fd != iter->fd)
10023                         close(iter->cur_fd);
10024                 close(iter->fd);
10025                 free(iter);
10026         }
10027 }
10028
10029 static int __append_path(struct btrfs_iter *iter, const char *name,
10030                          size_t name_len, const char *dir, size_t dir_len,
10031                          size_t *path_len_ret)
10032 {
10033         struct btrfs_stack *top = top_stack_entry(iter);
10034         size_t path_len;
10035         char *p;
10036
10037         path_len = top->path_len;
10038         /*
10039          * We need a joining slash if we have a current path and a subdirectory.
10040          */
10041         if (top->path_len && dir_len)
10042                 path_len++;
10043         path_len += dir_len;
10044         /*
10045          * We need another joining slash if we have a current path and a name,
10046          * but not if we have a subdirectory, because the lookup ioctl includes
10047          * a trailing slash.
10048          */
10049         if (top->path_len && !dir_len && name_len)
10050                 path_len++;
10051         path_len += name_len;
10052
10053         /* We need one extra character for the NUL terminator. */
10054         if (path_len + 1 > iter->cur_path_capacity) {
10055                 char *tmp = realloc(iter->cur_path, path_len + 1);
10056
10057                 if (!tmp)
10058                         return -ENOMEM;
10059                 iter->cur_path = tmp;
10060                 iter->cur_path_capacity = path_len + 1;
10061         }
10062
10063         p = iter->cur_path + top->path_len;
10064         if (top->path_len && dir_len)
10065                 *p++ = '/';
10066         memcpy(p, dir, dir_len);
10067         p += dir_len;
10068         if (top->path_len && !dir_len && name_len)
10069                 *p++ = '/';
10070         memcpy(p, name, name_len);
10071         p += name_len;
10072         *p = '\0';
10073
10074         *path_len_ret = path_len;
10075
10076         return 0;
10077 }
10078
10079 static int get_subvolume_path(struct btrfs_iter *iter, uint64_t treeid,
10080                               uint64_t dirid, size_t *path_len_ret)
10081 {
10082         struct btrfs_ioctl_ino_lookup_user_args args = {
10083                 .treeid = treeid,
10084                 .dirid = dirid,
10085         };
10086         int ret;
10087
10088         ret = ioctl(iter->cur_fd, BTRFS_IOC_INO_LOOKUP_USER, &args);
10089         if (ret == -1)
10090                 return -1;
10091
10092         return __append_path(iter, args.name, strlen(args.name), args.path,
10093                              strlen(args.path), path_len_ret);
10094 }
10095
10096 static int btrfs_iterator_next(struct btrfs_iter *iter, char **path_ret,
10097                                uint64_t *id_ret)
10098 {
10099         struct btrfs_stack *top;
10100         uint64_t treeid, dirid;
10101         size_t path_len;
10102         int ret, err;
10103
10104         for (;;) {
10105                 for (;;) {
10106                         if (iter->stack_len == 0)
10107                                 return 1;
10108
10109                         top = top_stack_entry(iter);
10110                         if (top->items_pos < top->rootref_args.num_items) {
10111                                 break;
10112                         } else {
10113                                 ret = ioctl(iter->cur_fd,
10114                                             BTRFS_IOC_GET_SUBVOL_ROOTREF,
10115                                             &top->rootref_args);
10116                                 if (ret == -1 && errno != EOVERFLOW)
10117                                         return -1;
10118                                 top->items_pos = 0;
10119
10120                                 if (top->rootref_args.num_items == 0) {
10121                                         err = pop_stack(iter);
10122                                         if (err)
10123                                                 return err;
10124                                 }
10125                         }
10126                 }
10127
10128                 treeid = top->rootref_args.rootref[top->items_pos].treeid;
10129                 dirid = top->rootref_args.rootref[top->items_pos].dirid;
10130                 top->items_pos++;
10131                 err = get_subvolume_path(iter, treeid, dirid, &path_len);
10132                 if (err) {
10133                         /* Skip the subvolume if we can't access it. */
10134                         if (errno == EACCES)
10135                                 continue;
10136                         return err;
10137                 }
10138
10139                 err = append_stack(iter, treeid, path_len);
10140                 if (err) {
10141                         /*
10142                          * Skip the subvolume if it does not exist (which can
10143                          * happen if there is another filesystem mounted over a
10144                          * parent directory) or we don't have permission to
10145                          * access it.
10146                          */
10147                         if (errno == ENOENT || errno == EACCES)
10148                                 continue;
10149                         return err;
10150                 }
10151
10152                 top = top_stack_entry(iter);
10153                 goto out;
10154         }
10155
10156 out:
10157         if (path_ret) {
10158                 *path_ret = malloc(top->path_len + 1);
10159                 if (!*path_ret)
10160                         return -ENOMEM;
10161                 memcpy(*path_ret, iter->cur_path, top->path_len);
10162                 (*path_ret)[top->path_len] = '\0';
10163         }
10164         if (id_ret)
10165                 *id_ret = top->tree_id;
10166         return 0;
10167 }
10168
/*
 * Names of the btrfs subvolumes and snapshots that the btrfs tests below
 * create, rename and delete relative to the test directory.
 */
10169 #define BTRFS_SUBVOLUME1 "subvol1"
10170 #define BTRFS_SUBVOLUME1_SNAPSHOT1 "subvol1_snapshot1"
10171 #define BTRFS_SUBVOLUME1_SNAPSHOT1_RO "subvol1_snapshot1_ro"
10172 #define BTRFS_SUBVOLUME1_RENAME "subvol1_rename"
10173 #define BTRFS_SUBVOLUME2 "subvol2"
10174
10175 static int btrfs_subvolumes_fsids_mapped(void)
10176 {
10177         int fret = -1;
10178         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10179         struct mount_attr attr = {
10180                 .attr_set = MOUNT_ATTR_IDMAP,
10181         };
10182         pid_t pid;
10183
10184         if (!caps_supported())
10185                 return 0;
10186
10187         /* Changing mount properties on a detached mount. */
10188         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10189         if (attr.userns_fd < 0) {
10190                 log_stderr("failure: get_userns_fd");
10191                 goto out;
10192         }
10193
10194         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10195                                      AT_EMPTY_PATH |
10196                                      AT_NO_AUTOMOUNT |
10197                                      AT_SYMLINK_NOFOLLOW |
10198                                      OPEN_TREE_CLOEXEC |
10199                                      OPEN_TREE_CLONE);
10200         if (open_tree_fd < 0) {
10201                 log_stderr("failure: sys_open_tree");
10202                 goto out;
10203         }
10204
10205         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10206                 log_stderr("failure: sys_mount_setattr");
10207                 goto out;
10208         }
10209
10210         /*
10211          * The open_tree() syscall returns an O_PATH file descriptor which we
10212          * can't use with ioctl(). So let's reopen it as a proper file
10213          * descriptor.
10214          */
10215         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10216         if (tree_fd < 0) {
10217                 log_stderr("failure: openat");
10218                 goto out;
10219         }
10220
10221         pid = fork();
10222         if (pid < 0) {
10223                 log_stderr("failure: fork");
10224                 goto out;
10225         }
10226         if (pid == 0) {
10227                 if (!switch_fsids(10000, 10000))
10228                         die("failure: switch fsids");
10229
10230                 if (!caps_up())
10231                         die("failure: raise caps");
10232
10233                 /*
10234                  * The caller's fsids now have mappings in the idmapped mount so
10235                  * any file creation must succeed.
10236                  */
10237
10238                 /* create subvolume */
10239                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10240                         die("failure: btrfs_create_subvolume");
10241
10242                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10243                         die("failure: check ownership");
10244
10245                 /* remove subvolume */
10246                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10247                         die("failure: btrfs_delete_subvolume");
10248
10249                 /* create subvolume */
10250                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10251                         die("failure: btrfs_create_subvolume");
10252
10253                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10254                         die("failure: check ownership");
10255
10256                 if (!caps_down())
10257                         die("failure: lower caps");
10258
10259                 /*
10260                  * The filesystem is not mounted with user_subvol_rm_allowed so
10261                  * subvolume deletion must fail.
10262                  */
10263                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10264                         die("failure: btrfs_delete_subvolume");
10265                 if (errno != EPERM)
10266                         die("failure: errno");
10267
10268                 exit(EXIT_SUCCESS);
10269         }
10270         if (wait_for_pid(pid))
10271                 goto out;
10272
10273         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10274                 die("failure: check ownership");
10275
10276         /* remove subvolume */
10277         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10278                 log_stderr("failure: btrfs_delete_subvolume");
10279                 goto out;
10280         }
10281
10282         fret = 0;
10283         log_debug("Ran test");
10284 out:
10285         safe_close(attr.userns_fd);
10286         safe_close(open_tree_fd);
10287         safe_close(tree_fd);
10288
10289         return fret;
10290 }
10291
10292 static int btrfs_subvolumes_fsids_mapped_userns(void)
10293 {
10294         int fret = -1;
10295         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10296         struct mount_attr attr = {
10297                 .attr_set = MOUNT_ATTR_IDMAP,
10298         };
10299         pid_t pid;
10300
10301         if (!caps_supported())
10302                 return 0;
10303
10304         /* Changing mount properties on a detached mount. */
10305         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10306         if (attr.userns_fd < 0) {
10307                 log_stderr("failure: get_userns_fd");
10308                 goto out;
10309         }
10310
10311         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10312                                      AT_EMPTY_PATH |
10313                                      AT_NO_AUTOMOUNT |
10314                                      AT_SYMLINK_NOFOLLOW |
10315                                      OPEN_TREE_CLOEXEC |
10316                                      OPEN_TREE_CLONE);
10317         if (open_tree_fd < 0) {
10318                 log_stderr("failure: sys_open_tree");
10319                 goto out;
10320         }
10321
10322         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10323                 log_stderr("failure: sys_mount_setattr");
10324                 goto out;
10325         }
10326
10327         /*
10328          * The open_tree() syscall returns an O_PATH file descriptor which we
10329          * can't use with ioctl(). So let's reopen it as a proper file
10330          * descriptor.
10331          */
10332         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10333         if (tree_fd < 0) {
10334                 log_stderr("failure: openat");
10335                 goto out;
10336         }
10337
10338         pid = fork();
10339         if (pid < 0) {
10340                 log_stderr("failure: fork");
10341                 goto out;
10342         }
10343         if (pid == 0) {
10344                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10345                         die("failure: switch_userns");
10346
10347                 /* The caller's fsids now have mappings in the idmapped mount so
10348                  * any file creation must fail.
10349                  */
10350
10351                 /* create subvolume */
10352                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10353                         die("failure: btrfs_create_subvolume");
10354
10355                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10356                         die("failure: check ownership");
10357
10358                 /* remove subvolume */
10359                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10360                         die("failure: btrfs_delete_subvolume");
10361
10362                 exit(EXIT_SUCCESS);
10363         }
10364         if (wait_for_pid(pid))
10365                 goto out;
10366
10367         /* remove subvolume */
10368         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10369                 log_stderr("failure: btrfs_delete_subvolume");
10370                 goto out;
10371         }
10372
10373         fret = 0;
10374         log_debug("Ran test");
10375 out:
10376         safe_close(attr.userns_fd);
10377         safe_close(open_tree_fd);
10378         safe_close(tree_fd);
10379
10380         return fret;
10381 }
10382
10383 static int btrfs_subvolumes_fsids_unmapped(void)
10384 {
10385         int fret = -1;
10386         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10387         struct mount_attr attr = {
10388                 .attr_set = MOUNT_ATTR_IDMAP,
10389         };
10390
10391         /* create directory for rename test */
10392         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10393                 log_stderr("failure: btrfs_create_subvolume");
10394                 goto out;
10395         }
10396
10397         /* change ownership of all files to uid 0 */
10398         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10399                 log_stderr("failure: fchownat");
10400                 goto out;
10401         }
10402
10403         /* Changing mount properties on a detached mount. */
10404         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10405         if (attr.userns_fd < 0) {
10406                 log_stderr("failure: get_userns_fd");
10407                 goto out;
10408         }
10409
10410         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10411                                      AT_EMPTY_PATH |
10412                                      AT_NO_AUTOMOUNT |
10413                                      AT_SYMLINK_NOFOLLOW |
10414                                      OPEN_TREE_CLOEXEC |
10415                                      OPEN_TREE_CLONE);
10416         if (open_tree_fd < 0) {
10417                 log_stderr("failure: sys_open_tree");
10418                 goto out;
10419         }
10420
10421         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10422                 log_stderr("failure: sys_mount_setattr");
10423                 goto out;
10424         }
10425
10426         if (!switch_fsids(0, 0)) {
10427                 log_stderr("failure: switch_fsids");
10428                 goto out;
10429         }
10430
10431         /*
10432          * The caller's fsids don't have a mappings in the idmapped mount so
10433          * any file creation must fail.
10434          */
10435
10436         /*
10437          * The open_tree() syscall returns an O_PATH file descriptor which we
10438          * can't use with ioctl(). So let's reopen it as a proper file
10439          * descriptor.
10440          */
10441         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10442         if (tree_fd < 0) {
10443                 log_stderr("failure: openat");
10444                 goto out;
10445         }
10446
10447         /* create subvolume */
10448         if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2)) {
10449                 log_stderr("failure: btrfs_create_subvolume");
10450                 goto out;
10451         }
10452         if (errno != EOVERFLOW) {
10453                 log_stderr("failure: errno");
10454                 goto out;
10455         }
10456
10457         /* try to rename a subvolume */
10458         if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10459                        BTRFS_SUBVOLUME1_RENAME)) {
10460                 log_stderr("failure: renameat");
10461                 goto out;
10462         }
10463         if (errno != EOVERFLOW) {
10464                 log_stderr("failure: errno");
10465                 goto out;
10466         }
10467
10468         /* The caller is privileged over the inode so file deletion must work. */
10469
10470         /* remove subvolume */
10471         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10472                 log_stderr("failure: btrfs_delete_subvolume");
10473                 goto out;
10474         }
10475
10476         fret = 0;
10477         log_debug("Ran test");
10478 out:
10479         safe_close(attr.userns_fd);
10480         safe_close(open_tree_fd);
10481         safe_close(tree_fd);
10482
10483         return fret;
10484 }
10485
10486 static int btrfs_subvolumes_fsids_unmapped_userns(void)
10487 {
10488         int fret = -1;
10489         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
10490         struct mount_attr attr = {
10491                 .attr_set = MOUNT_ATTR_IDMAP,
10492         };
10493         pid_t pid;
10494
10495         /* create directory for rename test */
10496         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10497                 log_stderr("failure: btrfs_create_subvolume");
10498                 goto out;
10499         }
10500
10501         /* change ownership of all files to uid 0 */
10502         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10503                 log_stderr("failure: fchownat");
10504                 goto out;
10505         }
10506
10507         /* Changing mount properties on a detached mount. */
10508         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10509         if (attr.userns_fd < 0) {
10510                 log_stderr("failure: get_userns_fd");
10511                 goto out;
10512         }
10513
10514         /* Changing mount properties on a detached mount. */
10515         userns_fd = get_userns_fd(0, 30000, 10000);
10516         if (userns_fd < 0) {
10517                 log_stderr("failure: get_userns_fd");
10518                 goto out;
10519         }
10520
10521         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10522                                      AT_EMPTY_PATH |
10523                                      AT_NO_AUTOMOUNT |
10524                                      AT_SYMLINK_NOFOLLOW |
10525                                      OPEN_TREE_CLOEXEC |
10526                                      OPEN_TREE_CLONE);
10527         if (open_tree_fd < 0) {
10528                 log_stderr("failure: sys_open_tree");
10529                 goto out;
10530         }
10531
10532         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10533                 log_stderr("failure: sys_mount_setattr");
10534                 goto out;
10535         }
10536
10537         /*
10538          * The open_tree() syscall returns an O_PATH file descriptor which we
10539          * can't use with ioctl(). So let's reopen it as a proper file
10540          * descriptor.
10541          */
10542         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10543         if (tree_fd < 0) {
10544                 log_stderr("failure: openat");
10545                 goto out;
10546         }
10547
10548         pid = fork();
10549         if (pid < 0) {
10550                 log_stderr("failure: fork");
10551                 goto out;
10552         }
10553         if (pid == 0) {
10554                 if (!switch_userns(userns_fd, 0, 0, false))
10555                         die("failure: switch_userns");
10556
10557                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
10558                                       t_overflowuid, t_overflowgid))
10559                         die("failure: expected_uid_gid");
10560
10561                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
10562                                       t_overflowuid, t_overflowgid))
10563                         die("failure: expected_uid_gid");
10564
10565                 /*
10566                  * The caller's fsids don't have a mappings in the idmapped mount so
10567                  * any file creation must fail.
10568                  */
10569
10570                 /* create subvolume */
10571                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
10572                         die("failure: btrfs_create_subvolume");
10573                 if (errno != EOVERFLOW)
10574                         die("failure: errno");
10575
10576                 /* try to rename a subvolume */
10577                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10578                                         BTRFS_SUBVOLUME1_RENAME))
10579                         die("failure: renameat");
10580                 if (errno != EOVERFLOW)
10581                         die("failure: errno");
10582
10583                 /*
10584                  * The caller is not privileged over the inode so subvolume
10585                  * deletion must fail.
10586                  */
10587
10588                 /* remove subvolume */
10589                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10590                         die("failure: btrfs_delete_subvolume");
10591
10592                 exit(EXIT_SUCCESS);
10593         }
10594         if (wait_for_pid(pid))
10595                 goto out;
10596
10597         /* remove subvolume */
10598         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10599                 log_stderr("failure: btrfs_delete_subvolume");
10600                 goto out;
10601         }
10602
10603         fret = 0;
10604         log_debug("Ran test");
10605 out:
10606         safe_close(attr.userns_fd);
10607         safe_close(open_tree_fd);
10608         safe_close(tree_fd);
10609         safe_close(userns_fd);
10610
10611         return fret;
10612 }
10613
10614 static int btrfs_snapshots_fsids_mapped(void)
10615 {
10616         int fret = -1;
10617         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10618         struct mount_attr attr = {
10619                 .attr_set = MOUNT_ATTR_IDMAP,
10620         };
10621         pid_t pid;
10622
10623         if (!caps_supported())
10624                 return 0;
10625
10626         /* Changing mount properties on a detached mount. */
10627         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10628         if (attr.userns_fd < 0) {
10629                 log_stderr("failure: get_userns_fd");
10630                 goto out;
10631         }
10632
10633         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10634                                      AT_EMPTY_PATH |
10635                                      AT_NO_AUTOMOUNT |
10636                                      AT_SYMLINK_NOFOLLOW |
10637                                      OPEN_TREE_CLOEXEC |
10638                                      OPEN_TREE_CLONE);
10639         if (open_tree_fd < 0) {
10640                 log_stderr("failure: sys_open_tree");
10641                 goto out;
10642         }
10643
10644         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10645                 log_stderr("failure: sys_mount_setattr");
10646                 goto out;
10647         }
10648
10649         /*
10650          * The open_tree() syscall returns an O_PATH file descriptor which we
10651          * can't use with ioctl(). So let's reopen it as a proper file
10652          * descriptor.
10653          */
10654         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10655         if (tree_fd < 0) {
10656                 log_stderr("failure: openat");
10657                 goto out;
10658         }
10659
10660         pid = fork();
10661         if (pid < 0) {
10662                 log_stderr("failure: fork");
10663                 goto out;
10664         }
10665         if (pid == 0) {
10666                 int subvolume_fd = -EBADF;
10667
10668                 if (!switch_fsids(10000, 10000))
10669                         die("failure: switch fsids");
10670
10671                 if (!caps_up())
10672                         die("failure: raise caps");
10673
10674                 /* The caller's fsids now have mappings in the idmapped mount so
10675                  * any file creation must fail.
10676                  */
10677
10678                 /* create subvolume */
10679                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10680                         die("failure: btrfs_create_subvolume");
10681
10682                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10683                         die("failure: expected_uid_gid");
10684
10685                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10686                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10687                 if (subvolume_fd < 0)
10688                         die("failure: openat");
10689
10690                 /* create read-write snapshot */
10691                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10692                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10693                         die("failure: btrfs_create_snapshot");
10694
10695                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10696                         die("failure: expected_uid_gid");
10697
10698                 /* create read-only snapshot */
10699                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10700                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10701                                           BTRFS_SUBVOL_RDONLY))
10702                         die("failure: btrfs_create_snapshot");
10703
10704                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10705                         die("failure: expected_uid_gid");
10706
10707                 safe_close(subvolume_fd);
10708
10709                 /* remove subvolume */
10710                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10711                         die("failure: btrfs_delete_subvolume");
10712
10713                 /* remove read-write snapshot */
10714                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
10715                         die("failure: btrfs_delete_subvolume");
10716
10717                 /* remove read-only snapshot */
10718                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
10719                         die("failure: btrfs_delete_subvolume");
10720
10721                 /* create directory */
10722                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10723                         die("failure: btrfs_create_subvolume");
10724
10725                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10726                         die("failure: expected_uid_gid");
10727
10728                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10729                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10730                 if (subvolume_fd < 0)
10731                         die("failure: openat");
10732
10733                 /* create read-write snapshot */
10734                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10735                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10736                         die("failure: btrfs_create_snapshot");
10737
10738                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10739                         die("failure: expected_uid_gid");
10740
10741                 /* create read-only snapshot */
10742                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10743                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10744                                           BTRFS_SUBVOL_RDONLY))
10745                         die("failure: btrfs_create_snapshot");
10746
10747                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10748                         die("failure: expected_uid_gid");
10749
10750                 safe_close(subvolume_fd);
10751
10752                 exit(EXIT_SUCCESS);
10753         }
10754         if (wait_for_pid(pid))
10755                 goto out;
10756
10757         /* remove directory */
10758         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10759                 log_stderr("failure: btrfs_delete_subvolume");
10760                 goto out;
10761         }
10762
10763         /* remove read-write snapshot */
10764         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10765                 log_stderr("failure: btrfs_delete_subvolume");
10766                 goto out;
10767         }
10768
10769         /* remove read-only snapshot */
10770         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10771                 log_stderr("failure: btrfs_delete_subvolume");
10772                 goto out;
10773         }
10774
10775         fret = 0;
10776         log_debug("Ran test");
10777 out:
10778         safe_close(attr.userns_fd);
10779         safe_close(open_tree_fd);
10780         safe_close(tree_fd);
10781
10782         return fret;
10783 }
10784
10785 static int btrfs_snapshots_fsids_mapped_userns(void)
10786 {
10787         int fret = -1;
10788         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10789         struct mount_attr attr = {
10790                 .attr_set = MOUNT_ATTR_IDMAP,
10791         };
10792         pid_t pid;
10793
10794         if (!caps_supported())
10795                 return 0;
10796
10797         /* Changing mount properties on a detached mount. */
10798         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10799         if (attr.userns_fd < 0) {
10800                 log_stderr("failure: get_userns_fd");
10801                 goto out;
10802         }
10803
10804         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10805                                      AT_EMPTY_PATH |
10806                                      AT_NO_AUTOMOUNT |
10807                                      AT_SYMLINK_NOFOLLOW |
10808                                      OPEN_TREE_CLOEXEC |
10809                                      OPEN_TREE_CLONE);
10810         if (open_tree_fd < 0) {
10811                 log_stderr("failure: sys_open_tree");
10812                 goto out;
10813         }
10814
10815         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10816                 log_stderr("failure: sys_mount_setattr");
10817                 goto out;
10818         }
10819
10820         /*
10821          * The open_tree() syscall returns an O_PATH file descriptor which we
10822          * can't use with ioctl(). So let's reopen it as a proper file
10823          * descriptor.
10824          */
10825         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10826         if (tree_fd < 0) {
10827                 log_stderr("failure: openat");
10828                 goto out;
10829         }
10830
10831         pid = fork();
10832         if (pid < 0) {
10833                 log_stderr("failure: fork");
10834                 goto out;
10835         }
10836         if (pid == 0) {
10837                 int subvolume_fd = -EBADF;
10838
10839                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10840                         die("failure: switch_userns");
10841
10842                 /* create subvolume */
10843                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10844                         die("failure: btrfs_create_subvolume");
10845
10846                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10847                         die("failure: expected_uid_gid");
10848
10849                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10850                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10851                 if (subvolume_fd < 0)
10852                         die("failure: openat");
10853
10854                 /* create read-write snapshot */
10855                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10856                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10857                         die("failure: btrfs_create_snapshot");
10858
10859                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
10860                         die("failure: expected_uid_gid");
10861
10862                 /* create read-only snapshot */
10863                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10864                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10865                                           BTRFS_SUBVOL_RDONLY))
10866                         die("failure: btrfs_create_snapshot");
10867
10868                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
10869                         die("failure: expected_uid_gid");
10870
10871                 safe_close(subvolume_fd);
10872
10873                 exit(EXIT_SUCCESS);
10874         }
10875         if (wait_for_pid(pid))
10876                 goto out;
10877
10878         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10879                 die("failure: expected_uid_gid");
10880
10881         /* remove directory */
10882         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10883                 log_stderr("failure: btrfs_delete_subvolume");
10884                 goto out;
10885         }
10886
10887         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10888                 die("failure: expected_uid_gid");
10889
10890         /* remove read-write snapshot */
10891         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10892                 log_stderr("failure: btrfs_delete_subvolume");
10893                 goto out;
10894         }
10895
10896         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10897                 die("failure: expected_uid_gid");
10898
10899         /* remove read-only snapshot */
10900         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10901                 log_stderr("failure: btrfs_delete_subvolume");
10902                 goto out;
10903         }
10904
10905         fret = 0;
10906         log_debug("Ran test");
10907 out:
10908         safe_close(attr.userns_fd);
10909         safe_close(open_tree_fd);
10910         safe_close(tree_fd);
10911
10912         return fret;
10913 }
10914
10915 static int btrfs_snapshots_fsids_unmapped(void)
10916 {
10917         int fret = -1;
10918         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10919         struct mount_attr attr = {
10920                 .attr_set = MOUNT_ATTR_IDMAP,
10921         };
10922         pid_t pid;
10923
10924         if (!caps_supported())
10925                 return 0;
10926
10927         /* create directory for rename test */
10928         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10929                 log_stderr("failure: btrfs_create_subvolume");
10930                 goto out;
10931         }
10932
10933         /* change ownership of all files to uid 0 */
10934         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10935                 log_stderr("failure: fchownat");
10936                 goto out;
10937         }
10938
10939         /* Changing mount properties on a detached mount. */
10940         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10941         if (attr.userns_fd < 0) {
10942                 log_stderr("failure: get_userns_fd");
10943                 goto out;
10944         }
10945
10946         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10947                                      AT_EMPTY_PATH |
10948                                      AT_NO_AUTOMOUNT |
10949                                      AT_SYMLINK_NOFOLLOW |
10950                                      OPEN_TREE_CLOEXEC |
10951                                      OPEN_TREE_CLONE);
10952         if (open_tree_fd < 0) {
10953                 log_stderr("failure: sys_open_tree");
10954                 goto out;
10955         }
10956
10957         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
10958                               sizeof(attr))) {
10959                 log_stderr("failure: sys_mount_setattr");
10960                 goto out;
10961         }
10962
10963         pid = fork();
10964         if (pid < 0) {
10965                 log_stderr("failure: fork");
10966                 goto out;
10967         }
10968         if (pid == 0) {
10969                 int subvolume_fd = -EBADF;
10970
10971                 if (!switch_fsids(0, 0)) {
10972                         log_stderr("failure: switch_fsids");
10973                         goto out;
10974                 }
10975
10976                 /*
10977                  * The caller's fsids don't have a mappings in the idmapped
10978                  * mount so any file creation must fail.
10979                  */
10980
10981                 /*
10982                  * The open_tree() syscall returns an O_PATH file descriptor
10983                  * which we can't use with ioctl(). So let's reopen it as a
10984                  * proper file descriptor.
10985                  */
10986                 tree_fd = openat(open_tree_fd, ".",
10987                                  O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10988                 if (tree_fd < 0)
10989                         die("failure: openat");
10990
10991                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10992                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10993                 if (subvolume_fd < 0)
10994                         die("failure: openat");
10995
10996                 /* create directory */
10997                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
10998                         die("failure: btrfs_create_subvolume");
10999                 if (errno != EOVERFLOW)
11000                         die("failure: errno");
11001
11002                 /* create read-write snapshot */
11003                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11004                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11005                         die("failure: btrfs_create_snapshot");
11006                 if (errno != EOVERFLOW)
11007                         die("failure: errno");
11008
11009                 /* create read-only snapshot */
11010                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11011                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11012                                            BTRFS_SUBVOL_RDONLY))
11013                         die("failure: btrfs_create_snapshot");
11014                 if (errno != EOVERFLOW)
11015                         die("failure: errno");
11016
11017                 /* try to rename a directory */
11018                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11019                                BTRFS_SUBVOLUME1_RENAME))
11020                         die("failure: renameat");
11021                 if (errno != EOVERFLOW)
11022                         die("failure: errno");
11023
11024                 if (!caps_down())
11025                         die("failure: caps_down");
11026
11027                 /* create read-write snapshot */
11028                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11029                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11030                         die("failure: btrfs_create_snapshot");
11031                 if (errno != EPERM)
11032                         die("failure: errno");
11033
11034                 /* create read-only snapshot */
11035                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11036                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11037                                            BTRFS_SUBVOL_RDONLY))
11038                         die("failure: btrfs_create_snapshot");
11039                 if (errno != EPERM)
11040                         die("failure: errno");
11041
11042                 /*
11043                  * The caller is not privileged over the inode so subvolume
11044                  * deletion must fail.
11045                  */
11046
11047                 /* remove directory */
11048                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11049                         die("failure: btrfs_delete_subvolume");
11050                 if (errno != EPERM)
11051                         die("failure: errno");
11052
11053                 if (!caps_up())
11054                         die("failure: caps_down");
11055
11056                 /*
11057                  * The caller is privileged over the inode so subvolume
11058                  * deletion must work.
11059                  */
11060
11061                 /* remove directory */
11062                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11063                         die("failure: btrfs_delete_subvolume");
11064
11065                 exit(EXIT_SUCCESS);
11066         }
11067         if (wait_for_pid(pid))
11068                 goto out;
11069
11070         fret = 0;
11071         log_debug("Ran test");
11072 out:
11073         safe_close(attr.userns_fd);
11074         safe_close(open_tree_fd);
11075         safe_close(tree_fd);
11076
11077         return fret;
11078 }
11079
11080 static int btrfs_snapshots_fsids_unmapped_userns(void)
11081 {
11082         int fret = -1;
11083         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
11084             userns_fd = -EBADF;
11085         struct mount_attr attr = {
11086                 .attr_set = MOUNT_ATTR_IDMAP,
11087         };
11088         pid_t pid;
11089
11090         if (!caps_supported())
11091                 return 0;
11092
11093         /* create directory for rename test */
11094         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
11095                 log_stderr("failure: btrfs_create_subvolume");
11096                 goto out;
11097         }
11098
11099         /* change ownership of all files to uid 0 */
11100         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
11101                 log_stderr("failure: fchownat");
11102                 goto out;
11103         }
11104
11105         /* Changing mount properties on a detached mount. */
11106         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11107         if (attr.userns_fd < 0) {
11108                 log_stderr("failure: get_userns_fd");
11109                 goto out;
11110         }
11111
11112         /* Changing mount properties on a detached mount. */
11113         userns_fd = get_userns_fd(0, 30000, 10000);
11114         if (userns_fd < 0) {
11115                 log_stderr("failure: get_userns_fd");
11116                 goto out;
11117         }
11118
11119         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11120                                      AT_EMPTY_PATH |
11121                                      AT_NO_AUTOMOUNT |
11122                                      AT_SYMLINK_NOFOLLOW |
11123                                      OPEN_TREE_CLOEXEC |
11124                                      OPEN_TREE_CLONE);
11125         if (open_tree_fd < 0) {
11126                 log_stderr("failure: sys_open_tree");
11127                 goto out;
11128         }
11129
11130         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
11131                               sizeof(attr))) {
11132                 log_stderr("failure: sys_mount_setattr");
11133                 goto out;
11134         }
11135
11136         /*
11137          * The open_tree() syscall returns an O_PATH file descriptor
11138          * which we can't use with ioctl(). So let's reopen it as a
11139          * proper file descriptor.
11140          */
11141         tree_fd = openat(open_tree_fd, ".",
11142                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11143         if (tree_fd < 0) {
11144                 log_stderr("failure: openat");
11145                 goto out;
11146         }
11147
11148         subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11149                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11150         if (subvolume_fd < 0) {
11151                 log_stderr("failure: openat");
11152                 goto out;
11153         }
11154
11155         pid = fork();
11156         if (pid < 0) {
11157                 log_stderr("failure: fork");
11158                 goto out;
11159         }
11160         if (pid == 0) {
11161                 if (!switch_userns(userns_fd, 0, 0, false))
11162                         die("failure: switch_userns");
11163
11164                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
11165                                       t_overflowuid, t_overflowgid))
11166                         die("failure: expected_uid_gid");
11167
11168                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
11169                                       t_overflowuid, t_overflowgid))
11170                         die("failure: expected_uid_gid");
11171
11172                 /*
11173                  * The caller's fsids don't have a mappings in the idmapped
11174                  * mount so any file creation must fail.
11175                  */
11176
11177                 /* create directory */
11178                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
11179                         die("failure: btrfs_create_subvolume");
11180                 if (errno != EOVERFLOW)
11181                         die("failure: errno");
11182
11183                 /* create read-write snapshot */
11184                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11185                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11186                         die("failure: btrfs_create_snapshot");
11187                 if (errno != EPERM)
11188                         die("failure: errno");
11189
11190                 /* create read-only snapshot */
11191                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11192                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11193                                            BTRFS_SUBVOL_RDONLY))
11194                         die("failure: btrfs_create_snapshot");
11195                 if (errno != EPERM)
11196                         die("failure: errno");
11197
11198                 /* try to rename a directory */
11199                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11200                                BTRFS_SUBVOLUME1_RENAME))
11201                         die("failure: renameat");
11202                 if (errno != EOVERFLOW)
11203                         die("failure: errno");
11204
11205                 /*
11206                  * The caller is not privileged over the inode so subvolume
11207                  * deletion must fail.
11208                  */
11209
11210                 /* remove directory */
11211                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11212                         die("failure: btrfs_delete_subvolume");
11213                 if (errno != EPERM)
11214                         die("failure: errno");
11215
11216                 exit(EXIT_SUCCESS);
11217         }
11218         if (wait_for_pid(pid))
11219                 goto out;
11220
11221         /* remove directory */
11222         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11223                 die("failure: btrfs_delete_subvolume");
11224
11225         fret = 0;
11226         log_debug("Ran test");
11227 out:
11228         safe_close(attr.userns_fd);
11229         safe_close(open_tree_fd);
11230         safe_close(subvolume_fd);
11231         safe_close(tree_fd);
11232
11233         return fret;
11234 }
11235
11236 static int btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed(void)
11237 {
11238         int fret = -1;
11239         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11240         struct mount_attr attr = {
11241                 .attr_set = MOUNT_ATTR_IDMAP,
11242         };
11243         pid_t pid;
11244
11245         if (!caps_supported())
11246                 return 0;
11247
11248         /* Changing mount properties on a detached mount. */
11249         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11250         if (attr.userns_fd < 0) {
11251                 log_stderr("failure: get_userns_fd");
11252                 goto out;
11253         }
11254
11255         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11256                                      AT_EMPTY_PATH |
11257                                      AT_NO_AUTOMOUNT |
11258                                      AT_SYMLINK_NOFOLLOW |
11259                                      OPEN_TREE_CLOEXEC |
11260                                      OPEN_TREE_CLONE);
11261         if (open_tree_fd < 0) {
11262                 log_stderr("failure: sys_open_tree");
11263                 goto out;
11264         }
11265
11266         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11267                 log_stderr("failure: sys_mount_setattr");
11268                 goto out;
11269         }
11270
11271         /*
11272          * The open_tree() syscall returns an O_PATH file descriptor which we
11273          * can't use with ioctl(). So let's reopen it as a proper file
11274          * descriptor.
11275          */
11276         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11277         if (tree_fd < 0) {
11278                 log_stderr("failure: openat");
11279                 goto out;
11280         }
11281
11282         pid = fork();
11283         if (pid < 0) {
11284                 log_stderr("failure: fork");
11285                 goto out;
11286         }
11287         if (pid == 0) {
11288                 if (!switch_fsids(10000, 10000))
11289                         die("failure: switch fsids");
11290
11291                 if (!caps_down())
11292                         die("failure: raise caps");
11293
11294                 /*
11295                  * The caller's fsids now have mappings in the idmapped mount so
11296                  * any file creation must succedd.
11297                  */
11298
11299                 /* create subvolume */
11300                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11301                         die("failure: btrfs_create_subvolume");
11302
11303                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11304                         die("failure: check ownership");
11305
11306                 /*
11307                  * The scratch device is mounted with user_subvol_rm_allowed so
11308                  * subvolume deletion must succeed.
11309                  */
11310                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11311                         die("failure: btrfs_delete_subvolume");
11312
11313                 exit(EXIT_SUCCESS);
11314         }
11315         if (wait_for_pid(pid))
11316                 goto out;
11317
11318         fret = 0;
11319         log_debug("Ran test");
11320 out:
11321         safe_close(attr.userns_fd);
11322         safe_close(open_tree_fd);
11323         safe_close(tree_fd);
11324
11325         return fret;
11326 }
11327
11328 static int btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed(void)
11329 {
11330         int fret = -1;
11331         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11332         struct mount_attr attr = {
11333                 .attr_set = MOUNT_ATTR_IDMAP,
11334         };
11335         pid_t pid;
11336
11337         if (!caps_supported())
11338                 return 0;
11339
11340         /* Changing mount properties on a detached mount. */
11341         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11342         if (attr.userns_fd < 0) {
11343                 log_stderr("failure: get_userns_fd");
11344                 goto out;
11345         }
11346
11347         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11348                                      AT_EMPTY_PATH |
11349                                      AT_NO_AUTOMOUNT |
11350                                      AT_SYMLINK_NOFOLLOW |
11351                                      OPEN_TREE_CLOEXEC |
11352                                      OPEN_TREE_CLONE);
11353         if (open_tree_fd < 0) {
11354                 log_stderr("failure: sys_open_tree");
11355                 goto out;
11356         }
11357
11358         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11359                 log_stderr("failure: sys_mount_setattr");
11360                 goto out;
11361         }
11362
11363         /*
11364          * The open_tree() syscall returns an O_PATH file descriptor which we
11365          * can't use with ioctl(). So let's reopen it as a proper file
11366          * descriptor.
11367          */
11368         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11369         if (tree_fd < 0) {
11370                 log_stderr("failure: openat");
11371                 goto out;
11372         }
11373
11374         pid = fork();
11375         if (pid < 0) {
11376                 log_stderr("failure: fork");
11377                 goto out;
11378         }
11379         if (pid == 0) {
11380                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11381                         die("failure: switch_userns");
11382
11383                 /* The caller's fsids now have mappings in the idmapped mount so
11384                  * any file creation must fail.
11385                  */
11386
11387                 /* create subvolume */
11388                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11389                         die("failure: btrfs_create_subvolume");
11390
11391                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11392                         die("failure: check ownership");
11393
11394                 /*
11395                  * The scratch device is mounted with user_subvol_rm_allowed so
11396                  * subvolume deletion must succeed.
11397                  */
11398                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11399                         die("failure: btrfs_delete_subvolume");
11400
11401                 exit(EXIT_SUCCESS);
11402         }
11403         if (wait_for_pid(pid))
11404                 goto out;
11405
11406         fret = 0;
11407         log_debug("Ran test");
11408 out:
11409         safe_close(attr.userns_fd);
11410         safe_close(open_tree_fd);
11411         safe_close(tree_fd);
11412
11413         return fret;
11414 }
11415
11416 static int btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed(void)
11417 {
11418         int fret = -1;
11419         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11420         struct mount_attr attr = {
11421                 .attr_set = MOUNT_ATTR_IDMAP,
11422         };
11423         pid_t pid;
11424
11425         if (!caps_supported())
11426                 return 0;
11427
11428         /* Changing mount properties on a detached mount. */
11429         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11430         if (attr.userns_fd < 0) {
11431                 log_stderr("failure: get_userns_fd");
11432                 goto out;
11433         }
11434
11435         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11436                                      AT_EMPTY_PATH |
11437                                      AT_NO_AUTOMOUNT |
11438                                      AT_SYMLINK_NOFOLLOW |
11439                                      OPEN_TREE_CLOEXEC |
11440                                      OPEN_TREE_CLONE);
11441         if (open_tree_fd < 0) {
11442                 log_stderr("failure: sys_open_tree");
11443                 goto out;
11444         }
11445
11446         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11447                 log_stderr("failure: sys_mount_setattr");
11448                 goto out;
11449         }
11450
11451         /*
11452          * The open_tree() syscall returns an O_PATH file descriptor which we
11453          * can't use with ioctl(). So let's reopen it as a proper file
11454          * descriptor.
11455          */
11456         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11457         if (tree_fd < 0) {
11458                 log_stderr("failure: openat");
11459                 goto out;
11460         }
11461
11462         pid = fork();
11463         if (pid < 0) {
11464                 log_stderr("failure: fork");
11465                 goto out;
11466         }
11467         if (pid == 0) {
11468                 int subvolume_fd = -EBADF;
11469
11470                 if (!switch_fsids(10000, 10000))
11471                         die("failure: switch fsids");
11472
11473                 if (!caps_down())
11474                         die("failure: raise caps");
11475
11476                 /*
11477                  * The caller's fsids now have mappings in the idmapped mount so
11478                  * any file creation must succeed.
11479                  */
11480
11481                 /* create subvolume */
11482                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11483                         die("failure: btrfs_create_subvolume");
11484
11485                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11486                         die("failure: expected_uid_gid");
11487
11488                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11489                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11490                 if (subvolume_fd < 0)
11491                         die("failure: openat");
11492
11493                 /* create read-write snapshot */
11494                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11495                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11496                         die("failure: btrfs_create_snapshot");
11497
11498                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
11499                         die("failure: expected_uid_gid");
11500
11501                 /* create read-only snapshot */
11502                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11503                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11504                                           BTRFS_SUBVOL_RDONLY))
11505                         die("failure: btrfs_create_snapshot");
11506
11507                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
11508                         die("failure: expected_uid_gid");
11509
11510                 safe_close(subvolume_fd);
11511
11512                 /* remove subvolume */
11513                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11514                         die("failure: btrfs_delete_subvolume");
11515
11516                 /* remove read-write snapshot */
11517                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11518                         die("failure: btrfs_delete_subvolume");
11519
11520                 /* remove read-only snapshot */
11521                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11522                         die("failure: btrfs_delete_subvolume");
11523
11524                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11525                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11526                 if (subvolume_fd < 0)
11527                         die("failure: openat");
11528
11529                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11530                         die("failure: btrfs_set_subvolume_ro");
11531
11532                 safe_close(subvolume_fd);
11533
11534                 /* remove read-only snapshot */
11535                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11536                         die("failure: btrfs_delete_subvolume");
11537
11538                 exit(EXIT_SUCCESS);
11539         }
11540         if (wait_for_pid(pid))
11541                 goto out;
11542
11543         fret = 0;
11544         log_debug("Ran test");
11545 out:
11546         safe_close(attr.userns_fd);
11547         safe_close(open_tree_fd);
11548         safe_close(tree_fd);
11549
11550         return fret;
11551 }
11552
11553 static int btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed(void)
11554 {
11555         int fret = -1;
11556         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11557         struct mount_attr attr = {
11558                 .attr_set = MOUNT_ATTR_IDMAP,
11559         };
11560         pid_t pid;
11561
11562         if (!caps_supported())
11563                 return 0;
11564
11565         /* Changing mount properties on a detached mount. */
11566         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11567         if (attr.userns_fd < 0) {
11568                 log_stderr("failure: get_userns_fd");
11569                 goto out;
11570         }
11571
11572         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11573                                      AT_EMPTY_PATH |
11574                                      AT_NO_AUTOMOUNT |
11575                                      AT_SYMLINK_NOFOLLOW |
11576                                      OPEN_TREE_CLOEXEC |
11577                                      OPEN_TREE_CLONE);
11578         if (open_tree_fd < 0) {
11579                 log_stderr("failure: sys_open_tree");
11580                 goto out;
11581         }
11582
11583         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11584                 log_stderr("failure: sys_mount_setattr");
11585                 goto out;
11586         }
11587
11588         /*
11589          * The open_tree() syscall returns an O_PATH file descriptor which we
11590          * can't use with ioctl(). So let's reopen it as a proper file
11591          * descriptor.
11592          */
11593         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11594         if (tree_fd < 0) {
11595                 log_stderr("failure: openat");
11596                 goto out;
11597         }
11598
11599         pid = fork();
11600         if (pid < 0) {
11601                 log_stderr("failure: fork");
11602                 goto out;
11603         }
11604         if (pid == 0) {
11605                 int subvolume_fd = -EBADF;
11606
11607                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11608                         die("failure: switch_userns");
11609
11610                 /* create subvolume */
11611                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11612                         die("failure: btrfs_create_subvolume");
11613
11614                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11615                         die("failure: expected_uid_gid");
11616
11617                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11618                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11619                 if (subvolume_fd < 0)
11620                         die("failure: openat");
11621
11622                 /* create read-write snapshot */
11623                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11624                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11625                         die("failure: btrfs_create_snapshot");
11626
11627                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11628                         die("failure: expected_uid_gid");
11629
11630                 /* create read-only snapshot */
11631                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11632                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11633                                           BTRFS_SUBVOL_RDONLY))
11634                         die("failure: btrfs_create_snapshot");
11635
11636                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11637                         die("failure: expected_uid_gid");
11638
11639                 /* remove directory */
11640                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11641                         die("failure: btrfs_delete_subvolume");
11642
11643                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11644                         die("failure: expected_uid_gid");
11645
11646                 /* remove read-write snapshot */
11647                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11648                         die("failure: btrfs_delete_subvolume");
11649
11650                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11651                         die("failure: expected_uid_gid");
11652
11653                 /* remove read-only snapshot */
11654                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11655                         die("failure: btrfs_delete_subvolume");
11656
11657                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11658                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11659                 if (subvolume_fd < 0)
11660                         die("failure: openat");
11661
11662                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11663                         die("failure: btrfs_set_subvolume_ro");
11664
11665                 safe_close(subvolume_fd);
11666
11667                 /* remove read-only snapshot */
11668                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11669                         die("failure: btrfs_delete_subvolume");
11670
11671                 exit(EXIT_SUCCESS);
11672         }
11673         if (wait_for_pid(pid))
11674                 goto out;
11675
11676         fret = 0;
11677         log_debug("Ran test");
11678 out:
11679         safe_close(attr.userns_fd);
11680         safe_close(open_tree_fd);
11681         safe_close(tree_fd);
11682
11683         return fret;
11684 }
11685
11686 static int btrfs_delete_by_spec_id(void)
11687 {
11688         int fret = -1;
11689         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
11690         uint64_t subvolume_id1 = -EINVAL, subvolume_id2 = -EINVAL;
11691         struct mount_attr attr = {
11692                 .attr_set = MOUNT_ATTR_IDMAP,
11693         };
11694         pid_t pid;
11695
11696         /* Changing mount properties on a detached mount. */
11697         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11698         if (attr.userns_fd < 0) {
11699                 log_stderr("failure: get_userns_fd");
11700                 goto out;
11701         }
11702
11703         /* create subvolume */
11704         if (btrfs_create_subvolume(t_mnt_scratch_fd, "A")) {
11705                 log_stderr("failure: btrfs_create_subvolume");
11706                 goto out;
11707         }
11708
11709         /* create subvolume */
11710         if (btrfs_create_subvolume(t_mnt_scratch_fd, "B")) {
11711                 log_stderr("failure: btrfs_create_subvolume");
11712                 goto out;
11713         }
11714
11715         subvolume_fd = openat(t_mnt_scratch_fd, "B", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11716         if (subvolume_fd < 0) {
11717                 log_stderr("failure: openat");
11718                 goto out;
11719         }
11720
11721         /* create subvolume */
11722         if (btrfs_create_subvolume(subvolume_fd, "C")) {
11723                 log_stderr("failure: btrfs_create_subvolume");
11724                 goto out;
11725         }
11726
11727         safe_close(subvolume_fd);
11728
11729         subvolume_fd = openat(t_mnt_scratch_fd, "A", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11730         if (subvolume_fd < 0) {
11731                 log_stderr("failure: openat");
11732                 goto out;
11733         }
11734
11735         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id1)) {
11736                 log_stderr("failure: btrfs_get_subvolume_id");
11737                 goto out;
11738         }
11739
11740         subvolume_fd = openat(t_mnt_scratch_fd, "B/C", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11741         if (subvolume_fd < 0) {
11742                 log_stderr("failure: openat");
11743                 goto out;
11744         }
11745
11746         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id2)) {
11747                 log_stderr("failure: btrfs_get_subvolume_id");
11748                 goto out;
11749         }
11750
11751         if (sys_mount(t_device_scratch, t_mountpoint, "btrfs", 0, "subvol=B/C")) {
11752                 log_stderr("failure: mount");
11753                 goto out;
11754         }
11755
11756         open_tree_fd = sys_open_tree(-EBADF, t_mountpoint,
11757                                      AT_NO_AUTOMOUNT |
11758                                      AT_SYMLINK_NOFOLLOW |
11759                                      OPEN_TREE_CLOEXEC |
11760                                      OPEN_TREE_CLONE);
11761         if (open_tree_fd < 0) {
11762                 log_stderr("failure: sys_open_tree");
11763                 goto out;
11764         }
11765
11766         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11767                 log_stderr("failure: sys_mount_setattr");
11768                 goto out;
11769         }
11770
11771         /*
11772          * The open_tree() syscall returns an O_PATH file descriptor which we
11773          * can't use with ioctl(). So let's reopen it as a proper file
11774          * descriptor.
11775          */
11776         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11777         if (tree_fd < 0) {
11778                 log_stderr("failure: openat");
11779                 goto out;
11780         }
11781
11782         pid = fork();
11783         if (pid < 0) {
11784                 log_stderr("failure: fork");
11785                 goto out;
11786         }
11787         if (pid == 0) {
11788                 /*
11789                  * The subvolume isn't exposed in the idmapped mount so
11790                  * delation via spec id must fail.
11791                  */
11792                 if (!btrfs_delete_subvolume_id(tree_fd, subvolume_id1))
11793                         die("failure: btrfs_delete_subvolume_id");
11794                 if (errno != EOPNOTSUPP)
11795                         die("failure: errno");
11796
11797                 if (btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id1))
11798                         die("failure: btrfs_delete_subvolume_id");
11799
11800                 exit(EXIT_SUCCESS);
11801         }
11802         if (wait_for_pid(pid))
11803                 goto out;
11804
11805         fret = 0;
11806         log_debug("Ran test");
11807 out:
11808         safe_close(attr.userns_fd);
11809         safe_close(open_tree_fd);
11810         safe_close(tree_fd);
11811         sys_umount2(t_mountpoint, MNT_DETACH);
11812         btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id2);
11813         btrfs_delete_subvolume(t_mnt_scratch_fd, "B");
11814
11815         return fret;
11816 }
11817
11818 static int btrfs_subvolumes_setflags_fsids_mapped(void)
11819 {
11820         int fret = -1;
11821         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11822         struct mount_attr attr = {
11823                 .attr_set = MOUNT_ATTR_IDMAP,
11824         };
11825         pid_t pid;
11826
11827         if (!caps_supported())
11828                 return 0;
11829
11830         /* Changing mount properties on a detached mount. */
11831         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11832         if (attr.userns_fd < 0) {
11833                 log_stderr("failure: get_userns_fd");
11834                 goto out;
11835         }
11836
11837         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11838                                      AT_EMPTY_PATH |
11839                                      AT_NO_AUTOMOUNT |
11840                                      AT_SYMLINK_NOFOLLOW |
11841                                      OPEN_TREE_CLOEXEC |
11842                                      OPEN_TREE_CLONE);
11843         if (open_tree_fd < 0) {
11844                 log_stderr("failure: sys_open_tree");
11845                 goto out;
11846         }
11847
11848         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11849                 log_stderr("failure: sys_mount_setattr");
11850                 goto out;
11851         }
11852
11853         /*
11854          * The open_tree() syscall returns an O_PATH file descriptor which we
11855          * can't use with ioctl(). So let's reopen it as a proper file
11856          * descriptor.
11857          */
11858         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11859         if (tree_fd < 0) {
11860                 log_stderr("failure: openat");
11861                 goto out;
11862         }
11863
11864         pid = fork();
11865         if (pid < 0) {
11866                 log_stderr("failure: fork");
11867                 goto out;
11868         }
11869         if (pid == 0) {
11870                 int subvolume_fd = -EBADF;
11871                 bool read_only = false;
11872
11873                 if (!switch_fsids(10000, 10000))
11874                         die("failure: switch fsids");
11875
11876                 if (!caps_down())
11877                         die("failure: raise caps");
11878
11879                 /* The caller's fsids now have mappings in the idmapped mount so
11880                  * any file creation must fail.
11881                  */
11882
11883                 /* create subvolume */
11884                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11885                         die("failure: btrfs_create_subvolume");
11886
11887                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11888                         die("failure: expected_uid_gid");
11889
11890                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11891                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11892                 if (subvolume_fd < 0)
11893                         die("failure: openat");
11894
11895                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11896                         die("failure: btrfs_get_subvolume_ro");
11897
11898                 if (read_only)
11899                         die("failure: read_only");
11900
11901                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
11902                         die("failure: btrfs_set_subvolume_ro");
11903
11904                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11905                         die("failure: btrfs_get_subvolume_ro");
11906
11907                 if (!read_only)
11908                         die("failure: not read_only");
11909
11910                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11911                         die("failure: btrfs_set_subvolume_ro");
11912
11913                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11914                         die("failure: btrfs_get_subvolume_ro");
11915
11916                 if (read_only)
11917                         die("failure: read_only");
11918
11919                 safe_close(subvolume_fd);
11920
11921                 exit(EXIT_SUCCESS);
11922         }
11923         if (wait_for_pid(pid))
11924                 goto out;
11925
11926         /* remove directory */
11927         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
11928                 log_stderr("failure: btrfs_delete_subvolume");
11929                 goto out;
11930         }
11931
11932         fret = 0;
11933         log_debug("Ran test");
11934 out:
11935         safe_close(attr.userns_fd);
11936         safe_close(open_tree_fd);
11937         safe_close(tree_fd);
11938
11939         return fret;
11940 }
11941
11942 static int btrfs_subvolumes_setflags_fsids_mapped_userns(void)
11943 {
11944         int fret = -1;
11945         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11946         struct mount_attr attr = {
11947                 .attr_set = MOUNT_ATTR_IDMAP,
11948         };
11949         pid_t pid;
11950
11951         if (!caps_supported())
11952                 return 0;
11953
11954         /* Changing mount properties on a detached mount. */
11955         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11956         if (attr.userns_fd < 0) {
11957                 log_stderr("failure: get_userns_fd");
11958                 goto out;
11959         }
11960
11961         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11962                                      AT_EMPTY_PATH |
11963                                      AT_NO_AUTOMOUNT |
11964                                      AT_SYMLINK_NOFOLLOW |
11965                                      OPEN_TREE_CLOEXEC |
11966                                      OPEN_TREE_CLONE);
11967         if (open_tree_fd < 0) {
11968                 log_stderr("failure: sys_open_tree");
11969                 goto out;
11970         }
11971
11972         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11973                 log_stderr("failure: sys_mount_setattr");
11974                 goto out;
11975         }
11976
11977         /*
11978          * The open_tree() syscall returns an O_PATH file descriptor which we
11979          * can't use with ioctl(). So let's reopen it as a proper file
11980          * descriptor.
11981          */
11982         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11983         if (tree_fd < 0) {
11984                 log_stderr("failure: openat");
11985                 goto out;
11986         }
11987
11988         pid = fork();
11989         if (pid < 0) {
11990                 log_stderr("failure: fork");
11991                 goto out;
11992         }
11993         if (pid == 0) {
11994                 int subvolume_fd = -EBADF;
11995                 bool read_only = false;
11996
11997                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11998                         die("failure: switch_userns");
11999
12000                 /* The caller's fsids now have mappings in the idmapped mount so
12001                  * any file creation must fail.
12002                  */
12003
12004                 /* create subvolume */
12005                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12006                         die("failure: btrfs_create_subvolume");
12007
12008                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12009                         die("failure: expected_uid_gid");
12010
12011                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12012                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12013                 if (subvolume_fd < 0)
12014                         die("failure: openat");
12015
12016                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12017                         die("failure: btrfs_get_subvolume_ro");
12018
12019                 if (read_only)
12020                         die("failure: read_only");
12021
12022                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
12023                         die("failure: btrfs_set_subvolume_ro");
12024
12025                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12026                         die("failure: btrfs_get_subvolume_ro");
12027
12028                 if (!read_only)
12029                         die("failure: not read_only");
12030
12031                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
12032                         die("failure: btrfs_set_subvolume_ro");
12033
12034                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12035                         die("failure: btrfs_get_subvolume_ro");
12036
12037                 if (read_only)
12038                         die("failure: read_only");
12039
12040                 safe_close(subvolume_fd);
12041
12042                 exit(EXIT_SUCCESS);
12043         }
12044         if (wait_for_pid(pid))
12045                 goto out;
12046
12047         /* remove directory */
12048         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12049                 log_stderr("failure: btrfs_delete_subvolume");
12050                 goto out;
12051         }
12052
12053         fret = 0;
12054         log_debug("Ran test");
12055 out:
12056         safe_close(attr.userns_fd);
12057         safe_close(open_tree_fd);
12058         safe_close(tree_fd);
12059
12060         return fret;
12061 }
12062
12063 static int btrfs_subvolumes_setflags_fsids_unmapped(void)
12064 {
12065         int fret = -1;
12066         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12067         struct mount_attr attr = {
12068                 .attr_set = MOUNT_ATTR_IDMAP,
12069         };
12070         pid_t pid;
12071
12072         if (!caps_supported())
12073                 return 0;
12074
12075         /* Changing mount properties on a detached mount. */
12076         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12077         if (attr.userns_fd < 0) {
12078                 log_stderr("failure: get_userns_fd");
12079                 goto out;
12080         }
12081
12082         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12083                                      AT_EMPTY_PATH |
12084                                      AT_NO_AUTOMOUNT |
12085                                      AT_SYMLINK_NOFOLLOW |
12086                                      OPEN_TREE_CLOEXEC |
12087                                      OPEN_TREE_CLONE);
12088         if (open_tree_fd < 0) {
12089                 log_stderr("failure: sys_open_tree");
12090                 goto out;
12091         }
12092
12093         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12094                 log_stderr("failure: sys_mount_setattr");
12095                 goto out;
12096         }
12097
12098         /*
12099          * The open_tree() syscall returns an O_PATH file descriptor which we
12100          * can't use with ioctl(). So let's reopen it as a proper file
12101          * descriptor.
12102          */
12103         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12104         if (tree_fd < 0) {
12105                 log_stderr("failure: openat");
12106                 goto out;
12107         }
12108
12109         /* create subvolume */
12110         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12111                 log_stderr("failure: btrfs_create_subvolume");
12112                 goto out;
12113         }
12114
12115         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12116                 log_stderr("failure: expected_uid_gid");
12117                 goto out;
12118         }
12119
12120         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12121                 log_stderr("failure: expected_uid_gid");
12122                 goto out;
12123         }
12124
12125         pid = fork();
12126         if (pid < 0) {
12127                 log_stderr("failure: fork");
12128                 goto out;
12129         }
12130         if (pid == 0) {
12131                 int subvolume_fd = -EBADF;
12132                 bool read_only = false;
12133
12134                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12135                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12136                 if (subvolume_fd < 0)
12137                         die("failure: openat");
12138
12139                 if (!switch_fsids(0, 0))
12140                         die("failure: switch fsids");
12141
12142                 if (!caps_down())
12143                         die("failure: raise caps");
12144
12145                 /*
12146                  * The caller's fsids don't have mappings in the idmapped mount
12147                  * so any file creation must fail.
12148                  */
12149
12150                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12151                         die("failure: btrfs_get_subvolume_ro");
12152
12153                 if (read_only)
12154                         die("failure: read_only");
12155
12156                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12157                         die("failure: btrfs_set_subvolume_ro");
12158                 if (errno != EPERM)
12159                         die("failure: errno");
12160
12161                 safe_close(subvolume_fd);
12162
12163                 exit(EXIT_SUCCESS);
12164         }
12165         if (wait_for_pid(pid))
12166                 goto out;
12167
12168         /* remove directory */
12169         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12170                 log_stderr("failure: btrfs_delete_subvolume");
12171                 goto out;
12172         }
12173
12174         fret = 0;
12175         log_debug("Ran test");
12176 out:
12177         safe_close(attr.userns_fd);
12178         safe_close(open_tree_fd);
12179         safe_close(tree_fd);
12180
12181         return fret;
12182 }
12183
12184 static int btrfs_subvolumes_setflags_fsids_unmapped_userns(void)
12185 {
12186         int fret = -1;
12187         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
12188         struct mount_attr attr = {
12189                 .attr_set = MOUNT_ATTR_IDMAP,
12190         };
12191         pid_t pid;
12192
12193         if (!caps_supported())
12194                 return 0;
12195
12196         /* Changing mount properties on a detached mount. */
12197         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12198         if (attr.userns_fd < 0) {
12199                 log_stderr("failure: get_userns_fd");
12200                 goto out;
12201         }
12202
12203         /* Changing mount properties on a detached mount. */
12204         userns_fd = get_userns_fd(0, 30000, 10000);
12205         if (userns_fd < 0) {
12206                 log_stderr("failure: get_userns_fd");
12207                 goto out;
12208         }
12209
12210         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12211                                      AT_EMPTY_PATH |
12212                                      AT_NO_AUTOMOUNT |
12213                                      AT_SYMLINK_NOFOLLOW |
12214                                      OPEN_TREE_CLOEXEC |
12215                                      OPEN_TREE_CLONE);
12216         if (open_tree_fd < 0) {
12217                 log_stderr("failure: sys_open_tree");
12218                 goto out;
12219         }
12220
12221         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12222                 log_stderr("failure: sys_mount_setattr");
12223                 goto out;
12224         }
12225
12226         /*
12227          * The open_tree() syscall returns an O_PATH file descriptor which we
12228          * can't use with ioctl(). So let's reopen it as a proper file
12229          * descriptor.
12230          */
12231         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12232         if (tree_fd < 0) {
12233                 log_stderr("failure: openat");
12234                 goto out;
12235         }
12236
12237         /* create subvolume */
12238         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12239                 log_stderr("failure: btrfs_create_subvolume");
12240                 goto out;
12241         }
12242
12243         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12244                 log_stderr("failure: expected_uid_gid");
12245                 goto out;
12246         }
12247
12248         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12249                 log_stderr("failure: expected_uid_gid");
12250                 goto out;
12251         }
12252
12253         pid = fork();
12254         if (pid < 0) {
12255                 log_stderr("failure: fork");
12256                 goto out;
12257         }
12258         if (pid == 0) {
12259                 int subvolume_fd = -EBADF;
12260                 bool read_only = false;
12261
12262                 /*
12263                  * The caller's fsids don't have mappings in the idmapped mount
12264                  * so any file creation must fail.
12265                  */
12266
12267                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12268                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12269                 if (subvolume_fd < 0)
12270                         die("failure: openat");
12271
12272                 if (!switch_userns(userns_fd, 0, 0, false))
12273                         die("failure: switch_userns");
12274
12275                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12276                                       t_overflowuid, t_overflowgid))
12277                         die("failure: expected_uid_gid");
12278
12279                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12280                                       t_overflowuid, t_overflowgid))
12281                         die("failure: expected_uid_gid");
12282
12283                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12284                         die("failure: btrfs_get_subvolume_ro");
12285
12286                 if (read_only)
12287                         die("failure: read_only");
12288
12289                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12290                         die("failure: btrfs_set_subvolume_ro");
12291                 if (errno != EPERM)
12292                         die("failure: errno");
12293
12294                 safe_close(subvolume_fd);
12295
12296                 exit(EXIT_SUCCESS);
12297         }
12298         if (wait_for_pid(pid))
12299                 goto out;
12300
12301         /* remove directory */
12302         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12303                 log_stderr("failure: btrfs_delete_subvolume");
12304                 goto out;
12305         }
12306
12307         fret = 0;
12308         log_debug("Ran test");
12309 out:
12310         safe_close(attr.userns_fd);
12311         safe_close(open_tree_fd);
12312         safe_close(tree_fd);
12313         safe_close(userns_fd);
12314
12315         return fret;
12316 }
12317
12318 static int btrfs_snapshots_setflags_fsids_mapped(void)
12319 {
12320         int fret = -1;
12321         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12322         struct mount_attr attr = {
12323                 .attr_set = MOUNT_ATTR_IDMAP,
12324         };
12325         pid_t pid;
12326
12327         if (!caps_supported())
12328                 return 0;
12329
12330         /* Changing mount properties on a detached mount. */
12331         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12332         if (attr.userns_fd < 0) {
12333                 log_stderr("failure: get_userns_fd");
12334                 goto out;
12335         }
12336
12337         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12338                                      AT_EMPTY_PATH |
12339                                      AT_NO_AUTOMOUNT |
12340                                      AT_SYMLINK_NOFOLLOW |
12341                                      OPEN_TREE_CLOEXEC |
12342                                      OPEN_TREE_CLONE);
12343         if (open_tree_fd < 0) {
12344                 log_stderr("failure: sys_open_tree");
12345                 goto out;
12346         }
12347
12348         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12349                 log_stderr("failure: sys_mount_setattr");
12350                 goto out;
12351         }
12352
12353         /*
12354          * The open_tree() syscall returns an O_PATH file descriptor which we
12355          * can't use with ioctl(). So let's reopen it as a proper file
12356          * descriptor.
12357          */
12358         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12359         if (tree_fd < 0) {
12360                 log_stderr("failure: openat");
12361                 goto out;
12362         }
12363
12364         pid = fork();
12365         if (pid < 0) {
12366                 log_stderr("failure: fork");
12367                 goto out;
12368         }
12369         if (pid == 0) {
12370                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12371                 bool read_only = false;
12372
12373                 if (!switch_fsids(10000, 10000))
12374                         die("failure: switch fsids");
12375
12376                 if (!caps_down())
12377                         die("failure: raise caps");
12378
12379                 /*
12380                  * The caller's fsids now have mappings in the idmapped mount
12381                  * so any file creation must succeed.
12382                  */
12383
12384                 /* create subvolume */
12385                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12386                         die("failure: btrfs_create_subvolume");
12387
12388                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
12389                         die("failure: expected_uid_gid");
12390
12391                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12392                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12393                 if (subvolume_fd < 0)
12394                         die("failure: openat");
12395
12396                 /* create read-write snapshot */
12397                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12398                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12399                         die("failure: btrfs_create_snapshot");
12400
12401                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
12402                         die("failure: expected_uid_gid");
12403
12404                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12405                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12406                 if (snapshot_fd < 0)
12407                         die("failure: openat");
12408
12409                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12410                         die("failure: btrfs_get_subvolume_ro");
12411
12412                 if (read_only)
12413                         die("failure: read_only");
12414
12415                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12416                         die("failure: btrfs_set_subvolume_ro");
12417
12418                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12419                         die("failure: btrfs_get_subvolume_ro");
12420
12421                 if (!read_only)
12422                         die("failure: not read_only");
12423
12424                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12425                         die("failure: btrfs_set_subvolume_ro");
12426
12427                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12428                         die("failure: btrfs_get_subvolume_ro");
12429
12430                 if (read_only)
12431                         die("failure: read_only");
12432
12433                 safe_close(snapshot_fd);
12434                 safe_close(subvolume_fd);
12435
12436                 exit(EXIT_SUCCESS);
12437         }
12438         if (wait_for_pid(pid))
12439                 goto out;
12440
12441         /* remove directory */
12442         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12443                 log_stderr("failure: btrfs_delete_subvolume");
12444                 goto out;
12445         }
12446
12447         /* remove directory */
12448         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12449                 log_stderr("failure: btrfs_delete_subvolume");
12450                 goto out;
12451         }
12452
12453         fret = 0;
12454         log_debug("Ran test");
12455 out:
12456         safe_close(attr.userns_fd);
12457         safe_close(open_tree_fd);
12458         safe_close(tree_fd);
12459
12460         return fret;
12461 }
12462
12463 static int btrfs_snapshots_setflags_fsids_mapped_userns(void)
12464 {
12465         int fret = -1;
12466         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12467         struct mount_attr attr = {
12468                 .attr_set = MOUNT_ATTR_IDMAP,
12469         };
12470         pid_t pid;
12471
12472         if (!caps_supported())
12473                 return 0;
12474
12475         /* Changing mount properties on a detached mount. */
12476         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12477         if (attr.userns_fd < 0) {
12478                 log_stderr("failure: get_userns_fd");
12479                 goto out;
12480         }
12481
12482         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12483                                      AT_EMPTY_PATH |
12484                                      AT_NO_AUTOMOUNT |
12485                                      AT_SYMLINK_NOFOLLOW |
12486                                      OPEN_TREE_CLOEXEC |
12487                                      OPEN_TREE_CLONE);
12488         if (open_tree_fd < 0) {
12489                 log_stderr("failure: sys_open_tree");
12490                 goto out;
12491         }
12492
12493         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12494                 log_stderr("failure: sys_mount_setattr");
12495                 goto out;
12496         }
12497
12498         /*
12499          * The open_tree() syscall returns an O_PATH file descriptor which we
12500          * can't use with ioctl(). So let's reopen it as a proper file
12501          * descriptor.
12502          */
12503         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12504         if (tree_fd < 0) {
12505                 log_stderr("failure: openat");
12506                 goto out;
12507         }
12508
12509         pid = fork();
12510         if (pid < 0) {
12511                 log_stderr("failure: fork");
12512                 goto out;
12513         }
12514         if (pid == 0) {
12515                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12516                 bool read_only = false;
12517
12518                 if (!switch_userns(attr.userns_fd, 0, 0, false))
12519                         die("failure: switch_userns");
12520
12521                 /*
12522                  * The caller's fsids now have mappings in the idmapped mount so
12523                  * any file creation must succeed.
12524                  */
12525
12526                 /* create subvolume */
12527                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12528                         die("failure: btrfs_create_subvolume");
12529
12530                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12531                         die("failure: expected_uid_gid");
12532
12533                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12534                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12535                 if (subvolume_fd < 0)
12536                         die("failure: openat");
12537
12538                 /* create read-write snapshot */
12539                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12540                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12541                         die("failure: btrfs_create_snapshot");
12542
12543                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
12544                         die("failure: expected_uid_gid");
12545
12546                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12547                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12548                 if (snapshot_fd < 0)
12549                         die("failure: openat");
12550
12551                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12552                         die("failure: btrfs_get_subvolume_ro");
12553
12554                 if (read_only)
12555                         die("failure: read_only");
12556
12557                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12558                         die("failure: btrfs_set_subvolume_ro");
12559
12560                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12561                         die("failure: btrfs_get_subvolume_ro");
12562
12563                 if (!read_only)
12564                         die("failure: not read_only");
12565
12566                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12567                         die("failure: btrfs_set_subvolume_ro");
12568
12569                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12570                         die("failure: btrfs_get_subvolume_ro");
12571
12572                 if (read_only)
12573                         die("failure: read_only");
12574
12575                 safe_close(snapshot_fd);
12576                 safe_close(subvolume_fd);
12577
12578                 exit(EXIT_SUCCESS);
12579         }
12580         if (wait_for_pid(pid))
12581                 goto out;
12582
12583         /* remove directory */
12584         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12585                 log_stderr("failure: btrfs_delete_subvolume");
12586                 goto out;
12587         }
12588
12589         /* remove directory */
12590         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12591                 log_stderr("failure: btrfs_delete_subvolume");
12592                 goto out;
12593         }
12594
12595         fret = 0;
12596         log_debug("Ran test");
12597 out:
12598         safe_close(attr.userns_fd);
12599         safe_close(open_tree_fd);
12600         safe_close(tree_fd);
12601
12602         return fret;
12603 }
12604
12605 static int btrfs_snapshots_setflags_fsids_unmapped(void)
12606 {
12607         int fret = -1;
12608         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
12609         struct mount_attr attr = {
12610                 .attr_set = MOUNT_ATTR_IDMAP,
12611         };
12612         pid_t pid;
12613
12614         if (!caps_supported())
12615                 return 0;
12616
12617         /* Changing mount properties on a detached mount. */
12618         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12619         if (attr.userns_fd < 0) {
12620                 log_stderr("failure: get_userns_fd");
12621                 goto out;
12622         }
12623
12624         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12625                                      AT_EMPTY_PATH |
12626                                      AT_NO_AUTOMOUNT |
12627                                      AT_SYMLINK_NOFOLLOW |
12628                                      OPEN_TREE_CLOEXEC |
12629                                      OPEN_TREE_CLONE);
12630         if (open_tree_fd < 0) {
12631                 log_stderr("failure: sys_open_tree");
12632                 goto out;
12633         }
12634
12635         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12636                 log_stderr("failure: sys_mount_setattr");
12637                 goto out;
12638         }
12639
12640         /*
12641          * The open_tree() syscall returns an O_PATH file descriptor which we
12642          * can't use with ioctl(). So let's reopen it as a proper file
12643          * descriptor.
12644          */
12645         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12646         if (tree_fd < 0) {
12647                 log_stderr("failure: openat");
12648                 goto out;
12649         }
12650
12651         /* create subvolume */
12652         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12653                 log_stderr("failure: btrfs_create_subvolume");
12654                 goto out;
12655         }
12656
12657         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12658                 log_stderr("failure: expected_uid_gid");
12659                 goto out;
12660         }
12661
12662         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12663                 log_stderr("failure: expected_uid_gid");
12664                 goto out;
12665         }
12666
12667         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12668                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12669         if (subvolume_fd < 0) {
12670                 log_stderr("failure: openat");
12671                 goto out;
12672         }
12673
12674         /* create read-write snapshot */
12675         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12676                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12677                 log_stderr("failure: btrfs_create_snapshot");
12678                 goto out;
12679         }
12680
12681         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12682                 log_stderr("failure: expected_uid_gid");
12683                 goto out;
12684         }
12685
12686         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12687                 log_stderr("failure: expected_uid_gid");
12688                 goto out;
12689         }
12690
12691         pid = fork();
12692         if (pid < 0) {
12693                 log_stderr("failure: fork");
12694                 goto out;
12695         }
12696         if (pid == 0) {
12697                 int snapshot_fd = -EBADF;
12698                 bool read_only = false;
12699
12700                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12701                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12702                 if (snapshot_fd < 0)
12703                         die("failure: openat");
12704
12705                 if (!switch_fsids(0, 0))
12706                         die("failure: switch fsids");
12707
12708                 if (!caps_down())
12709                         die("failure: raise caps");
12710
12711                 /*
12712                  * The caller's fsids don't have mappings in the idmapped mount
12713                  * so any file creation must fail.
12714                  */
12715
12716                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12717                         die("failure: btrfs_get_subvolume_ro");
12718
12719                 if (read_only)
12720                         die("failure: read_only");
12721
12722                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12723                         die("failure: btrfs_set_subvolume_ro");
12724                 if (errno != EPERM)
12725                         die("failure: errno");
12726
12727                 safe_close(snapshot_fd);
12728
12729                 exit(EXIT_SUCCESS);
12730         }
12731         if (wait_for_pid(pid))
12732                 goto out;
12733
12734         /* remove directory */
12735         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12736                 log_stderr("failure: btrfs_delete_subvolume");
12737                 goto out;
12738         }
12739
12740         /* remove directory */
12741         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12742                 log_stderr("failure: btrfs_delete_subvolume");
12743                 goto out;
12744         }
12745
12746         fret = 0;
12747         log_debug("Ran test");
12748 out:
12749         safe_close(attr.userns_fd);
12750         safe_close(open_tree_fd);
12751         safe_close(subvolume_fd);
12752         safe_close(tree_fd);
12753
12754         return fret;
12755 }
12756
12757 static int btrfs_snapshots_setflags_fsids_unmapped_userns(void)
12758 {
12759         int fret = -1;
12760         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
12761             userns_fd = -EBADF;
12762         struct mount_attr attr = {
12763                 .attr_set = MOUNT_ATTR_IDMAP,
12764         };
12765         pid_t pid;
12766
12767         if (!caps_supported())
12768                 return 0;
12769
12770         /* Changing mount properties on a detached mount. */
12771         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12772         if (attr.userns_fd < 0) {
12773                 log_stderr("failure: get_userns_fd");
12774                 goto out;
12775         }
12776
12777         /* Changing mount properties on a detached mount. */
12778         userns_fd = get_userns_fd(0, 30000, 10000);
12779         if (userns_fd < 0) {
12780                 log_stderr("failure: get_userns_fd");
12781                 goto out;
12782         }
12783
12784         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12785                                      AT_EMPTY_PATH |
12786                                      AT_NO_AUTOMOUNT |
12787                                      AT_SYMLINK_NOFOLLOW |
12788                                      OPEN_TREE_CLOEXEC |
12789                                      OPEN_TREE_CLONE);
12790         if (open_tree_fd < 0) {
12791                 log_stderr("failure: sys_open_tree");
12792                 goto out;
12793         }
12794
12795         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12796                 log_stderr("failure: sys_mount_setattr");
12797                 goto out;
12798         }
12799
12800         /*
12801          * The open_tree() syscall returns an O_PATH file descriptor which we
12802          * can't use with ioctl(). So let's reopen it as a proper file
12803          * descriptor.
12804          */
12805         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12806         if (tree_fd < 0) {
12807                 log_stderr("failure: openat");
12808                 goto out;
12809         }
12810
12811         /* create subvolume */
12812         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12813                 log_stderr("failure: btrfs_create_subvolume");
12814                 goto out;
12815         }
12816
12817         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12818                 log_stderr("failure: expected_uid_gid");
12819                 goto out;
12820         }
12821
12822         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12823                 log_stderr("failure: expected_uid_gid");
12824                 goto out;
12825         }
12826
12827         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12828                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12829         if (subvolume_fd < 0) {
12830                 log_stderr("failure: openat");
12831                 goto out;
12832         }
12833
12834         /* create read-write snapshot */
12835         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12836                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12837                 log_stderr("failure: btrfs_create_snapshot");
12838                 goto out;
12839         }
12840
12841         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12842                 log_stderr("failure: expected_uid_gid");
12843                 goto out;
12844         }
12845
12846         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12847                 log_stderr("failure: expected_uid_gid");
12848                 goto out;
12849         }
12850
12851         pid = fork();
12852         if (pid < 0) {
12853                 log_stderr("failure: fork");
12854                 goto out;
12855         }
12856         if (pid == 0) {
12857                 int snapshot_fd = -EBADF;
12858                 bool read_only = false;
12859
12860                 /*
12861                  * The caller's fsids don't have mappings in the idmapped mount
12862                  * so any file creation must fail.
12863                  */
12864
12865                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12866                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12867                 if (snapshot_fd < 0)
12868                         die("failure: openat");
12869
12870
12871                 if (!switch_userns(userns_fd, 0, 0, false))
12872                         die("failure: switch_userns");
12873
12874                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12875                                       t_overflowuid, t_overflowgid))
12876                         die("failure: expected_uid_gid");
12877
12878                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12879                                       t_overflowuid, t_overflowgid))
12880                         die("failure: expected_uid_gid");
12881
12882                 /*
12883                  * The caller's fsids don't have mappings in the idmapped mount
12884                  * so any file creation must fail.
12885                  */
12886
12887                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12888                         die("failure: btrfs_get_subvolume_ro");
12889
12890                 if (read_only)
12891                         die("failure: read_only");
12892
12893                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12894                         die("failure: btrfs_set_subvolume_ro");
12895                 if (errno != EPERM)
12896                         die("failure: errno");
12897
12898                 safe_close(snapshot_fd);
12899
12900                 exit(EXIT_SUCCESS);
12901         }
12902         if (wait_for_pid(pid))
12903                 goto out;
12904
12905         /* remove directory */
12906         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12907                 log_stderr("failure: btrfs_delete_subvolume");
12908                 goto out;
12909         }
12910
12911         /* remove directory */
12912         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12913                 log_stderr("failure: btrfs_delete_subvolume");
12914                 goto out;
12915         }
12916
12917         fret = 0;
12918         log_debug("Ran test");
12919 out:
12920         safe_close(attr.userns_fd);
12921         safe_close(open_tree_fd);
12922         safe_close(subvolume_fd);
12923         safe_close(tree_fd);
12924         safe_close(userns_fd);
12925
12926         return fret;
12927 }
12928
/* Names of the subvolumes used by the subvolume lookup tests. */
#define BTRFS_SUBVOLUME_SUBVOL1         "subvol1"
#define BTRFS_SUBVOLUME_SUBVOL2         "subvol2"
#define BTRFS_SUBVOLUME_SUBVOL3         "subvol3"
#define BTRFS_SUBVOLUME_SUBVOL4         "subvol4"

/* Slot of each subvolume in the per-test fd and id arrays. */
#define BTRFS_SUBVOLUME_SUBVOL1_ID      0
#define BTRFS_SUBVOLUME_SUBVOL2_ID      1
#define BTRFS_SUBVOLUME_SUBVOL3_ID      2
#define BTRFS_SUBVOLUME_SUBVOL4_ID      3

/* Plain directories nested below subvol1. */
#define BTRFS_SUBVOLUME_DIR1            "dir1"
#define BTRFS_SUBVOLUME_DIR2            "dir2"

/* Directory name used as the mountpoint for the subvolume mount. */
#define BTRFS_SUBVOLUME_MNT             "mnt_subvolume1"

/*
 * Nested paths, assembled from the components above through string literal
 * concatenation; they expand to "subvol1/subvol3", "subvol1/dir1/dir2" and
 * "subvol1/dir1/dir2/subvol4".
 */
#define BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3 \
        BTRFS_SUBVOLUME_SUBVOL1 "/" BTRFS_SUBVOLUME_SUBVOL3
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 \
        BTRFS_SUBVOLUME_SUBVOL1 "/" BTRFS_SUBVOLUME_DIR1 "/" BTRFS_SUBVOLUME_DIR2
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4 \
        BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 "/" BTRFS_SUBVOLUME_SUBVOL4
12947
12948 /*
12949  * We create the following mount layout to test lookup:
12950  *
12951  * |-/mnt/test                    /dev/loop0                   btrfs       rw,relatime,space_cache,subvolid=5,subvol=/
12952  * | |-/mnt/test/mnt1             /dev/loop1[/subvol1]         btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=268,subvol=/subvol1
12953  * '-/mnt/scratch                 /dev/loop1                   btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=5,subvol=/
12954  */
12955 static int btrfs_subvolume_lookup_user(void)
12956 {
12957         int fret = -1, i;
12958         int dir1_fd = -EBADF, dir2_fd = -EBADF, mnt_fd = -EBADF,
12959             open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
12960         int subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
12961         uint64_t subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
12962         uint64_t subvolid = -EINVAL;
12963         struct mount_attr attr = {
12964                 .attr_set = MOUNT_ATTR_IDMAP,
12965         };
12966         pid_t pid;
12967         struct btrfs_iter *iter;
12968
12969         if (!caps_supported())
12970                 return 0;
12971
12972         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
12973                 subvolume_fds[i] = -EBADF;
12974
12975         for (i = 0; i < ARRAY_SIZE(subvolume_ids); i++)
12976                 subvolume_ids[i] = -EINVAL;
12977
12978         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL1)) {
12979                 log_stderr("failure: btrfs_create_subvolume");
12980                 goto out;
12981         }
12982
12983         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL2)) {
12984                 log_stderr("failure: btrfs_create_subvolume");
12985                 goto out;
12986         }
12987
12988         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] = openat(t_mnt_scratch_fd,
12989                                                            BTRFS_SUBVOLUME_SUBVOL1,
12990                                                            O_CLOEXEC | O_DIRECTORY);
12991         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] < 0) {
12992                 log_stderr("failure: openat");
12993                 goto out;
12994         }
12995
12996         /* create subvolume */
12997         if (btrfs_create_subvolume(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_SUBVOL3)) {
12998                 log_stderr("failure: btrfs_create_subvolume");
12999                 goto out;
13000         }
13001
13002         if (mkdirat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1, 0777)) {
13003                 log_stderr("failure: mkdirat");
13004                 goto out;
13005         }
13006
13007         dir1_fd = openat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1,
13008                          O_CLOEXEC | O_DIRECTORY);
13009         if (dir1_fd < 0) {
13010                 log_stderr("failure: openat");
13011                 goto out;
13012         }
13013
13014         if (mkdirat(dir1_fd, BTRFS_SUBVOLUME_DIR2, 0777)) {
13015                 log_stderr("failure: mkdirat");
13016                 goto out;
13017         }
13018
13019         dir2_fd = openat(dir1_fd, BTRFS_SUBVOLUME_DIR2, O_CLOEXEC | O_DIRECTORY);
13020         if (dir2_fd < 0) {
13021                 log_stderr("failure: openat");
13022                 goto out;
13023         }
13024
13025         if (btrfs_create_subvolume(dir2_fd, BTRFS_SUBVOLUME_SUBVOL4)) {
13026                 log_stderr("failure: btrfs_create_subvolume");
13027                 goto out;
13028         }
13029
13030         if (mkdirat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, 0777)) {
13031                 log_stderr("failure: mkdirat");
13032                 goto out;
13033         }
13034
13035         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13036         if (sys_mount(t_device_scratch, t_buf, "btrfs", 0,
13037                       "subvol=" BTRFS_SUBVOLUME_SUBVOL1)) {
13038                 log_stderr("failure: mount");
13039                 goto out;
13040         }
13041
13042         mnt_fd = openat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, O_CLOEXEC | O_DIRECTORY);
13043         if (mnt_fd < 0) {
13044                 log_stderr("failure: openat");
13045                 goto out;
13046         }
13047
13048         if (chown_r(t_mnt_scratch_fd, ".", 1000, 1000)) {
13049                 log_stderr("failure: chown_r");
13050                 goto out;
13051         }
13052
13053         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] = openat(t_mnt_scratch_fd,
13054                                                            BTRFS_SUBVOLUME_SUBVOL2,
13055                                                            O_CLOEXEC | O_DIRECTORY);
13056         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] < 0) {
13057                 log_stderr("failure: openat");
13058                 goto out;
13059         }
13060
13061         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID],
13062                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL1_ID])) {
13063                 log_stderr("failure: btrfs_get_subvolume_id");
13064                 goto out;
13065         }
13066
13067         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID],
13068                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL2_ID])) {
13069                 log_stderr("failure: btrfs_get_subvolume_id");
13070                 goto out;
13071         }
13072
13073         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] = openat(t_mnt_scratch_fd,
13074                                                            BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3,
13075                                                            O_CLOEXEC | O_DIRECTORY);
13076         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] < 0) {
13077                 log_stderr("failure: openat");
13078                 goto out;
13079         }
13080
13081         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID],
13082                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])) {
13083                 log_stderr("failure: btrfs_get_subvolume_id");
13084                 goto out;
13085         }
13086
13087         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] = openat(t_mnt_scratch_fd,
13088                                                            BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4,
13089                                                            O_CLOEXEC | O_DIRECTORY);
13090         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] < 0) {
13091                 log_stderr("failure: openat");
13092                 goto out;
13093         }
13094
13095         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID],
13096                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])) {
13097                 log_stderr("failure: btrfs_get_subvolume_id");
13098                 goto out;
13099         }
13100
13101
13102         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13103                 log_stderr("failure: fchmod");
13104                 goto out;
13105         }
13106
13107         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13108                 log_stderr("failure: fchmod");
13109                 goto out;
13110         }
13111
13112         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
13113         if (attr.userns_fd < 0) {
13114                 log_stderr("failure: get_userns_fd");
13115                 goto out;
13116         }
13117
13118         open_tree_fd = sys_open_tree(mnt_fd, "",
13119                                      AT_EMPTY_PATH |
13120                                      AT_NO_AUTOMOUNT |
13121                                      AT_SYMLINK_NOFOLLOW |
13122                                      OPEN_TREE_CLOEXEC |
13123                                      OPEN_TREE_CLONE);
13124         if (open_tree_fd < 0) {
13125                 log_stderr("failure: sys_open_tree");
13126                 goto out;
13127         }
13128
13129         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13130                 log_stderr("failure: sys_mount_setattr");
13131                 goto out;
13132         }
13133
13134         /*
13135          * The open_tree() syscall returns an O_PATH file descriptor which we
13136          * can't use with ioctl(). So let's reopen it as a proper file
13137          * descriptor.
13138          */
13139         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
13140         if (tree_fd < 0) {
13141                 log_stderr("failure: openat");
13142                 goto out;
13143         }
13144
13145         pid = fork();
13146         if (pid < 0) {
13147                 log_stderr("failure: fork");
13148                 goto out;
13149         }
13150         if (pid == 0) {
13151                 bool subvolume3_found = false, subvolume4_found = false;
13152
13153                 if (!switch_fsids(11000, 11000))
13154                         die("failure: switch fsids");
13155
13156                 if (!caps_down())
13157                         die("failure: lower caps");
13158
13159                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13160                         die("failure: btrfs_iterator_start");
13161
13162                 for (;;) {
13163                         char *subvol_path = NULL;
13164                         int ret;
13165
13166                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13167                         if (ret == 1)
13168                                 break;
13169                         else if (ret)
13170                                 die("failure: btrfs_iterator_next");
13171
13172                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13173                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13174                                 die("failure: subvolume id %llu->%s",
13175                                     (long long unsigned)subvolid, subvol_path);
13176
13177                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13178                                 subvolume3_found = true;
13179
13180                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13181                                 subvolume4_found = true;
13182
13183                         free(subvol_path);
13184                 }
13185                 btrfs_iterator_end(iter);
13186
13187                 if (!subvolume3_found || !subvolume4_found)
13188                         die("failure: subvolume id");
13189
13190                 exit(EXIT_SUCCESS);
13191         }
13192         if (wait_for_pid(pid))
13193                 goto out;
13194
13195         pid = fork();
13196         if (pid < 0) {
13197                 log_stderr("failure: fork");
13198                 goto out;
13199         }
13200         if (pid == 0) {
13201                 bool subvolume3_found = false, subvolume4_found = false;
13202
13203                 if (!switch_userns(attr.userns_fd, 0, 0, false))
13204                         die("failure: switch_userns");
13205
13206                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13207                         die("failure: btrfs_iterator_start");
13208
13209                 for (;;) {
13210                         char *subvol_path = NULL;
13211                         int ret;
13212
13213                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13214                         if (ret == 1)
13215                                 break;
13216                         else if (ret)
13217                                 die("failure: btrfs_iterator_next");
13218
13219                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13220                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13221                                 die("failure: subvolume id %llu->%s",
13222                                     (long long unsigned)subvolid, subvol_path);
13223
13224                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13225                                 subvolume3_found = true;
13226
13227                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13228                                 subvolume4_found = true;
13229
13230                         free(subvol_path);
13231                 }
13232                 btrfs_iterator_end(iter);
13233
13234                 if (!subvolume3_found || !subvolume4_found)
13235                         die("failure: subvolume id");
13236
13237                 exit(EXIT_SUCCESS);
13238         }
13239         if (wait_for_pid(pid))
13240                 goto out;
13241
13242         pid = fork();
13243         if (pid < 0) {
13244                 log_stderr("failure: fork");
13245                 goto out;
13246         }
13247         if (pid == 0) {
13248                 bool subvolume_found = false;
13249
13250                 if (!switch_fsids(0, 0))
13251                         die("failure: switch fsids");
13252
13253                 if (!caps_down())
13254                         die("failure: lower caps");
13255
13256                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13257                         die("failure: btrfs_iterator_start");
13258
13259                 for (;;) {
13260                         char *subvol_path = NULL;
13261                         int ret;
13262
13263                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13264                         if (ret == 1)
13265                                 break;
13266                         else if (ret)
13267                                 die("failure: btrfs_iterator_next");
13268
13269                         free(subvol_path);
13270
13271                         subvolume_found = true;
13272                         break;
13273                 }
13274                 btrfs_iterator_end(iter);
13275
13276                 if (subvolume_found)
13277                         die("failure: subvolume id");
13278
13279                 exit(EXIT_SUCCESS);
13280         }
13281         if (wait_for_pid(pid))
13282                 goto out;
13283
13284         userns_fd = get_userns_fd(0, 30000, 10000);
13285         if (userns_fd < 0) {
13286                 log_stderr("failure: get_userns_fd");
13287                 goto out;
13288         }
13289
13290         pid = fork();
13291         if (pid < 0) {
13292                 log_stderr("failure: fork");
13293                 goto out;
13294         }
13295         if (pid == 0) {
13296                 bool subvolume_found = false;
13297
13298                 if (!switch_userns(userns_fd, 0, 0, true))
13299                         die("failure: switch_userns");
13300
13301                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13302                         die("failure: btrfs_iterator_start");
13303
13304                 for (;;) {
13305                         char *subvol_path = NULL;
13306                         int ret;
13307
13308                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13309                         if (ret == 1)
13310                                 break;
13311                         else if (ret)
13312                                 die("failure: btrfs_iterator_next");
13313
13314                         free(subvol_path);
13315
13316                         subvolume_found = true;
13317                         break;
13318                 }
13319                 btrfs_iterator_end(iter);
13320
13321                 if (subvolume_found)
13322                         die("failure: subvolume id");
13323
13324                 exit(EXIT_SUCCESS);
13325         }
13326         if (wait_for_pid(pid))
13327                 goto out;
13328
13329         fret = 0;
13330         log_debug("Ran test");
13331 out:
13332         safe_close(dir1_fd);
13333         safe_close(dir2_fd);
13334         safe_close(open_tree_fd);
13335         safe_close(tree_fd);
13336         safe_close(userns_fd);
13337         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
13338                 safe_close(subvolume_fds[i]);
13339         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13340         sys_umount2(t_buf, MNT_DETACH);
13341         unlinkat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, AT_REMOVEDIR);
13342
13343         return fret;
13344 }
13345
/*
 * Names of the two accounts whose uid/gid setattr_fix_968219708108()
 * resolves through lookup_ids().
 */
#define USER1   "fsgqa"
#define USER2   "fsgqa2"
13348
/**
 * lookup_ids - lookup uid and gid for a username
 * @name: [in]  name of the user
 * @uid:  [out] pointer to the user-ID
 * @gid:  [out] pointer to the group-ID
 *
 * Resolve @name with getpwnam_r() and report the uid and gid stored in the
 * matching passwd entry.
 *
 * The scratch buffer starts at the size suggested by
 * sysconf(_SC_GETPW_R_SIZE_MAX), or at 1024 bytes when no usable suggestion
 * is available. Whenever getpwnam_r() reports ERANGE the buffer is doubled
 * and the lookup retried (up to a fixed cap), so that an unusually long
 * passwd entry is not mistaken for a missing user.
 *
 * @uid and @gid are only written on success.
 *
 * Return: On success, true is returned.
 *         On error, or if no entry for @name exists, false is returned.
 */
static bool lookup_ids(const char *name, uid_t *uid, gid_t *gid)
{
        enum {
                LOOKUP_IDS_BUF_DEFAULT  = 1024,
                LOOKUP_IDS_BUF_MAX      = 1024 * 1024,
        };
        bool bret = false;
        struct passwd *pwentp = NULL;
        struct passwd pwent;
        char *buf = NULL;
        size_t bufsize;
        long hint;

        /* The sysconf() value is only a hint and may be -1 (or 0). */
        hint = sysconf(_SC_GETPW_R_SIZE_MAX);
        bufsize = hint > 0 ? (size_t)hint : LOOKUP_IDS_BUF_DEFAULT;

        for (;;) {
                char *grown;
                int ret;

                /* Keep the old buffer reachable so it is freed on failure. */
                grown = realloc(buf, bufsize);
                if (!grown)
                        break;
                buf = grown;

                ret = getpwnam_r(name, &pwent, buf, bufsize, &pwentp);
                if (ret == ERANGE && bufsize < LOOKUP_IDS_BUF_MAX) {
                        /* Buffer too small for this entry: grow and retry. */
                        bufsize *= 2;
                        continue;
                }

                /* ret == 0 with a NULL result means "no such user". */
                if (!ret && pwentp) {
                        *uid = pwentp->pw_uid;
                        *gid = pwentp->pw_gid;
                        bret = true;
                }
                break;
        }

        free(buf);
        return bret;
}
13387
13388 /**
13389  * setattr_fix_968219708108 - test for commit 968219708108 ("fs: handle circular mappings correctly")
13390  *
13391  * Test that ->setattr() works correctly for idmapped mounts with circular
13392  * idmappings such as:
13393  *
13394  * b:1000:1001:1
13395  * b:1001:1000:1
13396  *
13397  * Assume a directory /source with two files:
13398  *
13399  * /source/file1 | 1000:1000
13400  * /source/file2 | 1001:1001
13401  *
13402  * and we create an idmapped mount of /source at /target with an idmapped of:
13403  *
13404  * mnt_userns:        1000:1001:1
13405  *                    1001:1000:1
13406  *
13407  * In the idmapped mount file1 will be owned by uid 1001 and file2 by uid 1000:
13408  *
13409  * /target/file1 | 1001:1001
13410  * /target/file2 | 1000:1000
13411  *
13412  * Because in essence the idmapped mount switches ownership for {g,u}id 1000
13413  * and {g,u}id 1001.
13414  *
13415  * 1. A user with fs{g,u}id 1000 must be allowed to setattr /target/file2 from
13416  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13417  * 2. A user with fs{g,u}id 1001 must be allowed to setattr /target/file1 from
13418  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1001.
13419  * 3. A user with fs{g,u}id 1000 must fail to setattr /target/file1 from
13420  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1000.
13421  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13422  *    {g,u}id of the file.
13423  * 4. A user with fs{g,u}id 1001 must fail to setattr /target/file2 from
13424  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13425  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13426  *    {g,u}id of the file.
13427  * 5. Both, a user with fs{g,u}id 1000 and a user with fs{g,u}id 1001, must
13428  *    fail to setattr /target/file1 owned by {g,u}id 1001 in the idmapped mount
13429  *    and /target/file2 owned by {g,u}id 1000 in the idmapped mount to any
13430  *    {g,u}id apart from {g,u}id 1000 or 1001 with EINVAL.
13431  *    Only {g,u}id 1000 and 1001 have a mapping in the idmapped mount. Other
13432  *    {g,u}id are unmapped.
13433  */
13434 static int setattr_fix_968219708108(void)
13435 {
13436         int fret = -1;
13437         int open_tree_fd = -EBADF;
13438         struct mount_attr attr = {
13439                 .attr_set       = MOUNT_ATTR_IDMAP,
13440                 .userns_fd      = -EBADF,
13441         };
13442         int ret;
13443         uid_t user1_uid, user2_uid;
13444         gid_t user1_gid, user2_gid;
13445         pid_t pid;
13446         struct list idmap;
13447         struct list *it_cur, *it_next;
13448
13449         if (!caps_supported())
13450                 return 0;
13451
13452         list_init(&idmap);
13453
13454         if (!lookup_ids(USER1, &user1_uid, &user1_gid)) {
13455                 log_stderr("failure: lookup_user");
13456                 goto out;
13457         }
13458
13459         if (!lookup_ids(USER2, &user2_uid, &user2_gid)) {
13460                 log_stderr("failure: lookup_user");
13461                 goto out;
13462         }
13463
13464         log_debug("Found " USER1 " with uid(%d) and gid(%d) and " USER2 " with uid(%d) and gid(%d)",
13465                   user1_uid, user1_gid, user2_uid, user2_gid);
13466
13467         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
13468                 log_stderr("failure: mkdirat");
13469                 goto out;
13470         }
13471
13472         if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | 0644, 0)) {
13473                 log_stderr("failure: mknodat");
13474                 goto out;
13475         }
13476
13477         if (chown_r(t_mnt_fd, T_DIR1, user1_uid, user1_gid)) {
13478                 log_stderr("failure: chown_r");
13479                 goto out;
13480         }
13481
13482         if (mknodat(t_dir1_fd, DIR1 "/" FILE2, S_IFREG | 0644, 0)) {
13483                 log_stderr("failure: mknodat");
13484                 goto out;
13485         }
13486
13487         if (fchownat(t_dir1_fd, DIR1 "/" FILE2, user2_uid, user2_gid, AT_SYMLINK_NOFOLLOW)) {
13488                 log_stderr("failure: fchownat");
13489                 goto out;
13490         }
13491
13492         print_r(t_mnt_fd, T_DIR1);
13493
13494         /* u:1000:1001:1 */
13495         ret = add_map_entry(&idmap, user1_uid, user2_uid, 1, ID_TYPE_UID);
13496         if (ret) {
13497                 log_stderr("failure: add_map_entry");
13498                 goto out;
13499         }
13500
13501         /* u:1001:1000:1 */
13502         ret = add_map_entry(&idmap, user2_uid, user1_uid, 1, ID_TYPE_UID);
13503         if (ret) {
13504                 log_stderr("failure: add_map_entry");
13505                 goto out;
13506         }
13507
13508         /* g:1000:1001:1 */
13509         ret = add_map_entry(&idmap, user1_gid, user2_gid, 1, ID_TYPE_GID);
13510         if (ret) {
13511                 log_stderr("failure: add_map_entry");
13512                 goto out;
13513         }
13514
13515         /* g:1001:1000:1 */
13516         ret = add_map_entry(&idmap, user2_gid, user1_gid, 1, ID_TYPE_GID);
13517         if (ret) {
13518                 log_stderr("failure: add_map_entry");
13519                 goto out;
13520         }
13521
13522         attr.userns_fd = get_userns_fd_from_idmap(&idmap);
13523         if (attr.userns_fd < 0) {
13524                 log_stderr("failure: get_userns_fd");
13525                 goto out;
13526         }
13527
13528         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
13529                                      AT_NO_AUTOMOUNT |
13530                                      AT_SYMLINK_NOFOLLOW |
13531                                      OPEN_TREE_CLOEXEC |
13532                                      OPEN_TREE_CLONE |
13533                                      AT_RECURSIVE);
13534         if (open_tree_fd < 0) {
13535                 log_stderr("failure: sys_open_tree");
13536                 goto out;
13537         }
13538
13539         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13540                 log_stderr("failure: sys_mount_setattr");
13541                 goto out;
13542         }
13543
13544         print_r(open_tree_fd, "");
13545
13546         pid = fork();
13547         if (pid < 0) {
13548                 log_stderr("failure: fork");
13549                 goto out;
13550         }
13551         if (pid == 0) {
13552                 /* switch to {g,u}id 1001 */
13553                 if (!switch_resids(user2_uid, user2_gid))
13554                         die("failure: switch_resids");
13555
13556                 /* drop all capabilities */
13557                 if (!caps_down())
13558                         die("failure: caps_down");
13559
13560                 /*
13561                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13562                  * needs to fail with EINVAL.
13563                  */
13564                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13565                         die("failure: change ownership");
13566                 if (errno != EINVAL)
13567                         die("failure: errno");
13568
13569                 /*
13570                  * A user with fs{g,u}id 1001 must be allowed to change
13571                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13572                  * idmapped mount to {g,u}id 1001.
13573                  */
13574                 if (fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13575                              AT_SYMLINK_NOFOLLOW))
13576                         die("failure: change ownership");
13577
13578                 /* Verify that the ownership is still {g,u}id 1001. */
13579                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13580                                       user2_uid, user2_gid))
13581                         die("failure: check ownership");
13582
13583                 /*
13584                  * A user with fs{g,u}id 1001 must not be allowed to change
13585                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13586                  * idmapped mount to {g,u}id 1000.
13587                  */
13588                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13589                               AT_SYMLINK_NOFOLLOW))
13590                         die("failure: change ownership");
13591                 if (errno != EPERM)
13592                         die("failure: errno");
13593
13594                 /* Verify that the ownership is still {g,u}id 1001. */
13595                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13596                                       user2_uid, user2_gid))
13597                         die("failure: check ownership");
13598
13599                 /*
13600                  * A user with fs{g,u}id 1001 must not be allowed to change
13601                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13602                  * idmapped mount to {g,u}id 1000.
13603                  */
13604                 if (!fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13605                               AT_SYMLINK_NOFOLLOW))
13606                         die("failure: change ownership");
13607                 if (errno != EPERM)
13608                         die("failure: errno");
13609
13610                 /* Verify that the ownership is still {g,u}id 1000. */
13611                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13612                                       user1_uid, user1_gid))
13613                         die("failure: check ownership");
13614
13615                 /*
13616                  * A user with fs{g,u}id 1001 must not be allowed to change
13617                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13618                  * idmapped mount to {g,u}id 1001.
13619                  */
13620                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13621                               AT_SYMLINK_NOFOLLOW))
13622                         die("failure: change ownership");
13623                 if (errno != EPERM)
13624                         die("failure: errno");
13625
13626                 /* Verify that the ownership is still {g,u}id 1000. */
13627                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13628                                       user1_uid, user1_gid))
13629                         die("failure: check ownership");
13630
13631                 exit(EXIT_SUCCESS);
13632         }
13633         if (wait_for_pid(pid))
13634                 goto out;
13635
13636         pid = fork();
13637         if (pid < 0) {
13638                 log_stderr("failure: fork");
13639                 goto out;
13640         }
13641         if (pid == 0) {
13642                 /* switch to {g,u}id 1000 */
13643                 if (!switch_resids(user1_uid, user1_gid))
13644                         die("failure: switch_resids");
13645
13646                 /* drop all capabilities */
13647                 if (!caps_down())
13648                         die("failure: caps_down");
13649
13650                 /*
13651                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13652                  * needs to fail with EINVAL.
13653                  */
13654                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13655                         die("failure: change ownership");
13656                 if (errno != EINVAL)
13657                         die("failure: errno");
13658
13659                 /*
13660                  * A user with fs{g,u}id 1000 must be allowed to change
13661                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13662                  * idmapped mount to {g,u}id 1000.
13663                  */
13664                 if (fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13665                              AT_SYMLINK_NOFOLLOW))
13666                         die("failure: change ownership");
13667
13668                 /* Verify that the ownership is still {g,u}id 1000. */
13669                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13670                                       user1_uid, user1_gid))
13671                         die("failure: check ownership");
13672
13673                 /*
13674                  * A user with fs{g,u}id 1000 must not be allowed to change
13675                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13676                  * idmapped mount to {g,u}id 1001.
13677                  */
13678                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13679                               AT_SYMLINK_NOFOLLOW))
13680                         die("failure: change ownership");
13681                 if (errno != EPERM)
13682                         die("failure: errno");
13683
13684                 /* Verify that the ownership is still {g,u}id 1000. */
13685                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13686                                       user1_uid, user1_gid))
13687                         die("failure: check ownership");
13688
13689                 /*
13690                  * A user with fs{g,u}id 1000 must not be allowed to change
13691                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13692                  * idmapped mount to {g,u}id 1000.
13693                  */
13694                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13695                              AT_SYMLINK_NOFOLLOW))
13696                         die("failure: change ownership");
13697                 if (errno != EPERM)
13698                         die("failure: errno");
13699
13700                 /* Verify that the ownership is still {g,u}id 1001. */
13701                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13702                                       user2_uid, user2_gid))
13703                         die("failure: check ownership");
13704
13705                 /*
13706                  * A user with fs{g,u}id 1000 must not be allowed to change
13707                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13708                  * idmapped mount to {g,u}id 1001.
13709                  */
13710                 if (!fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13711                               AT_SYMLINK_NOFOLLOW))
13712                         die("failure: change ownership");
13713                 if (errno != EPERM)
13714                         die("failure: errno");
13715
13716                 /* Verify that the ownership is still {g,u}id 1001. */
13717                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13718                                       user2_uid, user2_gid))
13719                         die("failure: check ownership");
13720
13721                 exit(EXIT_SUCCESS);
13722         }
13723         if (wait_for_pid(pid))
13724                 goto out;
13725
13726         fret = 0;
13727         log_debug("Ran test");
13728 out:
13729         safe_close(attr.userns_fd);
13730         safe_close(open_tree_fd);
13731
13732         list_for_each_safe(it_cur, &idmap, it_next) {
13733                 list_del(it_cur);
13734                 free(it_cur->elem);
13735                 free(it_cur);
13736         }
13737
13738         return fret;
13739 }
13740
/*
 * Write the command line help text to stderr and leave the process through
 * _exit(EXIT_SUCCESS). This function does not return.
 */
static void usage(void)
{
        static const char *const help_text[] = {
                "Description:\n",
                "    Run idmapped mount tests\n\n",
                "Arguments:\n",
                "--device                            Device used in the tests\n",
                "--fstype                            Filesystem type used in the tests\n",
                "--help                              Print help\n",
                "--mountpoint                        Mountpoint of device\n",
                "--supported                         Test whether idmapped mounts are supported on this filesystem\n",
                "--scratch-mountpoint                Mountpoint of scratch device used in the tests\n",
                "--scratch-device                    Scratch device used in the tests\n",
                "--test-core                         Run core idmapped mount testsuite\n",
                "--test-fscaps-regression            Run fscap regression tests\n",
                "--test-nested-userns                Run nested userns idmapped mount testsuite\n",
                "--test-btrfs                        Run btrfs specific idmapped mount testsuite\n",
                "--test-setattr-fix-968219708108     Run setattr regression tests\n",
        };
        const size_t nr_lines = sizeof(help_text) / sizeof(help_text[0]);
        size_t idx;

        /* Emit the fragments in table order; none of them contains '%'. */
        for (idx = 0; idx < nr_lines; idx++)
                fputs(help_text[idx], stderr);

        _exit(EXIT_SUCCESS);
}
13762
/*
 * Table of long command line options (struct option from <getopt.h>).
 * Every entry pairs an option name with the character stored in its val
 * field; no entry uses a flag pointer. The all-zero entry ends the table.
 */
static const struct option longopts[] = {
        { .name = "device",                        .has_arg = required_argument, .flag = NULL, .val = 'd' },
        { .name = "fstype",                        .has_arg = required_argument, .flag = NULL, .val = 'f' },
        { .name = "mountpoint",                    .has_arg = required_argument, .flag = NULL, .val = 'm' },
        { .name = "scratch-mountpoint",            .has_arg = required_argument, .flag = NULL, .val = 'a' },
        { .name = "scratch-device",                .has_arg = required_argument, .flag = NULL, .val = 'e' },
        { .name = "supported",                     .has_arg = no_argument,       .flag = NULL, .val = 's' },
        { .name = "help",                          .has_arg = no_argument,       .flag = NULL, .val = 'h' },
        { .name = "test-core",                     .has_arg = no_argument,       .flag = NULL, .val = 'c' },
        { .name = "test-fscaps-regression",        .has_arg = no_argument,       .flag = NULL, .val = 'g' },
        { .name = "test-nested-userns",            .has_arg = no_argument,       .flag = NULL, .val = 'n' },
        { .name = "test-btrfs",                    .has_arg = no_argument,       .flag = NULL, .val = 'b' },
        { .name = "test-setattr-fix-968219708108", .has_arg = no_argument,       .flag = NULL, .val = 'i' },
        { .name = NULL,                            .has_arg = 0,                 .flag = NULL, .val = 0   },
};
13778
13779 struct t_idmapped_mounts {
13780         int (*test)(void);
13781         const char *description;
13782 } basic_suite[] = {
13783         { acls,                                                         "posix acls on regular mounts",                                                                 },
13784         { create_in_userns,                                             "create operations in user namespace",                                                          },
13785         { device_node_in_userns,                                        "device node in user namespace",                                                                },
13786         { expected_uid_gid_idmapped_mounts,                             "expected ownership on idmapped mounts",                                                        },
13787         { fscaps,                                                       "fscaps on regular mounts",                                                                     },
13788         { fscaps_idmapped_mounts,                                       "fscaps on idmapped mounts",                                                                    },
13789         { fscaps_idmapped_mounts_in_userns,                             "fscaps on idmapped mounts in user namespace",                                                  },
13790         { fscaps_idmapped_mounts_in_userns_separate_userns,             "fscaps on idmapped mounts in user namespace with different id mappings",                       },
13791         { fsids_mapped,                                                 "mapped fsids",                                                                                 },
13792         { fsids_unmapped,                                               "unmapped fsids",                                                                               },
13793         { hardlink_crossing_mounts,                                     "cross mount hardlink",                                                                         },
13794         { hardlink_crossing_idmapped_mounts,                            "cross idmapped mount hardlink",                                                                },
13795         { hardlink_from_idmapped_mount,                                 "hardlinks from idmapped mounts",                                                               },
13796         { hardlink_from_idmapped_mount_in_userns,                       "hardlinks from idmapped mounts in user namespace",                                             },
13797 #ifdef HAVE_LIBURING_H
13798         { io_uring,                                                     "io_uring",                                                                                     },
13799         { io_uring_userns,                                              "io_uring in user namespace",                                                                   },
13800         { io_uring_idmapped,                                            "io_uring from idmapped mounts",                                                                },
13801         { io_uring_idmapped_userns,                                     "io_uring from idmapped mounts in user namespace",                                              },
13802         { io_uring_idmapped_unmapped,                                   "io_uring from idmapped mounts with unmapped ids",                                              },
13803         { io_uring_idmapped_unmapped_userns,                            "io_uring from idmapped mounts with unmapped ids in user namespace",                            },
13804 #endif
13805         { protected_symlinks,                                           "following protected symlinks on regular mounts",                                               },
13806         { protected_symlinks_idmapped_mounts,                           "following protected symlinks on idmapped mounts",                                              },
13807         { protected_symlinks_idmapped_mounts_in_userns,                 "following protected symlinks on idmapped mounts in user namespace",                            },
13808         { rename_crossing_mounts,                                       "cross mount rename",                                                                           },
13809         { rename_crossing_idmapped_mounts,                              "cross idmapped mount rename",                                                                  },
13810         { rename_from_idmapped_mount,                                   "rename from idmapped mounts",                                                                  },
13811         { rename_from_idmapped_mount_in_userns,                         "rename from idmapped mounts in user namespace",                                                },
13812         { setattr_truncate,                                             "setattr truncate",                                                                             },
13813         { setattr_truncate_idmapped,                                    "setattr truncate on idmapped mounts",                                                          },
13814         { setattr_truncate_idmapped_in_userns,                          "setattr truncate on idmapped mounts in user namespace",                                        },
13815         { setgid_create,                                                "create operations in directories with setgid bit set",                                         },
13816         { setgid_create_idmapped,                                       "create operations in directories with setgid bit set on idmapped mounts",                      },
13817         { setgid_create_idmapped_in_userns,                             "create operations in directories with setgid bit set on idmapped mounts in user namespace",    },
13818         { setid_binaries,                                               "setid binaries on regular mounts",                                                             },
13819         { setid_binaries_idmapped_mounts,                               "setid binaries on idmapped mounts",                                                            },
13820         { setid_binaries_idmapped_mounts_in_userns,                     "setid binaries on idmapped mounts in user namespace",                                          },
13821         { setid_binaries_idmapped_mounts_in_userns_separate_userns,     "setid binaries on idmapped mounts in user namespace with different id mappings",               },
13822         { sticky_bit_unlink,                                            "sticky bit unlink operations on regular mounts",                                               },
13823         { sticky_bit_unlink_idmapped_mounts,                            "sticky bit unlink operations on idmapped mounts",                                              },
13824         { sticky_bit_unlink_idmapped_mounts_in_userns,                  "sticky bit unlink operations on idmapped mounts in user namespace",                            },
13825         { sticky_bit_rename,                                            "sticky bit rename operations on regular mounts",                                               },
13826         { sticky_bit_rename_idmapped_mounts,                            "sticky bit rename operations on idmapped mounts",                                              },
13827         { sticky_bit_rename_idmapped_mounts_in_userns,                  "sticky bit rename operations on idmapped mounts in user namespace",                            },
13828         { symlink_regular_mounts,                                       "symlink from regular mounts",                                                                  },
13829         { symlink_idmapped_mounts,                                      "symlink from idmapped mounts",                                                                 },
13830         { symlink_idmapped_mounts_in_userns,                            "symlink from idmapped mounts in user namespace",                                               },
13831         { threaded_idmapped_mount_interactions,                         "threaded operations on idmapped mounts",                                                       },
13832 };
13833
13834 struct t_idmapped_mounts fscaps_in_ancestor_userns[] = {
13835         { fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns,    "fscaps on idmapped mounts in user namespace writing fscap valid in ancestor userns",           },
13836 };
13837
13838 struct t_idmapped_mounts t_nested_userns[] = {
13839         { nested_userns,                                                "test that nested user namespaces behave correctly when attached to idmapped mounts",           },
13840 };
13841
13842 struct t_idmapped_mounts t_btrfs[] = {
13843         { btrfs_subvolumes_fsids_mapped,                                "test subvolumes with mapped fsids",                                                            },
13844         { btrfs_subvolumes_fsids_mapped_userns,                         "test subvolumes with mapped fsids inside user namespace",                                      },
13845         { btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed,         "test subvolume deletion with user_subvol_rm_allowed mount option",                             },
13846         { btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed,  "test subvolume deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13847         { btrfs_subvolumes_fsids_unmapped,                              "test subvolumes with unmapped fsids",                                                          },
13848         { btrfs_subvolumes_fsids_unmapped_userns,                       "test subvolumes with unmapped fsids inside user namespace",                                    },
13849         { btrfs_snapshots_fsids_mapped,                                 "test snapshots with mapped fsids",                                                             },
13850         { btrfs_snapshots_fsids_mapped_userns,                          "test snapshots with mapped fsids inside user namespace",                                       },
13851         { btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed,          "test snapshots deletion with user_subvol_rm_allowed mount option",                             },
13852         { btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed,   "test snapshots deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13853         { btrfs_snapshots_fsids_unmapped,                               "test snapshots with unmapped fsids",                                                           },
13854         { btrfs_snapshots_fsids_unmapped_userns,                        "test snapshots with unmapped fsids inside user namespace",                                     },
13855         { btrfs_delete_by_spec_id,                                      "test subvolume deletion by spec id",                                                           },
13856         { btrfs_subvolumes_setflags_fsids_mapped,                       "test subvolume flags with mapped fsids",                                                       },
13857         { btrfs_subvolumes_setflags_fsids_mapped_userns,                "test subvolume flags with mapped fsids inside user namespace",                                 },
13858         { btrfs_subvolumes_setflags_fsids_unmapped,                     "test subvolume flags with unmapped fsids",                                                     },
13859         { btrfs_subvolumes_setflags_fsids_unmapped_userns,              "test subvolume flags with unmapped fsids inside user namespace",                               },
13860         { btrfs_snapshots_setflags_fsids_mapped,                        "test snapshots flags with mapped fsids",                                                       },
13861         { btrfs_snapshots_setflags_fsids_mapped_userns,                 "test snapshots flags with mapped fsids inside user namespace",                                 },
13862         { btrfs_snapshots_setflags_fsids_unmapped,                      "test snapshots flags with unmapped fsids",                                                     },
13863         { btrfs_snapshots_setflags_fsids_unmapped_userns,               "test snapshots flags with unmapped fsids inside user namespace",                               },
13864         { btrfs_subvolume_lookup_user,                                  "test unprivileged subvolume lookup",                                                           },
13865 };
13866
13867 /* Test for commit 968219708108 ("fs: handle circular mappings correctly"). */
13868 struct t_idmapped_mounts t_setattr_fix_968219708108[] = {
13869         { setattr_fix_968219708108,                                     "test that setattr works correctly",                                                            },
13870 };
13871
13872 static bool run_test(struct t_idmapped_mounts suite[], size_t suite_size)
13873 {
13874         int i;
13875
13876         for (i = 0; i < suite_size; i++) {
13877                 struct t_idmapped_mounts *t = &suite[i];
13878                 int ret;
13879                 pid_t pid;
13880
13881                 test_setup();
13882
13883                 pid = fork();
13884                 if (pid < 0)
13885                         return false;
13886
13887                 if (pid == 0) {
13888                         ret = t->test();
13889                         if (ret)
13890                                 die("failure: %s", t->description);
13891
13892                         exit(EXIT_SUCCESS);
13893                 }
13894
13895                 ret = wait_for_pid(pid);
13896                 test_cleanup();
13897
13898                 if (ret)
13899                         return false;
13900         }
13901
13902         return true;
13903 }
13904
13905 int main(int argc, char *argv[])
13906 {
13907         int fret, ret;
13908         int index = 0;
13909         bool supported = false, test_btrfs = false, test_core = false,
13910              test_fscaps_regression = false, test_nested_userns = false,
13911              test_setattr_fix_968219708108 = false;
13912
13913         while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
13914                 switch (ret) {
13915                 case 'd':
13916                         t_device = optarg;
13917                         break;
13918                 case 'f':
13919                         t_fstype = optarg;
13920                         break;
13921                 case 'm':
13922                         t_mountpoint = optarg;
13923                         break;
13924                 case 's':
13925                         supported = true;
13926                         break;
13927                 case 'c':
13928                         test_core = true;
13929                         break;
13930                 case 'g':
13931                         test_fscaps_regression = true;
13932                         break;
13933                 case 'n':
13934                         test_nested_userns = true;
13935                         break;
13936                 case 'b':
13937                         test_btrfs = true;
13938                         break;
13939                 case 'a':
13940                         t_mountpoint_scratch = optarg;
13941                         break;
13942                 case 'e':
13943                         t_device_scratch = optarg;
13944                         break;
13945                 case 'i':
13946                         test_setattr_fix_968219708108 = true;
13947                         break;
13948                 case 'h':
13949                         /* fallthrough */
13950                 default:
13951                         usage();
13952                 }
13953         }
13954
13955         if (!t_device)
13956                 die_errno(EINVAL, "test device missing");
13957
13958         if (!t_fstype)
13959                 die_errno(EINVAL, "test filesystem type missing");
13960
13961         if (!t_mountpoint)
13962                 die_errno(EINVAL, "mountpoint of test device missing");
13963
13964         /* create separate mount namespace */
13965         if (unshare(CLONE_NEWNS))
13966                 die("failure: create new mount namespace");
13967
13968         /* turn off mount propagation */
13969         if (sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
13970                 die("failure: turn mount propagation off");
13971
13972         t_mnt_fd = openat(-EBADF, t_mountpoint, O_CLOEXEC | O_DIRECTORY);
13973         if (t_mnt_fd < 0)
13974                 die("failed to open %s", t_mountpoint);
13975
13976         t_mnt_scratch_fd = openat(-EBADF, t_mountpoint_scratch, O_CLOEXEC | O_DIRECTORY);
13977         if (t_mnt_fd < 0)
13978                 die("failed to open %s", t_mountpoint_scratch);
13979
13980         /*
13981          * Caller just wants to know whether the filesystem we're on supports
13982          * idmapped mounts.
13983          */
13984         if (supported) {
13985                 int open_tree_fd = -EBADF;
13986                 struct mount_attr attr = {
13987                         .attr_set       = MOUNT_ATTR_IDMAP,
13988                         .userns_fd      = -EBADF,
13989                 };
13990
13991                 /* Changing mount properties on a detached mount. */
13992                 attr.userns_fd  = get_userns_fd(0, 1000, 1);
13993                 if (attr.userns_fd < 0)
13994                         exit(EXIT_FAILURE);
13995
13996                 open_tree_fd = sys_open_tree(t_mnt_fd, "",
13997                                              AT_EMPTY_PATH |
13998                                              AT_NO_AUTOMOUNT |
13999                                              AT_SYMLINK_NOFOLLOW |
14000                                              OPEN_TREE_CLOEXEC |
14001                                              OPEN_TREE_CLONE);
14002                 if (open_tree_fd < 0)
14003                         ret = -1;
14004                 else
14005                         ret = sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr));
14006
14007                 close(open_tree_fd);
14008                 close(attr.userns_fd);
14009
14010                 if (ret)
14011                         exit(EXIT_FAILURE);
14012
14013                 exit(EXIT_SUCCESS);
14014         }
14015
14016         stash_overflowuid();
14017         stash_overflowgid();
14018
14019         fret = EXIT_FAILURE;
14020
14021         if (test_core && !run_test(basic_suite, ARRAY_SIZE(basic_suite)))
14022                 goto out;
14023
14024         if (test_fscaps_regression &&
14025             !run_test(fscaps_in_ancestor_userns,
14026                       ARRAY_SIZE(fscaps_in_ancestor_userns)))
14027                 goto out;
14028
14029         if (test_nested_userns &&
14030             !run_test(t_nested_userns, ARRAY_SIZE(t_nested_userns)))
14031                 goto out;
14032
14033         if (test_btrfs && !run_test(t_btrfs, ARRAY_SIZE(t_btrfs)))
14034                 goto out;
14035
14036         if (test_setattr_fix_968219708108 &&
14037             !run_test(t_setattr_fix_968219708108,
14038                       ARRAY_SIZE(t_setattr_fix_968219708108)))
14039                 goto out;
14040
14041         fret = EXIT_SUCCESS;
14042
14043 out:
14044         exit(fret);
14045 }