idmapped-mounts: add missing ownership comparisons to setgid tests
[xfstests-dev.git] / src / idmapped-mounts / idmapped-mounts.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef _GNU_SOURCE
3 #define _GNU_SOURCE
4 #endif
5
6 #include "../global.h"
7
8 #include <dirent.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <grp.h>
13 #include <limits.h>
14 #include <linux/limits.h>
15 #include <linux/types.h>
16 #include <pthread.h>
17 #include <pwd.h>
18 #include <sched.h>
19 #include <stdbool.h>
20 #include <sys/fsuid.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <sys/xattr.h>
24 #include <unistd.h>
25
26 #ifdef HAVE_LINUX_BTRFS_H
27 # ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID
28 #  define btrfs_ioctl_vol_args_v2 override_btrfs_ioctl_vol_args_v2
29 # endif
30 #include <linux/btrfs.h>
31 # undef btrfs_ioctl_vol_args_v2
32 #endif
33
34 #ifdef HAVE_LINUX_BTRFS_TREE_H
35 #include <linux/btrfs_tree.h>
36 #endif
37
38 #ifdef HAVE_SYS_CAPABILITY_H
39 #include <sys/capability.h>
40 #endif
41
42 #ifdef HAVE_LIBURING_H
43 #include <liburing.h>
44 #endif
45
46 #include "missing.h"
47 #include "utils.h"
48
/*
 * Names of the filesystem objects the tests create. T_DIR1 is the directory
 * test_setup() creates below @t_mnt_fd; the remaining names are used
 * relative to that directory (or to a mount cloned from it).
 */
49 #define T_DIR1 "idmapped_mounts_1"
50 #define FILE1 "file1"
51 #define FILE1_RENAME "file1_rename"
52 #define FILE2 "file2"
53 #define FILE2_RENAME "file2_rename"
54 #define DIR1 "dir1"
55 #define DIR2 "dir2"
56 #define DIR3 "dir3"
57 #define DIR1_RENAME "dir1_rename"
58 #define HARDLINK1 "hardlink1"
59 #define SYMLINK1 "symlink1"
60 #define SYMLINK_USER1 "symlink_user1"
61 #define SYMLINK_USER2 "symlink_user2"
62 #define SYMLINK_USER3 "symlink_user3"
63 #define CHRDEV1 "chrdev1"
64
/*
 * log_stderr - print a diagnostic to stderr
 *
 * The message is prefixed with the source file, line and function of the call
 * site and with the "%m" conversion (a glibc printf extension that expands to
 * the message for the current errno), so errno should be meaningful when this
 * is invoked.
 */
65 #define log_stderr(format, ...)                                                         \
66         fprintf(stderr, "%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, __func__, \
67                 ##__VA_ARGS__)
68
/*
 * log_debug - like log_stderr() but without the errno text; expands to
 * nothing unless DEBUG_TRACE is defined at build time.
 */
69 #ifdef DEBUG_TRACE
70 #define log_debug(format, ...)                                           \
71         fprintf(stderr, "%s: %d: %s - " format "\n", __FILE__, __LINE__, \
72                 __func__, ##__VA_ARGS__)
73 #else
74 #define log_debug(format, ...)
75 #endif
76
/*
 * log_error_errno - set errno to @__errno__, log, and evaluate to @__ret__
 *
 * @__ret__ is evaluated once, before errno is overwritten, which makes the
 * macro usable as "return log_error_errno(-1, EINVAL, ...)".
 */
77 #define log_error_errno(__ret__, __errno__, format, ...)      \
78         ({                                                    \
79                 typeof(__ret__) __internal_ret__ = (__ret__); \
80                 errno = (__errno__);                          \
81                 log_stderr(format, ##__VA_ARGS__);            \
82                 __internal_ret__;                             \
83         })
84
/* log_errno - log with the current errno and evaluate to @__ret__ */
85 #define log_errno(__ret__, format, ...) log_error_errno(__ret__, errno, format, ##__VA_ARGS__)
86
/* die_errno - set errno to @__errno__, log, and exit with EXIT_FAILURE */
87 #define die_errno(__errno__, format, ...)          \
88         ({                                         \
89                 errno = (__errno__);               \
90                 log_stderr(format, ##__VA_ARGS__); \
91                 exit(EXIT_FAILURE);                \
92         })
93
/* die - log with the current errno and exit with EXIT_FAILURE */
94 #define die(format, ...) die_errno(errno, format, ##__VA_ARGS__)
95
/* ARRAY_SIZE - element count of @A; only valid for true arrays, not pointers */
96 #define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
97
/*
 * Overflow ids. These defaults are replaced by stash_overflowuid() and
 * stash_overflowgid() with the values read from /proc/sys/fs/overflowuid and
 * /proc/sys/fs/overflowgid when those files can be read.
 */
98 uid_t t_overflowuid = 65534;
99 gid_t t_overflowgid = 65534;
100
101 /* filesystem type of the test device (compared against "xfs" by is_xfs()) */
102 const char *t_fstype;
103
104 /* path of the test device */
105 const char *t_device;
106
107 /* path of the test scratch device */
108 const char *t_device_scratch;
109
110 /* mountpoint of the test device */
111 const char *t_mountpoint;
112
113 /* mountpoint of the test scratch device */
114 const char *t_mountpoint_scratch;
115
116 /* fd for @t_mountpoint */
117 int t_mnt_fd;
118
119 /* fd for @t_mountpoint_scratch */
120 int t_mnt_scratch_fd;
121
122 /* fd for @T_DIR1; opened by test_setup() and closed by test_cleanup() */
123 int t_dir1_fd;
124
125 /* temporary buffer */
126 char t_buf[PATH_MAX];
127
128 static void stash_overflowuid(void)
129 {
130         int fd;
131         ssize_t ret;
132         char buf[256];
133
134         fd = open("/proc/sys/fs/overflowuid", O_RDONLY | O_CLOEXEC);
135         if (fd < 0)
136                 return;
137
138         ret = read(fd, buf, sizeof(buf));
139         close(fd);
140         if (ret < 0)
141                 return;
142
143         t_overflowuid = atoi(buf);
144 }
145
146 static void stash_overflowgid(void)
147 {
148         int fd;
149         ssize_t ret;
150         char buf[256];
151
152         fd = open("/proc/sys/fs/overflowgid", O_RDONLY | O_CLOEXEC);
153         if (fd < 0)
154                 return;
155
156         ret = read(fd, buf, sizeof(buf));
157         close(fd);
158         if (ret < 0)
159                 return;
160
161         t_overflowgid = atoi(buf);
162 }
163
164 static bool is_xfs(void)
165 {
166         static int enabled = -1;
167
168         if (enabled == -1)
169                 enabled = !strcmp(t_fstype, "xfs");
170
171         return enabled;
172 }
173
/*
 * protected_symlinks_enabled - check whether fs.protected_symlinks is >= 1
 *
 * The sysctl is read from /proc/sys/fs/protected_symlinks on the first call
 * and the answer is cached. If the file cannot be opened or read, the feature
 * is reported (and cached) as disabled.
 */
static bool protected_symlinks_enabled(void)
{
        static int enabled = -1;

        if (enabled == -1) {
                int fd;
                ssize_t ret;
                char buf[256];

                enabled = 0;

                fd = open("/proc/sys/fs/protected_symlinks", O_RDONLY | O_CLOEXEC);
                if (fd < 0)
                        return false;

                /* read() doesn't NUL-terminate, so leave room for the terminator. */
                ret = read(fd, buf, sizeof(buf) - 1);
                close(fd);
                if (ret <= 0)
                        return false;
                buf[ret] = '\0';

                if (atoi(buf) >= 1)
                        enabled = 1;
        }

        return enabled == 1;
}
200
/*
 * xfs_irix_sgid_inherit_enabled - check whether fs.xfs.irix_sgid_inherit is >= 1
 *
 * Only consulted when is_xfs() is true; for any other filesystem the answer
 * is false. The sysctl is read on the first call and the answer is cached. If
 * the file cannot be opened or read, the feature is reported (and cached) as
 * disabled.
 */
static bool xfs_irix_sgid_inherit_enabled(void)
{
        static int enabled = -1;

        if (enabled == -1) {
                int fd;
                ssize_t ret;
                char buf[256];

                enabled = 0;

                if (is_xfs()) {
                        fd = open("/proc/sys/fs/xfs/irix_sgid_inherit", O_RDONLY | O_CLOEXEC);
                        if (fd < 0)
                                return false;

                        /* read() doesn't NUL-terminate, so leave room for the terminator. */
                        ret = read(fd, buf, sizeof(buf) - 1);
                        close(fd);
                        if (ret <= 0)
                                return false;
                        buf[ret] = '\0';

                        if (atoi(buf) >= 1)
                                enabled = 1;
                }
        }

        return enabled == 1;
}
229
/* caps_supported - true when built with <sys/capability.h> available */
static inline bool caps_supported(void)
{
#ifdef HAVE_SYS_CAPABILITY_H
        return true;
#else
        return false;
#endif
}
240
/*
 * caps_down - lower all effective caps
 *
 * Returns non-zero when the effective set was cleared and applied to the
 * process, zero on any failure or when built without HAVE_SYS_CAPABILITY_H.
 */
static int caps_down(void)
{
        bool lowered = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t state = cap_get_proc();

        if (state && !cap_clear_flag(state, CAP_EFFECTIVE) && !cap_set_proc(state))
                lowered = true;

        cap_free(state);
#endif
        return lowered;
}
268
/*
 * caps_up - raise all permitted caps
 *
 * Copies the value of every permitted capability flag into the effective set
 * and applies the result to the process. Iteration stops early when
 * cap_get_flag() reports EINVAL for a capability number. Returns non-zero on
 * success, zero on failure or when built without HAVE_SYS_CAPABILITY_H.
 */
static int caps_up(void)
{
        bool raised = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t state;
        cap_value_t idx = 0;

        state = cap_get_proc();
        if (!state)
                goto done;

        while (idx <= CAP_LAST_CAP) {
                cap_flag_value_t permitted;

                if (cap_get_flag(state, idx, CAP_PERMITTED, &permitted)) {
                        if (errno != EINVAL)
                                goto done;
                        break;
                }

                if (cap_set_flag(state, CAP_EFFECTIVE, 1, &idx, permitted))
                        goto done;

                idx++;
        }

        if (cap_set_proc(state) == 0)
                raised = true;
done:
        cap_free(state);
#endif
        return raised;
}
308
/*
 * __expected_uid_gid - check whether file is owned by the provided uid and gid
 *
 * @dfd, @path and @flags are handed to fstatat(). When @log is true every
 * mismatching id is reported on stderr. errno is reset to 0 once the stat
 * succeeded so that callers don't log a misleading errno. Returns true only
 * if both ids match; false if they don't or if fstatat() failed.
 */
static bool __expected_uid_gid(int dfd, const char *path, int flags,
                               uid_t expected_uid, gid_t expected_gid, bool log)
{
        struct stat info;
        bool uid_ok, gid_ok;

        if (fstatat(dfd, path, &info, flags) < 0)
                return log_errno(false, "failure: fstatat");

        uid_ok = (info.st_uid == expected_uid);
        gid_ok = (info.st_gid == expected_gid);

        if (log) {
                if (!uid_ok)
                        log_stderr("failure: uid(%d) != expected_uid(%d)", info.st_uid, expected_uid);

                if (!gid_ok)
                        log_stderr("failure: gid(%d) != expected_gid(%d)", info.st_gid, expected_gid);
        }

        errno = 0; /* Don't report misleading errno. */
        return uid_ok && gid_ok;
}
329
/* expected_uid_gid - __expected_uid_gid() with mismatch logging enabled */
static bool expected_uid_gid(int dfd, const char *path, int flags,
                             uid_t expected_uid, gid_t expected_gid)
{
        const bool log_mismatch = true;

        return __expected_uid_gid(dfd, path, flags, expected_uid,
                                  expected_gid, log_mismatch);
}
336
/*
 * expected_file_size - check whether file has the provided size
 *
 * @dfd, @path and @flags are handed to fstatat(). Logs and returns false when
 * the stat fails or st_size differs from @expected_size.
 */
static bool expected_file_size(int dfd, const char *path,
                               int flags, off_t expected_size)
{
        struct stat info;

        if (fstatat(dfd, path, &info, flags) < 0)
                return log_errno(false, "failure: fstatat");

        if (info.st_size == expected_size)
                return true;

        return log_errno(false, "failure: st_size(%zu) != expected_size(%zu)",
                         (size_t)info.st_size, (size_t)expected_size);
}
353
/*
 * is_setid - check whether file has S_ISUID or S_ISGID set
 *
 * Returns false when fstatat() fails. On success errno is reset to 0.
 */
static bool is_setid(int dfd, const char *path, int flags)
{
        struct stat info;

        if (fstatat(dfd, path, &info, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (info.st_mode & (S_ISUID | S_ISGID)) != 0;
}
367
/*
 * is_setgid - check whether file or directory is S_ISGID
 *
 * Returns false when fstatat() fails. On success errno is reset to 0.
 */
static bool is_setgid(int dfd, const char *path, int flags)
{
        struct stat info;

        if (fstatat(dfd, path, &info, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (info.st_mode & S_ISGID) ? true : false;
}
381
/*
 * is_sticky - check whether file is S_ISVTX
 *
 * Returns false when fstatat() fails. On success errno is reset to 0.
 */
static bool is_sticky(int dfd, const char *path, int flags)
{
        struct stat info;

        if (fstatat(dfd, path, &info, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (info.st_mode & S_ISVTX) != 0;
}
395
/*
 * switch_fsids - switch the caller's filesystem uid and gid
 *
 * setfsgid() and setfsuid() return the *previous* id both on success and on
 * failure, so the return value of the setting call says nothing about whether
 * the switch worked (and is non-zero whenever the previous id was non-zero).
 * The switch is therefore verified by a second call with -1, which never
 * changes the id and reports the one currently in effect.
 *
 * Returns true if both ids were switched, false (after logging) otherwise.
 */
static inline bool switch_fsids(uid_t fsuid, gid_t fsgid)
{
        setfsgid(fsgid);
        if (setfsgid(-1) != fsgid)
                return log_errno(false, "failure: setfsgid(-1)");

        setfsuid(fsuid);
        if (setfsuid(-1) != fsuid)
                return log_errno(false, "failure: setfsuid(-1)");

        return true;
}
412
/*
 * switch_resids - switch the caller's real, effective and saved ids
 *
 * The gid is changed before the uid. Afterwards the filesystem ids are read
 * back with setfsgid(-1)/setfsuid(-1) and must equal @gid/@uid.
 *
 * Returns true on success, false (after logging) otherwise.
 */
static inline bool switch_resids(uid_t uid, gid_t gid)
{
        if (setresgid(gid, gid, gid))
                return log_errno(false, "failure: setresgid");

        if (setresuid(uid, uid, uid))
                return log_errno(false, "failure: setresuid");

        if (setfsgid(-1) != gid)
                return log_errno(false, "failure: setfsgid(-1)");

        if (setfsuid(-1) != uid)
                return log_errno(false, "failure: setfsuid(-1)");

        return true;
}
429
/*
 * switch_userns - enter the user namespace referred to by @fd
 *
 * After setns() the caller's ids are switched to @uid/@gid via switch_ids().
 * When @drop_caps is true the effective capabilities are lowered as well.
 *
 * Returns true on success, false (after logging) otherwise.
 */
static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
{
        bool ids_switched;

        if (setns(fd, CLONE_NEWUSER) != 0)
                return log_errno(false, "failure: setns");

        ids_switched = switch_ids(uid, gid);
        if (!ids_switched)
                return log_errno(false, "failure: switch_ids");

        if (!drop_caps)
                return true;

        if (!caps_down())
                return log_errno(false, "failure: caps_down");

        return true;
}
443
/*
 * rm_r - recursively remove all files
 *
 * Removes the directory @path (interpreted relative to @fd) together with
 * everything below it. Entries that disappear while the directory is being
 * walked are skipped. The walk stops at the first real error; removal of
 * @path itself is still attempted and its result is what gets returned.
 *
 * Returns 0 on success, -1 on failure (NULL or empty @path, @path not
 * openable as a directory, or the final unlinkat() failing).
 */
static int rm_r(int fd, const char *path)
{
        int dfd, ret, saved_errno;
        DIR *dir;
        struct dirent *direntp;

        if (!path || strcmp(path, "") == 0)
                return -1;

        dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
        if (dfd < 0)
                return -1;

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                return -1;
        }

        while ((direntp = readdir(dir))) {
                struct stat st;

                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished under us: there's nothing left
                         * to remove and @st holds no valid data to inspect.
                         */
                        if (errno == ENOENT)
                                continue;
                        break;
                }

                if (S_ISDIR(st.st_mode))
                        ret = rm_r(dfd, direntp->d_name);
                else
                        ret = unlinkat(dfd, direntp->d_name, 0);
                if (ret < 0 && errno != ENOENT)
                        break;
        }

        ret = unlinkat(fd, path, AT_REMOVEDIR);
        /* Don't let closedir() clobber the errno the caller will report. */
        saved_errno = errno;
        closedir(dir);
        errno = saved_errno;
        return ret;
}
487
/*
 * chown_r - recursively change ownership of all files
 *
 * Changes the owner of the directory @path (interpreted relative to @fd) and
 * of everything below it to @uid/@gid without following symlinks. Entries
 * that disappear while the directory is being walked are skipped. The walk
 * stops at the first real error; the chown of @path itself is still attempted
 * and its result is what gets returned.
 *
 * Returns 0 on success, -1 on failure.
 */
static int chown_r(int fd, const char *path, uid_t uid, gid_t gid)
{
        int dfd, ret, saved_errno;
        DIR *dir;
        struct dirent *direntp;

        dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
        if (dfd < 0)
                return -1;

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                return -1;
        }

        while ((direntp = readdir(dir))) {
                struct stat st;

                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished under us: there's nothing left
                         * to chown and @st holds no valid data to inspect.
                         */
                        if (errno == ENOENT)
                                continue;
                        break;
                }

                if (S_ISDIR(st.st_mode))
                        ret = chown_r(dfd, direntp->d_name, uid, gid);
                else
                        ret = fchownat(dfd, direntp->d_name, uid, gid, AT_SYMLINK_NOFOLLOW);
                if (ret < 0 && errno != ENOENT)
                        break;
        }

        ret = fchownat(fd, path, uid, gid, AT_SYMLINK_NOFOLLOW);
        /* Don't let closedir() clobber the errno the caller will report. */
        saved_errno = errno;
        closedir(dir);
        errno = saved_errno;
        return ret;
}
528
529 /*
530  * There'll be scenarios where you'll want to see the attributes associated with
531  * a directory tree during debugging or just to make sure things look correct.
532  * Simply uncomment and place the print_r() helper where you need it.
533  */
534 #ifdef DEBUG_TRACE
/*
 * fd_cloexec - set or clear FD_CLOEXEC on @fd
 *
 * The descriptor flags are only rewritten when they actually change.
 * Returns 0 on success and -errno when fcntl() fails.
 */
static int fd_cloexec(int fd, bool cloexec)
{
        int cur, want;

        cur = fcntl(fd, F_GETFD, 0);
        if (cur < 0)
                return -errno;

        want = cloexec ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);
        if (want != cur && fcntl(fd, F_SETFD, want) < 0)
                return -errno;

        return 0;
}
556
/*
 * dup_cloexec - duplicate @fd with FD_CLOEXEC set on the copy
 *
 * Returns the new file descriptor on success and a negative errno value on
 * failure.
 */
static inline int dup_cloexec(int fd)
{
        int fd_dup, ret;

        fd_dup = dup(fd);
        if (fd_dup < 0)
                return -errno;

        ret = fd_cloexec(fd_dup, true);
        if (ret) {
                /*
                 * fd_cloexec() already handed back -errno; return that rather
                 * than re-reading errno, which close() may overwrite.
                 */
                close(fd_dup);
                return ret;
        }

        return fd_dup;
}
572
/*
 * print_r - recursively print mode and ownership of a directory tree
 *
 * Debugging aid: for every entry below @path (relative to @fd) the mode, uid
 * and gid are written to stderr, followed by a line for @path itself. When
 * @path is NULL or empty the directory referred to by @fd is walked instead.
 * Entries that disappear while the directory is being walked are skipped.
 *
 * Returns the result of the final fstatat() on the top-level directory, or -1
 * when the directory could not be opened.
 */
__attribute__((unused)) static int print_r(int fd, const char *path)
{
        int ret = 0;
        int dfd, dfd_dup;
        DIR *dir;
        struct dirent *direntp;
        struct stat st;

        if (!path || *path == '\0') {
                char buf[sizeof("/proc/self/fd/") + 30];

                ret = snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd);
                if (ret < 0 || (size_t)ret >= sizeof(buf))
                        return -1;

                /*
                 * O_PATH file descriptors can't be used so we need to re-open
                 * just in case.
                 */
                dfd = openat(-EBADF, buf, O_CLOEXEC | O_DIRECTORY, 0);
        } else {
                dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY, 0);
        }
        if (dfd < 0)
                return -1;

        /*
         * When fdopendir() below succeeds it assumes ownership of the fd, so
         * keep a duplicate that the fstatat() calls and the recursion can
         * use independently of the directory stream.
         */
        dfd_dup = dup_cloexec(dfd);
        if (dfd_dup < 0) {
                close(dfd);
                return -1;
        }

        dir = fdopendir(dfd);
        if (!dir) {
                close(dfd);
                close(dfd_dup);
                return -1;
        }
        /* Transfer ownership to fdopendir(). */
        dfd = -EBADF;

        while ((direntp = readdir(dir))) {
                if (!strcmp(direntp->d_name, ".") ||
                    !strcmp(direntp->d_name, ".."))
                        continue;

                ret = fstatat(dfd_dup, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
                if (ret < 0) {
                        /*
                         * The entry vanished under us; @st would hold stale
                         * or uninitialized data, so there's nothing to print.
                         */
                        if (errno == ENOENT)
                                continue;
                        break;
                }

                ret = 0;
                if (S_ISDIR(st.st_mode))
                        ret = print_r(dfd_dup, direntp->d_name);
                else
                        fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %d/%s\n",
                                (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
                                dfd_dup, direntp->d_name);
                if (ret < 0 && errno != ENOENT)
                        break;
        }

        if (!path || *path == '\0')
                ret = fstatat(fd, "", &st,
                              AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
                              AT_EMPTY_PATH);
        else
                ret = fstatat(fd, path, &st,
                              AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW);
        if (!ret)
                fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %s\n",
                        (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
                        (path && *path) ? path : "(null)");

        close(dfd_dup);
        closedir(dir);

        return ret;
}
657 #else
/* print_r - no-op stand-in used when DEBUG_TRACE is not defined */
__attribute__((unused)) static int print_r(int fd, const char *path)
{
        (void)fd;
        (void)path;

        return 0;
}
662 #endif
663
664 /* fd_to_fd - transfer data from one fd to another */
665 static int fd_to_fd(int from, int to)
666 {
667         for (;;) {
668                 uint8_t buf[PATH_MAX];
669                 uint8_t *p = buf;
670                 ssize_t bytes_to_write;
671                 ssize_t bytes_read;
672
673                 bytes_read = read_nointr(from, buf, sizeof buf);
674                 if (bytes_read < 0)
675                         return -1;
676                 if (bytes_read == 0)
677                         break;
678
679                 bytes_to_write = (size_t)bytes_read;
680                 do {
681                         ssize_t bytes_written;
682
683                         bytes_written = write_nointr(to, p, bytes_to_write);
684                         if (bytes_written < 0)
685                                 return -1;
686
687                         bytes_to_write -= bytes_written;
688                         p += bytes_written;
689                 } while (bytes_to_write > 0);
690         }
691
692         return 0;
693 }
694
/*
 * sys_execveat - raw execveat() system call wrapper
 *
 * Fails with -1 and errno set to ENOSYS when the syscall number is not known
 * at build time.
 */
static int sys_execveat(int fd, const char *path, char **argv, char **envp,
                        int flags)
{
#ifndef __NR_execveat
        errno = ENOSYS;
        return -1;
#else
        return syscall(__NR_execveat, fd, path, argv, envp, flags);
#endif
}
705
/*
 * Fallback definitions for the capability constants used by the vfs caps
 * helpers below; each one is only provided when the included headers did not
 * already define it.
 */
706 #ifndef CAP_NET_RAW
707 #define CAP_NET_RAW 13
708 #endif
709
710 #ifndef VFS_CAP_FLAGS_EFFECTIVE
711 #define VFS_CAP_FLAGS_EFFECTIVE 0x000001
712 #endif
713
714 #ifndef VFS_CAP_U32_3
715 #define VFS_CAP_U32_3 2
716 #endif
717
718 #ifndef VFS_CAP_U32
719 #define VFS_CAP_U32 VFS_CAP_U32_3
720 #endif
721
722 #ifndef VFS_CAP_REVISION_1
723 #define VFS_CAP_REVISION_1 0x01000000
724 #endif
725
726 #ifndef VFS_CAP_REVISION_2
727 #define VFS_CAP_REVISION_2 0x02000000
728 #endif
729
/*
 * When VFS_CAP_REVISION_3 is missing the matching xattr layout is assumed to
 * be missing too, so both are defined together. All members are stored
 * little-endian (__le32): the revision/flags word, VFS_CAP_U32 pairs of
 * permitted/inheritable masks, and the root id.
 */
730 #ifndef VFS_CAP_REVISION_3
731 #define VFS_CAP_REVISION_3 0x03000000
732 struct vfs_ns_cap_data {
733         __le32 magic_etc;
734         struct {
735                 __le32 permitted;
736                 __le32 inheritable;
737         } data[VFS_CAP_U32];
738         __le32 rootid;
739 };
740 #endif
741
/*
 * Conversions between CPU byte order and little-endian for 16 and 32 bit
 * values. On big-endian hosts the bytes are swapped; a swap is its own
 * inverse, which is why cpu_to_le*() simply reuses le*_to_cpu(). On
 * little-endian hosts the macros only cast. Note that the big-endian variants
 * evaluate their argument more than once.
 */
742 #if __BYTE_ORDER == __BIG_ENDIAN
743 #define cpu_to_le16(w16) le16_to_cpu(w16)
744 #define le16_to_cpu(w16) ((u_int16_t)((u_int16_t)(w16) >> 8) | (u_int16_t)((u_int16_t)(w16) << 8))
745 #define cpu_to_le32(w32) le32_to_cpu(w32)
746 #define le32_to_cpu(w32)                                                                       \
747         ((u_int32_t)((u_int32_t)(w32) >> 24) | (u_int32_t)(((u_int32_t)(w32) >> 8) & 0xFF00) | \
748          (u_int32_t)(((u_int32_t)(w32) << 8) & 0xFF0000) | (u_int32_t)((u_int32_t)(w32) << 24))
749 #elif __BYTE_ORDER == __LITTLE_ENDIAN
750 #define cpu_to_le16(w16) ((u_int16_t)(w16))
751 #define le16_to_cpu(w16) ((u_int16_t)(w16))
752 #define cpu_to_le32(w32) ((u_int32_t)(w32))
753 #define le32_to_cpu(w32) ((u_int32_t)(w32))
754 #else
755 #error Expected endianess macro to be set
756 #endif
757
758 /* expected_dummy_vfs_caps_uid - check vfs caps are stored with the provided uid */
759 static bool expected_dummy_vfs_caps_uid(int fd, uid_t expected_uid)
760 {
761 #define __cap_raised_permitted(x, ns_cap_data)                                 \
762         ((ns_cap_data.data[(x) >> 5].permitted) & (1 << ((x)&31)))
763         struct vfs_ns_cap_data ns_xattr = {};
764         ssize_t ret;
765
766         ret = fgetxattr(fd, "security.capability", &ns_xattr, sizeof(ns_xattr));
767         if (ret < 0 || ret == 0)
768                 return false;
769
770         if (ns_xattr.magic_etc & VFS_CAP_REVISION_3) {
771
772                 if (le32_to_cpu(ns_xattr.rootid) != expected_uid) {
773                         errno = EINVAL;
774                         log_stderr("failure: rootid(%d) != expected_rootid(%d)", le32_to_cpu(ns_xattr.rootid), expected_uid);
775                 }
776
777                 return (le32_to_cpu(ns_xattr.rootid) == expected_uid) &&
778                        (__cap_raised_permitted(CAP_NET_RAW, ns_xattr) > 0);
779         } else {
780                 log_stderr("failure: fscaps version");
781         }
782
783         return false;
784 }
785
786 /* set_dummy_vfs_caps - set dummy vfs caps for the provided uid */
787 static int set_dummy_vfs_caps(int fd, int flags, int rootuid)
788 {
789 #define __raise_cap_permitted(x, ns_cap_data)                                  \
790         ns_cap_data.data[(x) >> 5].permitted |= (1 << ((x)&31))
791
792         struct vfs_ns_cap_data ns_xattr;
793
794         memset(&ns_xattr, 0, sizeof(ns_xattr));
795         __raise_cap_permitted(CAP_NET_RAW, ns_xattr);
796         ns_xattr.magic_etc |= VFS_CAP_REVISION_3 | VFS_CAP_FLAGS_EFFECTIVE;
797         ns_xattr.rootid = cpu_to_le32(rootuid);
798
799         return fsetxattr(fd, "security.capability",
800                          &ns_xattr, sizeof(ns_xattr), flags);
801 }
802
/*
 * safe_close - close @fd if it is valid and mark it as closed
 *
 * errno is preserved across the close() so the macro can be used on error
 * paths, and @fd is set to -EBADF afterwards so a second invocation is a
 * no-op. @fd must be an lvalue. Wrapped in do { } while (0) so that the
 * macro behaves like a single statement, e.g. in an unbraced if/else.
 */
#define safe_close(fd)                   \
        do {                             \
                if ((fd) >= 0) {         \
                        int _e_ = errno; \
                        close(fd);       \
                        errno = _e_;     \
                        (fd) = -EBADF;   \
                }                        \
        } while (0)
810
811 static void test_setup(void)
812 {
813         if (mkdirat(t_mnt_fd, T_DIR1, 0777))
814                 die("failure: mkdirat");
815
816         t_dir1_fd = openat(t_mnt_fd, T_DIR1, O_CLOEXEC | O_DIRECTORY);
817         if (t_dir1_fd < 0)
818                 die("failure: openat");
819
820         if (fchmod(t_dir1_fd, 0777))
821                 die("failure: fchmod");
822 }
823
824 static void test_cleanup(void)
825 {
826         safe_close(t_dir1_fd);
827         if (rm_r(t_mnt_fd, T_DIR1))
828                 die("failure: rm_r");
829 }
830
831 /* Validate that basic file operations on idmapped mounts. */
832 static int fsids_unmapped(void)
833 {
834         int fret = -1;
835         int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
836         struct mount_attr attr = {
837                 .attr_set = MOUNT_ATTR_IDMAP,
838         };
839
840         /* create hardlink target */
841         hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
842         if (hardlink_target_fd < 0) {
843                 log_stderr("failure: openat");
844                 goto out;
845         }
846
847         /* create directory for rename test */
848         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
849                 log_stderr("failure: mkdirat");
850                 goto out;
851         }
852
853         /* change ownership of all files to uid 0 */
854         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
855                 log_stderr("failure: chown_r");
856                 goto out;
857         }
858
859         /* Changing mount properties on a detached mount. */
860         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
861         if (attr.userns_fd < 0) {
862                 log_stderr("failure: get_userns_fd");
863                 goto out;
864         }
865
866         open_tree_fd = sys_open_tree(t_dir1_fd, "",
867                                      AT_EMPTY_PATH |
868                                      AT_NO_AUTOMOUNT |
869                                      AT_SYMLINK_NOFOLLOW |
870                                      OPEN_TREE_CLOEXEC |
871                                      OPEN_TREE_CLONE);
872         if (open_tree_fd < 0) {
873                 log_stderr("failure: sys_open_tree");
874                 goto out;
875         }
876
877         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
878                 log_stderr("failure: sys_mount_setattr");
879                 goto out;
880         }
881
882         if (!switch_fsids(0, 0)) {
883                 log_stderr("failure: switch_fsids");
884                 goto out;
885         }
886
887         /* The caller's fsids don't have a mappings in the idmapped mount so any
888          * file creation must fail.
889          */
890
891         /* create hardlink */
892         if (!linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
893                 log_stderr("failure: linkat");
894                 goto out;
895         }
896         if (errno != EOVERFLOW) {
897                 log_stderr("failure: errno");
898                 goto out;
899         }
900
901         /* try to rename a file */
902         if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
903                 log_stderr("failure: renameat");
904                 goto out;
905         }
906         if (errno != EOVERFLOW) {
907                 log_stderr("failure: errno");
908                 goto out;
909         }
910
911         /* try to rename a directory */
912         if (!renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME)) {
913                 log_stderr("failure: renameat");
914                 goto out;
915         }
916         if (errno != EOVERFLOW) {
917                 log_stderr("failure: errno");
918                 goto out;
919         }
920
921         /* The caller is privileged over the inode so file deletion must work. */
922
923         /* remove file */
924         if (unlinkat(open_tree_fd, FILE1, 0)) {
925                 log_stderr("failure: unlinkat");
926                 goto out;
927         }
928
929         /* remove directory */
930         if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) {
931                 log_stderr("failure: unlinkat");
932                 goto out;
933         }
934
935         /* The caller's fsids don't have a mappings in the idmapped mount so
936          * any file creation must fail.
937          */
938
939         /* create regular file via open() */
940         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
941         if (file1_fd >= 0) {
942                 log_stderr("failure: create");
943                 goto out;
944         }
945         if (errno != EOVERFLOW) {
946                 log_stderr("failure: errno");
947                 goto out;
948         }
949
950         /* create regular file via mknod */
951         if (!mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0)) {
952                 log_stderr("failure: mknodat");
953                 goto out;
954         }
955         if (errno != EOVERFLOW) {
956                 log_stderr("failure: errno");
957                 goto out;
958         }
959
960         /* create character device */
961         if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
962                 log_stderr("failure: mknodat");
963                 goto out;
964         }
965         if (errno != EOVERFLOW) {
966                 log_stderr("failure: errno");
967                 goto out;
968         }
969
970         /* create symlink */
971         if (!symlinkat(FILE2, open_tree_fd, SYMLINK1)) {
972                 log_stderr("failure: symlinkat");
973                 goto out;
974         }
975         if (errno != EOVERFLOW) {
976                 log_stderr("failure: errno");
977                 goto out;
978         }
979
980         /* create directory */
981         if (!mkdirat(open_tree_fd, DIR1, 0700)) {
982                 log_stderr("failure: mkdirat");
983                 goto out;
984         }
985         if (errno != EOVERFLOW) {
986                 log_stderr("failure: errno");
987                 goto out;
988         }
989
990         fret = 0;
991         log_debug("Ran test");
992 out:
993         safe_close(attr.userns_fd);
994         safe_close(hardlink_target_fd);
995         safe_close(file1_fd);
996         safe_close(open_tree_fd);
997
998         return fret;
999 }
1000
1001 static int fsids_mapped(void)
1002 {
1003         int fret = -1;
1004         int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
1005         struct mount_attr attr = {
1006                 .attr_set = MOUNT_ATTR_IDMAP,
1007         };
1008         pid_t pid;
1009
1010         if (!caps_supported())
1011                 return 0;
1012
1013         /* create hardlink target */
1014         hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1015         if (hardlink_target_fd < 0) {
1016                 log_stderr("failure: openat");
1017                 goto out;
1018         }
1019
1020         /* create directory for rename test */
1021         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
1022                 log_stderr("failure: mkdirat");
1023                 goto out;
1024         }
1025
1026         /* change ownership of all files to uid 0 */
1027         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1028                 log_stderr("failure: chown_r");
1029                 goto out;
1030         }
1031
1032         /* Changing mount properties on a detached mount. */
1033         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1034         if (attr.userns_fd < 0) {
1035                 log_stderr("failure: get_userns_fd");
1036                 goto out;
1037         }
1038
1039         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1040                                      AT_EMPTY_PATH |
1041                                      AT_NO_AUTOMOUNT |
1042                                      AT_SYMLINK_NOFOLLOW |
1043                                      OPEN_TREE_CLOEXEC |
1044                                      OPEN_TREE_CLONE);
1045         if (open_tree_fd < 0) {
1046                 log_stderr("failure: sys_open_tree");
1047                 goto out;
1048         }
1049
1050         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1051                 log_stderr("failure: sys_mount_setattr");
1052                 goto out;
1053         }
1054
1055         pid = fork();
1056         if (pid < 0) {
1057                 log_stderr("failure: fork");
1058                 goto out;
1059         }
1060         if (pid == 0) {
1061                 if (!switch_fsids(10000, 10000))
1062                         die("failure: switch fsids");
1063
1064                 if (!caps_up())
1065                         die("failure: raise caps");
1066
1067                 /* The caller's fsids now have mappings in the idmapped mount so
1068                  * any file creation must fail.
1069                  */
1070
1071                 /* create hardlink */
1072                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1073                         die("failure: create hardlink");
1074
1075                 /* try to rename a file */
1076                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1077                         die("failure: rename");
1078
1079                 /* try to rename a directory */
1080                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1081                         die("failure: rename");
1082
1083                 /* remove file */
1084                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1085                         die("failure: delete");
1086
1087                 /* remove directory */
1088                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1089                         die("failure: delete");
1090
1091                 /* The caller's fsids have mappings in the idmapped mount so any
1092                  * file creation must fail.
1093                  */
1094
1095                 /* create regular file via open() */
1096                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1097                 if (file1_fd < 0)
1098                         die("failure: create");
1099
1100                 /* create regular file via mknod */
1101                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1102                         die("failure: create");
1103
1104                 /* create character device */
1105                 if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
1106                         die("failure: create");
1107
1108                 /* create symlink */
1109                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1110                         die("failure: create");
1111
1112                 /* create directory */
1113                 if (mkdirat(open_tree_fd, DIR1, 0700))
1114                         die("failure: create");
1115
1116                 exit(EXIT_SUCCESS);
1117         }
1118         if (wait_for_pid(pid))
1119                 goto out;
1120
1121         fret = 0;
1122         log_debug("Ran test");
1123 out:
1124         safe_close(attr.userns_fd);
1125         safe_close(file1_fd);
1126         safe_close(hardlink_target_fd);
1127         safe_close(open_tree_fd);
1128
1129         return fret;
1130 }
1131
1132 /* Validate that basic file operations on idmapped mounts from a user namespace. */
1133 static int create_in_userns(void)
1134 {
1135         int fret = -1;
1136         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1137         struct mount_attr attr = {
1138                 .attr_set = MOUNT_ATTR_IDMAP,
1139         };
1140         pid_t pid;
1141
1142         /* change ownership of all files to uid 0 */
1143         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1144                 log_stderr("failure: chown_r");
1145                 goto out;
1146         }
1147
1148         /* Changing mount properties on a detached mount. */
1149         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1150         if (attr.userns_fd < 0) {
1151                 log_stderr("failure: get_userns_fd");
1152                 goto out;
1153         }
1154
1155         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1156                                      AT_EMPTY_PATH |
1157                                      AT_NO_AUTOMOUNT |
1158                                      AT_SYMLINK_NOFOLLOW |
1159                                      OPEN_TREE_CLOEXEC |
1160                                      OPEN_TREE_CLONE);
1161         if (open_tree_fd < 0) {
1162                 log_stderr("failure: sys_open_tree");
1163                 goto out;
1164         }
1165
1166         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1167                 log_stderr("failure: sys_mount_setattr");
1168                 goto out;
1169         }
1170
1171         pid = fork();
1172         if (pid < 0) {
1173                 log_stderr("failure: fork");
1174                 goto out;
1175         }
1176         if (pid == 0) {
1177                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1178                         die("failure: switch_userns");
1179
1180                 /* create regular file via open() */
1181                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1182                 if (file1_fd < 0)
1183                         die("failure: open file");
1184                 safe_close(file1_fd);
1185
1186                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1187                         die("failure: check ownership");
1188
1189                 /* create regular file via mknod */
1190                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1191                         die("failure: create");
1192
1193                 if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0))
1194                         die("failure: check ownership");
1195
1196                 /* create symlink */
1197                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1198                         die("failure: create");
1199
1200                 if (!expected_uid_gid(open_tree_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0))
1201                         die("failure: check ownership");
1202
1203                 /* create directory */
1204                 if (mkdirat(open_tree_fd, DIR1, 0700))
1205                         die("failure: create");
1206
1207                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
1208                         die("failure: check ownership");
1209
1210                 /* try to rename a file */
1211                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1212                         die("failure: create");
1213
1214                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1215                         die("failure: check ownership");
1216
1217                 /* try to rename a file */
1218                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1219                         die("failure: create");
1220
1221                 if (!expected_uid_gid(open_tree_fd, DIR1_RENAME, 0, 0, 0))
1222                         die("failure: check ownership");
1223
1224                 /* remove file */
1225                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1226                         die("failure: remove");
1227
1228                 /* remove directory */
1229                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1230                         die("failure: remove");
1231
1232                 exit(EXIT_SUCCESS);
1233         }
1234
1235         if (wait_for_pid(pid))
1236                 goto out;
1237
1238         fret = 0;
1239         log_debug("Ran test");
1240 out:
1241         safe_close(attr.userns_fd);
1242         safe_close(file1_fd);
1243         safe_close(open_tree_fd);
1244
1245         return fret;
1246 }
1247
1248 static int hardlink_crossing_mounts(void)
1249 {
1250         int fret = -1;
1251         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1252
1253         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1254                 log_stderr("failure: chown_r");
1255                 goto out;
1256         }
1257
1258         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1259                                      AT_EMPTY_PATH |
1260                                      AT_NO_AUTOMOUNT |
1261                                      AT_SYMLINK_NOFOLLOW |
1262                                      OPEN_TREE_CLOEXEC |
1263                                      OPEN_TREE_CLONE);
1264         if (open_tree_fd < 0) {
1265                 log_stderr("failure: sys_open_tree");
1266                 goto out;
1267         }
1268
1269         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1270         if (file1_fd < 0) {
1271                 log_stderr("failure: openat");
1272                 goto out;
1273         }
1274
1275         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1276                 log_stderr("failure: mkdirat");
1277                 goto out;
1278         }
1279
1280         /* We're crossing a mountpoint so this must fail.
1281          *
1282          * Note that this must also fail for non-idmapped mounts but here we're
1283          * interested in making sure we're not introducing an accidental way to
1284          * violate that restriction or that suddenly this becomes possible.
1285          */
1286         if (!linkat(open_tree_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
1287                 log_stderr("failure: linkat");
1288                 goto out;
1289         }
1290         if (errno != EXDEV) {
1291                 log_stderr("failure: errno");
1292                 goto out;
1293         }
1294
1295         fret = 0;
1296         log_debug("Ran test");
1297 out:
1298         safe_close(file1_fd);
1299         safe_close(open_tree_fd);
1300
1301         return fret;
1302 }
1303
1304 static int hardlink_crossing_idmapped_mounts(void)
1305 {
1306         int fret = -1;
1307         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1308         struct mount_attr attr = {
1309                 .attr_set = MOUNT_ATTR_IDMAP,
1310         };
1311
1312         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1313                 log_stderr("failure: chown_r");
1314                 goto out;
1315         }
1316
1317         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1318         if (attr.userns_fd < 0) {
1319                 log_stderr("failure: get_userns_fd");
1320                 goto out;
1321         }
1322
1323         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1324                                      AT_EMPTY_PATH |
1325                                      AT_NO_AUTOMOUNT |
1326                                      AT_SYMLINK_NOFOLLOW |
1327                                      OPEN_TREE_CLOEXEC |
1328                                      OPEN_TREE_CLONE);
1329         if (open_tree_fd1 < 0) {
1330                 log_stderr("failure: sys_open_tree");
1331                 goto out;
1332         }
1333
1334         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1335                 log_stderr("failure: sys_mount_setattr");
1336                 goto out;
1337         }
1338
1339         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1340         if (file1_fd < 0) {
1341                 log_stderr("failure: openat");
1342                 goto out;
1343         }
1344
1345         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1346                 log_stderr("failure: expected_uid_gid");
1347                 goto out;
1348         }
1349
1350         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1351                 log_stderr("failure: expected_uid_gid");
1352                 goto out;
1353         }
1354
1355         safe_close(file1_fd);
1356
1357         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1358                 log_stderr("failure: mkdirat");
1359                 goto out;
1360         }
1361
1362         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1363                                       AT_NO_AUTOMOUNT |
1364                                       AT_SYMLINK_NOFOLLOW |
1365                                       OPEN_TREE_CLOEXEC |
1366                                       OPEN_TREE_CLONE |
1367                                       AT_RECURSIVE);
1368         if (open_tree_fd2 < 0) {
1369                 log_stderr("failure: sys_open_tree");
1370                 goto out;
1371         }
1372
1373         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1374                 log_stderr("failure: sys_mount_setattr");
1375                 goto out;
1376         }
1377
1378         /* We're crossing a mountpoint so this must fail.
1379          *
1380          * Note that this must also fail for non-idmapped mounts but here we're
1381          * interested in making sure we're not introducing an accidental way to
1382          * violate that restriction or that suddenly this becomes possible.
1383          */
1384         if (!linkat(open_tree_fd1, FILE1, open_tree_fd2, HARDLINK1, 0)) {
1385                 log_stderr("failure: linkat");
1386                 goto out;
1387         }
1388         if (errno != EXDEV) {
1389                 log_stderr("failure: errno");
1390                 goto out;
1391         }
1392
1393         fret = 0;
1394         log_debug("Ran test");
1395 out:
1396         safe_close(attr.userns_fd);
1397         safe_close(file1_fd);
1398         safe_close(open_tree_fd1);
1399         safe_close(open_tree_fd2);
1400
1401         return fret;
1402 }
1403
1404 static int hardlink_from_idmapped_mount(void)
1405 {
1406         int fret = -1;
1407         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1408         struct mount_attr attr = {
1409                 .attr_set = MOUNT_ATTR_IDMAP,
1410         };
1411
1412         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1413                 log_stderr("failure: chown_r");
1414                 goto out;
1415         }
1416
1417         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1418         if (attr.userns_fd < 0) {
1419                 log_stderr("failure: get_userns_fd");
1420                 goto out;
1421         }
1422
1423         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1424                                      AT_EMPTY_PATH |
1425                                      AT_NO_AUTOMOUNT |
1426                                      AT_SYMLINK_NOFOLLOW |
1427                                      OPEN_TREE_CLOEXEC |
1428                                      OPEN_TREE_CLONE);
1429         if (open_tree_fd < 0) {
1430                 log_stderr("failure: sys_open_tree");
1431                 goto out;
1432         }
1433
1434         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1435                 log_stderr("failure: sys_mount_setattr");
1436                 goto out;
1437         }
1438
1439         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1440         if (file1_fd < 0) {
1441                 log_stderr("failure: openat");
1442                 goto out;
1443         }
1444         safe_close(file1_fd);
1445
1446         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1447                 log_stderr("failure: expected_uid_gid");
1448                 goto out;
1449         }
1450
1451         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1452                 log_stderr("failure: expected_uid_gid");
1453                 goto out;
1454         }
1455
1456         /* We're not crossing a mountpoint so this must succeed. */
1457         if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
1458                 log_stderr("failure: linkat");
1459                 goto out;
1460         }
1461
1462
1463         fret = 0;
1464         log_debug("Ran test");
1465 out:
1466         safe_close(attr.userns_fd);
1467         safe_close(file1_fd);
1468         safe_close(open_tree_fd);
1469
1470         return fret;
1471 }
1472
1473 static int hardlink_from_idmapped_mount_in_userns(void)
1474 {
1475         int fret = -1;
1476         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1477         struct mount_attr attr = {
1478                 .attr_set = MOUNT_ATTR_IDMAP,
1479         };
1480         pid_t pid;
1481
1482         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1483                 log_stderr("failure: chown_r");
1484                 goto out;
1485         }
1486
1487         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1488         if (attr.userns_fd < 0) {
1489                 log_stderr("failure: get_userns_fd");
1490                 goto out;
1491         }
1492
1493         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1494                                      AT_EMPTY_PATH |
1495                                      AT_NO_AUTOMOUNT |
1496                                      AT_SYMLINK_NOFOLLOW |
1497                                      OPEN_TREE_CLOEXEC |
1498                                      OPEN_TREE_CLONE);
1499         if (open_tree_fd < 0) {
1500                 log_stderr("failure: sys_open_tree");
1501                 goto out;
1502         }
1503
1504         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1505                 log_stderr("failure: sys_mount_setattr");
1506                 goto out;
1507         }
1508
1509         pid = fork();
1510         if (pid < 0) {
1511                 log_stderr("failure: fork");
1512                 goto out;
1513         }
1514         if (pid == 0) {
1515                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1516                         die("failure: switch_userns");
1517
1518                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1519                 if (file1_fd < 0)
1520                         die("failure: create");
1521
1522                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1523                         die("failure: check ownership");
1524
1525                 /* We're not crossing a mountpoint so this must succeed. */
1526                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1527                         die("failure: create");
1528
1529                 if (!expected_uid_gid(open_tree_fd, HARDLINK1, 0, 0, 0))
1530                         die("failure: check ownership");
1531
1532                 exit(EXIT_SUCCESS);
1533         }
1534
1535         if (wait_for_pid(pid))
1536                 goto out;
1537
1538         fret = 0;
1539         log_debug("Ran test");
1540 out:
1541         safe_close(attr.userns_fd);
1542         safe_close(file1_fd);
1543         safe_close(open_tree_fd);
1544
1545         return fret;
1546 }
1547
1548 static int rename_crossing_mounts(void)
1549 {
1550         int fret = -1;
1551         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1552
1553         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1554                 log_stderr("failure: chown_r");
1555                 goto out;
1556         }
1557
1558         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1559                                      AT_EMPTY_PATH |
1560                                      AT_NO_AUTOMOUNT |
1561                                      AT_SYMLINK_NOFOLLOW |
1562                                      OPEN_TREE_CLOEXEC |
1563                                      OPEN_TREE_CLONE);
1564         if (open_tree_fd < 0) {
1565                 log_stderr("failure: sys_open_tree");
1566                 goto out;
1567         }
1568
1569         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1570         if (file1_fd < 0) {
1571                 log_stderr("failure: openat");
1572                 goto out;
1573         }
1574
1575         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1576                 log_stderr("failure: mkdirat");
1577                 goto out;
1578         }
1579
1580         /* We're crossing a mountpoint so this must fail.
1581          *
1582          * Note that this must also fail for non-idmapped mounts but here we're
1583          * interested in making sure we're not introducing an accidental way to
1584          * violate that restriction or that suddenly this becomes possible.
1585          */
1586         if (!renameat(open_tree_fd, FILE1, t_dir1_fd, FILE1_RENAME)) {
1587                 log_stderr("failure: renameat");
1588                 goto out;
1589         }
1590         if (errno != EXDEV) {
1591                 log_stderr("failure: errno");
1592                 goto out;
1593         }
1594
1595         fret = 0;
1596         log_debug("Ran test");
1597 out:
1598         safe_close(file1_fd);
1599         safe_close(open_tree_fd);
1600
1601         return fret;
1602 }
1603
1604 static int rename_crossing_idmapped_mounts(void)
1605 {
1606         int fret = -1;
1607         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1608         struct mount_attr attr = {
1609                 .attr_set = MOUNT_ATTR_IDMAP,
1610         };
1611
1612         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1613                 log_stderr("failure: chown_r");
1614                 goto out;
1615         }
1616
1617         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1618         if (attr.userns_fd < 0) {
1619                 log_stderr("failure: get_userns_fd");
1620                 goto out;
1621         }
1622
1623         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1624                                      AT_EMPTY_PATH |
1625                                      AT_NO_AUTOMOUNT |
1626                                      AT_SYMLINK_NOFOLLOW |
1627                                      OPEN_TREE_CLOEXEC |
1628                                      OPEN_TREE_CLONE);
1629         if (open_tree_fd1 < 0) {
1630                 log_stderr("failure: sys_open_tree");
1631                 goto out;
1632         }
1633
1634         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1635                 log_stderr("failure: sys_mount_setattr");
1636                 goto out;
1637         }
1638
1639         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1640         if (file1_fd < 0) {
1641                 log_stderr("failure: openat");
1642                 goto out;
1643         }
1644
1645         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1646                 log_stderr("failure: expected_uid_gid");
1647                 goto out;
1648         }
1649
1650         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1651                 log_stderr("failure: expected_uid_gid");
1652                 goto out;
1653         }
1654
1655         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1656                 log_stderr("failure: mkdirat");
1657                 goto out;
1658         }
1659
1660         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1661                                       AT_NO_AUTOMOUNT |
1662                                       AT_SYMLINK_NOFOLLOW |
1663                                       OPEN_TREE_CLOEXEC |
1664                                       OPEN_TREE_CLONE |
1665                                       AT_RECURSIVE);
1666         if (open_tree_fd2 < 0) {
1667                 log_stderr("failure: sys_open_tree");
1668                 goto out;
1669         }
1670
1671         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1672                 log_stderr("failure: sys_mount_setattr");
1673                 goto out;
1674         }
1675
1676         /* We're crossing a mountpoint so this must fail.
1677          *
1678          * Note that this must also fail for non-idmapped mounts but here we're
1679          * interested in making sure we're not introducing an accidental way to
1680          * violate that restriction or that suddenly this becomes possible.
1681          */
1682         if (!renameat(open_tree_fd1, FILE1, open_tree_fd2, FILE1_RENAME)) {
1683                 log_stderr("failure: renameat");
1684                 goto out;
1685         }
1686         if (errno != EXDEV) {
1687                 log_stderr("failure: errno");
1688                 goto out;
1689         }
1690
1691         fret = 0;
1692         log_debug("Ran test");
1693 out:
1694         safe_close(attr.userns_fd);
1695         safe_close(file1_fd);
1696         safe_close(open_tree_fd1);
1697         safe_close(open_tree_fd2);
1698
1699         return fret;
1700 }
1701
1702 static int rename_from_idmapped_mount(void)
1703 {
1704         int fret = -1;
1705         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1706         struct mount_attr attr = {
1707                 .attr_set = MOUNT_ATTR_IDMAP,
1708         };
1709
1710         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1711                 log_stderr("failure: chown_r");
1712                 goto out;
1713         }
1714
1715         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1716         if (attr.userns_fd < 0) {
1717                 log_stderr("failure: get_userns_fd");
1718                 goto out;
1719         }
1720
1721         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1722                                      AT_EMPTY_PATH |
1723                                      AT_NO_AUTOMOUNT |
1724                                      AT_SYMLINK_NOFOLLOW |
1725                                      OPEN_TREE_CLOEXEC |
1726                                      OPEN_TREE_CLONE);
1727         if (open_tree_fd < 0) {
1728                 log_stderr("failure: sys_open_tree");
1729                 goto out;
1730         }
1731
1732         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1733                 log_stderr("failure: sys_mount_setattr");
1734                 goto out;
1735         }
1736
1737         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1738         if (file1_fd < 0) {
1739                 log_stderr("failure: openat");
1740                 goto out;
1741         }
1742
1743         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1744                 log_stderr("failure: expected_uid_gid");
1745                 goto out;
1746         }
1747
1748         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1749                 log_stderr("failure: expected_uid_gid");
1750                 goto out;
1751         }
1752
1753         /* We're not crossing a mountpoint so this must succeed. */
1754         if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
1755                 log_stderr("failure: renameat");
1756                 goto out;
1757         }
1758
1759         fret = 0;
1760         log_debug("Ran test");
1761 out:
1762         safe_close(attr.userns_fd);
1763         safe_close(file1_fd);
1764         safe_close(open_tree_fd);
1765
1766         return fret;
1767 }
1768
1769 static int rename_from_idmapped_mount_in_userns(void)
1770 {
1771         int fret = -1;
1772         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1773         pid_t pid;
1774         struct mount_attr attr = {
1775                 .attr_set = MOUNT_ATTR_IDMAP,
1776         };
1777
1778         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1779                 log_stderr("failure: chown_r");
1780                 goto out;
1781         }
1782
1783         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1784         if (attr.userns_fd < 0) {
1785                 log_stderr("failure: get_userns_fd");
1786                 goto out;
1787         }
1788
1789         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1790                                      AT_EMPTY_PATH |
1791                                      AT_NO_AUTOMOUNT |
1792                                      AT_SYMLINK_NOFOLLOW |
1793                                      OPEN_TREE_CLOEXEC |
1794                                      OPEN_TREE_CLONE);
1795         if (open_tree_fd < 0) {
1796                 log_stderr("failure: sys_open_tree");
1797                 goto out;
1798         }
1799
1800         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1801                 log_stderr("failure: sys_mount_setattr");
1802                 goto out;
1803         }
1804
1805         pid = fork();
1806         if (pid < 0) {
1807                 log_stderr("failure: fork");
1808                 goto out;
1809         }
1810         if (pid == 0) {
1811                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1812                         die("failure: switch_userns");
1813
1814                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1815                 if (file1_fd < 0)
1816                         die("failure: create");
1817
1818                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1819                         die("failure: check ownership");
1820
1821                 /* We're not crossing a mountpoint so this must succeed. */
1822                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1823                         die("failure: create");
1824
1825                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1826                         die("failure: check ownership");
1827
1828                 exit(EXIT_SUCCESS);
1829         }
1830
1831         if (wait_for_pid(pid))
1832                 goto out;
1833
1834         fret = 0;
1835         log_debug("Ran test");
1836 out:
1837         safe_close(attr.userns_fd);
1838         safe_close(file1_fd);
1839         safe_close(open_tree_fd);
1840
1841         return fret;
1842 }
1843
1844 static int symlink_regular_mounts(void)
1845 {
1846         int fret = -1;
1847         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1848         struct stat st;
1849
1850         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1851         if (file1_fd < 0) {
1852                 log_stderr("failure: openat");
1853                 goto out;
1854         }
1855
1856         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1857                 log_stderr("failure: chown_r");
1858                 goto out;
1859         }
1860
1861         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1862                                      AT_EMPTY_PATH |
1863                                      AT_NO_AUTOMOUNT |
1864                                      AT_SYMLINK_NOFOLLOW |
1865                                      OPEN_TREE_CLOEXEC |
1866                                      OPEN_TREE_CLONE);
1867         if (open_tree_fd < 0) {
1868                 log_stderr("failure: sys_open_tree");
1869                 goto out;
1870         }
1871
1872         if (symlinkat(FILE1, open_tree_fd, FILE2)) {
1873                 log_stderr("failure: symlinkat");
1874                 goto out;
1875         }
1876
1877         if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW)) {
1878                 log_stderr("failure: fchownat");
1879                 goto out;
1880         }
1881
1882         if (fstatat(open_tree_fd, FILE2, &st, AT_SYMLINK_NOFOLLOW)) {
1883                 log_stderr("failure: fstatat");
1884                 goto out;
1885         }
1886
1887         if (st.st_uid != 15000 || st.st_gid != 15000) {
1888                 log_stderr("failure: compare ids");
1889                 goto out;
1890         }
1891
1892         if (fstatat(open_tree_fd, FILE1, &st, 0)) {
1893                 log_stderr("failure: fstatat");
1894                 goto out;
1895         }
1896
1897         if (st.st_uid != 10000 || st.st_gid != 10000) {
1898                 log_stderr("failure: compare ids");
1899                 goto out;
1900         }
1901
1902         fret = 0;
1903         log_debug("Ran test");
1904 out:
1905         safe_close(file1_fd);
1906         safe_close(open_tree_fd);
1907
1908         return fret;
1909 }
1910
1911 static int symlink_idmapped_mounts(void)
1912 {
1913         int fret = -1;
1914         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1915         struct mount_attr attr = {
1916                 .attr_set = MOUNT_ATTR_IDMAP,
1917         };
1918         pid_t pid;
1919
1920         if (!caps_supported())
1921                 return 0;
1922
1923         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1924         if (file1_fd < 0) {
1925                 log_stderr("failure: openat");
1926                 goto out;
1927         }
1928
1929         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1930                 log_stderr("failure: chown_r");
1931                 goto out;
1932         }
1933
1934         /* Changing mount properties on a detached mount. */
1935         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1936         if (attr.userns_fd < 0) {
1937                 log_stderr("failure: get_userns_fd");
1938                 goto out;
1939         }
1940
1941         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1942                                      AT_EMPTY_PATH |
1943                                      AT_NO_AUTOMOUNT |
1944                                      AT_SYMLINK_NOFOLLOW |
1945                                      OPEN_TREE_CLOEXEC |
1946                                      OPEN_TREE_CLONE);
1947         if (open_tree_fd < 0) {
1948                 log_stderr("failure: sys_open_tree");
1949                 goto out;
1950         }
1951
1952         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1953                 log_stderr("failure: sys_mount_setattr");
1954                 goto out;
1955         }
1956
1957         pid = fork();
1958         if (pid < 0) {
1959                 log_stderr("failure: fork");
1960                 goto out;
1961         }
1962         if (pid == 0) {
1963                 if (!switch_fsids(10000, 10000))
1964                         die("failure: switch fsids");
1965
1966                 if (!caps_up())
1967                         die("failure: raise caps");
1968
1969                 if (symlinkat(FILE1, open_tree_fd, FILE2))
1970                         die("failure: create");
1971
1972                 if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW))
1973                         die("failure: change ownership");
1974
1975                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 15000, 15000))
1976                         die("failure: check ownership");
1977
1978                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
1979                         die("failure: check ownership");
1980
1981                 exit(EXIT_SUCCESS);
1982         }
1983         if (wait_for_pid(pid))
1984                 goto out;
1985
1986         fret = 0;
1987         log_debug("Ran test");
1988 out:
1989         safe_close(attr.userns_fd);
1990         safe_close(file1_fd);
1991         safe_close(open_tree_fd);
1992
1993         return fret;
1994 }
1995
1996 static int symlink_idmapped_mounts_in_userns(void)
1997 {
1998         int fret = -1;
1999         int file1_fd = -EBADF, open_tree_fd = -EBADF;
2000         struct mount_attr attr = {
2001                 .attr_set = MOUNT_ATTR_IDMAP,
2002         };
2003         pid_t pid;
2004
2005         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
2006                 log_stderr("failure: chown_r");
2007                 goto out;
2008         }
2009
2010         /* Changing mount properties on a detached mount. */
2011         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2012         if (attr.userns_fd < 0) {
2013                 log_stderr("failure: get_userns_fd");
2014                 goto out;
2015         }
2016
2017         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2018                                      AT_EMPTY_PATH |
2019                                      AT_NO_AUTOMOUNT |
2020                                      AT_SYMLINK_NOFOLLOW |
2021                                      OPEN_TREE_CLOEXEC |
2022                                      OPEN_TREE_CLONE);
2023         if (open_tree_fd < 0) {
2024                 log_stderr("failure: sys_open_tree");
2025                 goto out;
2026         }
2027
2028         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2029                 log_stderr("failure: sys_mount_setattr");
2030                 goto out;
2031         }
2032
2033         pid = fork();
2034         if (pid < 0) {
2035                 log_stderr("failure: fork");
2036                 goto out;
2037         }
2038         if (pid == 0) {
2039                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2040                         die("failure: switch_userns");
2041
2042                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2043                 if (file1_fd < 0)
2044                         die("failure: create");
2045                 safe_close(file1_fd);
2046
2047                 if (symlinkat(FILE1, open_tree_fd, FILE2))
2048                         die("failure: create");
2049
2050                 if (fchownat(open_tree_fd, FILE2, 5000, 5000, AT_SYMLINK_NOFOLLOW))
2051                         die("failure: change ownership");
2052
2053                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000))
2054                         die("failure: check ownership");
2055
2056                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
2057                         die("failure: check ownership");
2058
2059                 exit(EXIT_SUCCESS);
2060         }
2061
2062         if (wait_for_pid(pid))
2063                 goto out;
2064
2065         if (!expected_uid_gid(t_dir1_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000)) {
2066                 log_stderr("failure: expected_uid_gid");
2067                 goto out;
2068         }
2069
2070         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2071                 log_stderr("failure: expected_uid_gid");
2072                 goto out;
2073         }
2074
2075         fret = 0;
2076         log_debug("Ran test");
2077 out:
2078         safe_close(attr.userns_fd);
2079         safe_close(file1_fd);
2080         safe_close(open_tree_fd);
2081
2082         return fret;
2083 }
2084
2085 /* Validate that a caller whose fsids map into the idmapped mount within it's
2086  * user namespace cannot create any device nodes.
2087  */
2088 static int device_node_in_userns(void)
2089 {
2090         int fret = -1;
2091         int open_tree_fd = -EBADF;
2092         struct mount_attr attr = {
2093                 .attr_set = MOUNT_ATTR_IDMAP,
2094         };
2095         pid_t pid;
2096
2097         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2098         if (attr.userns_fd < 0) {
2099                 log_stderr("failure: get_userns_fd");
2100                 goto out;
2101         }
2102
2103         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2104                                      AT_EMPTY_PATH |
2105                                      AT_NO_AUTOMOUNT |
2106                                      AT_SYMLINK_NOFOLLOW |
2107                                      OPEN_TREE_CLOEXEC |
2108                                      OPEN_TREE_CLONE);
2109         if (open_tree_fd < 0) {
2110                 log_stderr("failure: sys_open_tree");
2111                 goto out;
2112         }
2113
2114         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2115                 log_stderr("failure: sys_mount_setattr");
2116                 goto out;
2117         }
2118
2119         pid = fork();
2120         if (pid < 0) {
2121                 log_stderr("failure: fork");
2122                 goto out;
2123         }
2124         if (pid == 0) {
2125                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2126                         die("failure: switch_userns");
2127
2128                 /* create character device */
2129                 if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
2130                         die("failure: create");
2131
2132                 exit(EXIT_SUCCESS);
2133         }
2134
2135         if (wait_for_pid(pid))
2136                 goto out;
2137
2138         fret = 0;
2139         log_debug("Ran test");
2140 out:
2141         safe_close(attr.userns_fd);
2142         safe_close(open_tree_fd);
2143
2144         return fret;
2145 }
2146
2147
2148 /* Validate that changing file ownership works correctly on idmapped mounts. */
2149 static int expected_uid_gid_idmapped_mounts(void)
2150 {
2151         int fret = -1;
2152         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
2153         struct mount_attr attr1 = {
2154                 .attr_set = MOUNT_ATTR_IDMAP,
2155         };
2156         struct mount_attr attr2 = {
2157                 .attr_set = MOUNT_ATTR_IDMAP,
2158         };
2159         pid_t pid;
2160
2161         if (!switch_fsids(0, 0)) {
2162                 log_stderr("failure: switch_fsids");
2163                 goto out;
2164         }
2165
2166         /* create regular file via open() */
2167         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2168         if (file1_fd < 0) {
2169                 log_stderr("failure: openat");
2170                 goto out;
2171         }
2172
2173         /* create regular file via mknod */
2174         if (mknodat(t_dir1_fd, FILE2, S_IFREG | 0000, 0)) {
2175                 log_stderr("failure: mknodat");
2176                 goto out;
2177         }
2178
2179         /* create character device */
2180         if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
2181                 log_stderr("failure: mknodat");
2182                 goto out;
2183         }
2184
2185         /* create hardlink */
2186         if (linkat(t_dir1_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
2187                 log_stderr("failure: linkat");
2188                 goto out;
2189         }
2190
2191         /* create symlink */
2192         if (symlinkat(FILE2, t_dir1_fd, SYMLINK1)) {
2193                 log_stderr("failure: symlinkat");
2194                 goto out;
2195         }
2196
2197         /* create directory */
2198         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
2199                 log_stderr("failure: mkdirat");
2200                 goto out;
2201         }
2202
2203         /* Changing mount properties on a detached mount. */
2204         attr1.userns_fd = get_userns_fd(0, 10000, 10000);
2205         if (attr1.userns_fd < 0) {
2206                 log_stderr("failure: get_userns_fd");
2207                 goto out;
2208         }
2209
2210         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
2211                                      AT_EMPTY_PATH |
2212                                      AT_NO_AUTOMOUNT |
2213                                      AT_SYMLINK_NOFOLLOW |
2214                                      OPEN_TREE_CLOEXEC |
2215                                      OPEN_TREE_CLONE);
2216         if (open_tree_fd1 < 0) {
2217                 log_stderr("failure: sys_open_tree");
2218                 goto out;
2219         }
2220
2221         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr1, sizeof(attr1))) {
2222                 log_stderr("failure: sys_mount_setattr");
2223                 goto out;
2224         }
2225
2226         /* Validate that all files created through the image mountpoint are
2227          * owned by the callers fsuid and fsgid.
2228          */
2229         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2230                 log_stderr("failure: expected_uid_gid");
2231                 goto out;
2232         }
2233         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2234                 log_stderr("failure: expected_uid_gid");
2235                 goto out;
2236         }
2237         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2238                 log_stderr("failure: expected_uid_gid");
2239                 goto out;
2240         }
2241         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2242                 log_stderr("failure: expected_uid_gid");
2243                 goto out;
2244         }
2245         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
2246                 log_stderr("failure: expected_uid_gid");
2247                 goto out;
2248         }
2249         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2250                 log_stderr("failure: expected_uid_gid");
2251                 goto out;
2252         }
2253         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2254                 log_stderr("failure: expected_uid_gid");
2255                 goto out;
2256         }
2257
2258         /* Validate that all files are owned by the uid and gid specified in
2259          * the idmapping of the mount they are accessed from.
2260          */
2261         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2262                 log_stderr("failure: expected_uid_gid");
2263                 goto out;
2264         }
2265         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2266                 log_stderr("failure: expected_uid_gid");
2267                 goto out;
2268         }
2269         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2270                 log_stderr("failure: expected_uid_gid");
2271                 goto out;
2272         }
2273         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2274                 log_stderr("failure: expected_uid_gid");
2275                 goto out;
2276         }
2277         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
2278                 log_stderr("failure: expected_uid_gid");
2279                 goto out;
2280         }
2281         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2282                 log_stderr("failure: expected_uid_gid");
2283                 goto out;
2284         }
2285         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2286                 log_stderr("failure: expected_uid_gid");
2287                 goto out;
2288         }
2289
2290         /* Changing mount properties on a detached mount. */
2291         attr2.userns_fd = get_userns_fd(0, 30000, 2001);
2292         if (attr2.userns_fd < 0) {
2293                 log_stderr("failure: get_userns_fd");
2294                 goto out;
2295         }
2296
2297         open_tree_fd2 = sys_open_tree(t_dir1_fd, "",
2298                                      AT_EMPTY_PATH |
2299                                      AT_NO_AUTOMOUNT |
2300                                      AT_SYMLINK_NOFOLLOW |
2301                                      OPEN_TREE_CLOEXEC |
2302                                      OPEN_TREE_CLONE);
2303         if (open_tree_fd2 < 0) {
2304                 log_stderr("failure: sys_open_tree");
2305                 goto out;
2306         }
2307
2308         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr2, sizeof(attr2))) {
2309                 log_stderr("failure: sys_mount_setattr");
2310                 goto out;
2311         }
2312
2313         /* Validate that all files are owned by the uid and gid specified in
2314          * the idmapping of the mount they are accessed from.
2315          */
2316         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2317                 log_stderr("failure: expected_uid_gid");
2318                 goto out;
2319         }
2320         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2321                 log_stderr("failure: expected_uid_gid");
2322                 goto out;
2323         }
2324         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2325                 log_stderr("failure: expected_uid_gid");
2326                 goto out;
2327         }
2328         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2329                 log_stderr("failure: expected_uid_gid");
2330                 goto out;
2331         }
2332         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 30000, 30000)) {
2333                 log_stderr("failure: expected_uid_gid");
2334                 goto out;
2335         }
2336         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2337                 log_stderr("failure: expected_uid_gid");
2338                 goto out;
2339         }
2340         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2341                 log_stderr("failure: expected_uid_gid");
2342                 goto out;
2343         }
2344
2345         /* Change ownership throught original image mountpoint. */
2346         if (fchownat(t_dir1_fd, FILE1, 2000, 2000, 0)) {
2347                 log_stderr("failure: fchownat");
2348                 goto out;
2349         }
2350         if (fchownat(t_dir1_fd, FILE2, 2000, 2000, 0)) {
2351                 log_stderr("failure: fchownat");
2352                 goto out;
2353         }
2354         if (fchownat(t_dir1_fd, HARDLINK1, 2000, 2000, 0)) {
2355                 log_stderr("failure: fchownat");
2356                 goto out;
2357         }
2358         if (fchownat(t_dir1_fd, CHRDEV1, 2000, 2000, 0)) {
2359                 log_stderr("failure: fchownat");
2360                 goto out;
2361         }
2362         if (fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)) {
2363                 log_stderr("failure: fchownat");
2364                 goto out;
2365         }
2366         if (fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH)) {
2367                 log_stderr("failure: fchownat");
2368                 goto out;
2369         }
2370         if (fchownat(t_dir1_fd, DIR1, 2000, 2000, AT_EMPTY_PATH)) {
2371                 log_stderr("failure: fchownat");
2372                 goto out;
2373         }
2374
2375         /* Check ownership through original mount. */
2376         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 2000, 2000)) {
2377                 log_stderr("failure: expected_uid_gid");
2378                 goto out;
2379         }
2380         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 2000, 2000)) {
2381                 log_stderr("failure: expected_uid_gid");
2382                 goto out;
2383         }
2384         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 2000, 2000)) {
2385                 log_stderr("failure: expected_uid_gid");
2386                 goto out;
2387         }
2388         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 2000, 2000)) {
2389                 log_stderr("failure: expected_uid_gid");
2390                 goto out;
2391         }
2392         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 3000, 3000)) {
2393                 log_stderr("failure: expected_uid_gid");
2394                 goto out;
2395         }
2396         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 2000, 2000)) {
2397                 log_stderr("failure: expected_uid_gid");
2398                 goto out;
2399         }
2400         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 2000, 2000)) {
2401                 log_stderr("failure: expected_uid_gid");
2402                 goto out;
2403         }
2404
2405         /* Check ownership through first idmapped mount. */
2406         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 12000, 12000)) {
2407                 log_stderr("failure:expected_uid_gid ");
2408                 goto out;
2409         }
2410         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 12000, 12000)) {
2411                 log_stderr("failure: expected_uid_gid");
2412                 goto out;
2413         }
2414         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 12000, 12000)) {
2415                 log_stderr("failure: expected_uid_gid");
2416                 goto out;
2417         }
2418         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 12000, 12000)) {
2419                 log_stderr("failure: expected_uid_gid");
2420                 goto out;
2421         }
2422         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 13000, 13000)) {
2423                 log_stderr("failure: expected_uid_gid");
2424                 goto out;
2425         }
2426         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 12000, 12000)) {
2427                 log_stderr("failure:expected_uid_gid ");
2428                 goto out;
2429         }
2430         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 12000, 12000)) {
2431                 log_stderr("failure: expected_uid_gid");
2432                 goto out;
2433         }
2434
2435         /* Check ownership through second idmapped mount. */
2436         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 32000, 32000)) {
2437                 log_stderr("failure: expected_uid_gid");
2438                 goto out;
2439         }
2440         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 32000, 32000)) {
2441                 log_stderr("failure: expected_uid_gid");
2442                 goto out;
2443         }
2444         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 32000, 32000)) {
2445                 log_stderr("failure: expected_uid_gid");
2446                 goto out;
2447         }
2448         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 32000, 32000)) {
2449                 log_stderr("failure: expected_uid_gid");
2450                 goto out;
2451         }
2452         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid)) {
2453                 log_stderr("failure: expected_uid_gid");
2454                 goto out;
2455         }
2456         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 32000, 32000)) {
2457                 log_stderr("failure: expected_uid_gid");
2458                 goto out;
2459         }
2460         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 32000, 32000)) {
2461                 log_stderr("failure: expected_uid_gid");
2462                 goto out;
2463         }
2464
2465         pid = fork();
2466         if (pid < 0) {
2467                 log_stderr("failure: fork");
2468                 goto out;
2469         }
2470         if (pid == 0) {
2471                 if (!switch_userns(attr1.userns_fd, 0, 0, false))
2472                         die("failure: switch_userns");
2473
2474                 if (!fchownat(t_dir1_fd, FILE1, 1000, 1000, 0))
2475                         die("failure: fchownat");
2476                 if (!fchownat(t_dir1_fd, FILE2, 1000, 1000, 0))
2477                         die("failure: fchownat");
2478                 if (!fchownat(t_dir1_fd, HARDLINK1, 1000, 1000, 0))
2479                         die("failure: fchownat");
2480                 if (!fchownat(t_dir1_fd, CHRDEV1, 1000, 1000, 0))
2481                         die("failure: fchownat");
2482                 if (!fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2483                         die("failure: fchownat");
2484                 if (!fchownat(t_dir1_fd, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2485                         die("failure: fchownat");
2486                 if (!fchownat(t_dir1_fd, DIR1, 1000, 1000, AT_EMPTY_PATH))
2487                         die("failure: fchownat");
2488
2489                 if (!fchownat(open_tree_fd2, FILE1, 1000, 1000, 0))
2490                         die("failure: fchownat");
2491                 if (!fchownat(open_tree_fd2, FILE2, 1000, 1000, 0))
2492                         die("failure: fchownat");
2493                 if (!fchownat(open_tree_fd2, HARDLINK1, 1000, 1000, 0))
2494                         die("failure: fchownat");
2495                 if (!fchownat(open_tree_fd2, CHRDEV1, 1000, 1000, 0))
2496                         die("failure: fchownat");
2497                 if (!fchownat(open_tree_fd2, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2498                         die("failure: fchownat");
2499                 if (!fchownat(open_tree_fd2, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2500                         die("failure: fchownat");
2501                 if (!fchownat(open_tree_fd2, DIR1, 1000, 1000, AT_EMPTY_PATH))
2502                         die("failure: fchownat");
2503
2504                 if (fchownat(open_tree_fd1, FILE1, 1000, 1000, 0))
2505                         die("failure: fchownat");
2506                 if (fchownat(open_tree_fd1, FILE2, 1000, 1000, 0))
2507                         die("failure: fchownat");
2508                 if (fchownat(open_tree_fd1, HARDLINK1, 1000, 1000, 0))
2509                         die("failure: fchownat");
2510                 if (fchownat(open_tree_fd1, CHRDEV1, 1000, 1000, 0))
2511                         die("failure: fchownat");
2512                 if (fchownat(open_tree_fd1, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2513                         die("failure: fchownat");
2514                 if (fchownat(open_tree_fd1, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2515                         die("failure: fchownat");
2516                 if (fchownat(open_tree_fd1, DIR1, 1000, 1000, AT_EMPTY_PATH))
2517                         die("failure: fchownat");
2518
2519                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2520                         die("failure: expected_uid_gid");
2521                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2522                         die("failure: expected_uid_gid");
2523                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2524                         die("failure: expected_uid_gid");
2525                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2526                         die("failure: expected_uid_gid");
2527                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2528                         die("failure: expected_uid_gid");
2529                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2530                         die("failure: expected_uid_gid");
2531                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2532                         die("failure: expected_uid_gid");
2533
2534                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, t_overflowuid, t_overflowgid))
2535                         die("failure: expected_uid_gid");
2536                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, t_overflowuid, t_overflowgid))
2537                         die("failure: expected_uid_gid");
2538                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2539                         die("failure: expected_uid_gid");
2540                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2541                         die("failure: expected_uid_gid");
2542                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2543                         die("failure: expected_uid_gid");
2544                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2545                         die("failure: expected_uid_gid");
2546                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, t_overflowuid, t_overflowgid))
2547                         die("failure: expected_uid_gid");
2548
2549                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 1000, 1000))
2550                         die("failure: expected_uid_gid");
2551                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 1000, 1000))
2552                         die("failure: expected_uid_gid");
2553                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 1000, 1000))
2554                         die("failure: expected_uid_gid");
2555                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 1000, 1000))
2556                         die("failure: expected_uid_gid");
2557                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2558                         die("failure: expected_uid_gid");
2559                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 1000, 1000))
2560                         die("failure: expected_uid_gid");
2561                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 1000, 1000))
2562                         die("failure: expected_uid_gid");
2563
2564                 exit(EXIT_SUCCESS);
2565         }
2566
2567         if (wait_for_pid(pid))
2568                 goto out;
2569
2570         /* Check ownership through original mount. */
2571         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 1000, 1000)) {
2572                 log_stderr("failure: expected_uid_gid");
2573                 goto out;
2574         }
2575         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 1000, 1000)) {
2576                 log_stderr("failure: expected_uid_gid");
2577                 goto out;
2578         }
2579         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 1000, 1000)) {
2580                 log_stderr("failure: expected_uid_gid");
2581                 goto out;
2582         }
2583         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 1000, 1000)) {
2584                 log_stderr("failure: expected_uid_gid");
2585                 goto out;
2586         }
2587         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2588                 log_stderr("failure: expected_uid_gid");
2589                 goto out;
2590         }
2591         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 1000, 1000)) {
2592                 log_stderr("failure: expected_uid_gid");
2593                 goto out;
2594         }
2595         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 1000, 1000)) {
2596                 log_stderr("failure: expected_uid_gid");
2597                 goto out;
2598         }
2599
2600         /* Check ownership through first idmapped mount. */
2601         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 11000, 11000)) {
2602                 log_stderr("failure: expected_uid_gid");
2603                 goto out;
2604         }
2605         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 11000, 11000)) {
2606                 log_stderr("failure: expected_uid_gid");
2607                 goto out;
2608         }
2609         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 11000, 11000)) {
2610                 log_stderr("failure: expected_uid_gid");
2611                 goto out;
2612         }
2613         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 11000, 11000)) {
2614                 log_stderr("failure: expected_uid_gid");
2615                 goto out;
2616         }
2617         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2618                 log_stderr("failure: expected_uid_gid");
2619                 goto out;
2620         }
2621         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 11000, 11000)) {
2622                 log_stderr("failure: expected_uid_gid");
2623                 goto out;
2624         }
2625         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 11000, 11000)) {
2626                 log_stderr("failure: expected_uid_gid");
2627                 goto out;
2628         }
2629
2630         /* Check ownership through second idmapped mount. */
2631         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 31000, 31000)) {
2632                 log_stderr("failure: expected_uid_gid");
2633                 goto out;
2634         }
2635         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 31000, 31000)) {
2636                 log_stderr("failure: expected_uid_gid");
2637                 goto out;
2638         }
2639         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 31000, 31000)) {
2640                 log_stderr("failure: expected_uid_gid");
2641                 goto out;
2642         }
2643         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 31000, 31000)) {
2644                 log_stderr("failure: expected_uid_gid");
2645                 goto out;
2646         }
2647         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2648                 log_stderr("failure: expected_uid_gid");
2649                 goto out;
2650         }
2651         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 31000, 31000)) {
2652                 log_stderr("failure: expected_uid_gid");
2653                 goto out;
2654         }
2655         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 31000, 31000)) {
2656                 log_stderr("failure: expected_uid_gid");
2657                 goto out;
2658         }
2659
2660         pid = fork();
2661         if (pid < 0) {
2662                 log_stderr("failure: fork");
2663                 goto out;
2664         }
2665         if (pid == 0) {
2666                 if (!switch_userns(attr2.userns_fd, 0, 0, false))
2667                         die("failure: switch_userns");
2668
2669                 if (!fchownat(t_dir1_fd, FILE1, 0, 0, 0))
2670                         die("failure: fchownat");
2671                 if (!fchownat(t_dir1_fd, FILE2, 0, 0, 0))
2672                         die("failure: fchownat");
2673                 if (!fchownat(t_dir1_fd, HARDLINK1, 0, 0, 0))
2674                         die("failure: fchownat");
2675                 if (!fchownat(t_dir1_fd, CHRDEV1, 0, 0, 0))
2676                         die("failure: fchownat");
2677                 if (!fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2678                         die("failure: fchownat");
2679                 if (!fchownat(t_dir1_fd, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2680                         die("failure: fchownat");
2681                 if (!fchownat(t_dir1_fd, DIR1, 0, 0, AT_EMPTY_PATH))
2682                         die("failure: fchownat");
2683
2684                 if (!fchownat(open_tree_fd1, FILE1, 0, 0, 0))
2685                         die("failure: fchownat");
2686                 if (!fchownat(open_tree_fd1, FILE2, 0, 0, 0))
2687                         die("failure: fchownat");
2688                 if (!fchownat(open_tree_fd1, HARDLINK1, 0, 0, 0))
2689                         die("failure: fchownat");
2690                 if (!fchownat(open_tree_fd1, CHRDEV1, 0, 0, 0))
2691                         die("failure: fchownat");
2692                 if (!fchownat(open_tree_fd1, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2693                         die("failure: fchownat");
2694                 if (!fchownat(open_tree_fd1, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2695                         die("failure: fchownat");
2696                 if (!fchownat(open_tree_fd1, DIR1, 0, 0, AT_EMPTY_PATH))
2697                         die("failure: fchownat");
2698
2699                 if (fchownat(open_tree_fd2, FILE1, 0, 0, 0))
2700                         die("failure: fchownat");
2701                 if (fchownat(open_tree_fd2, FILE2, 0, 0, 0))
2702                         die("failure: fchownat");
2703                 if (fchownat(open_tree_fd2, HARDLINK1, 0, 0, 0))
2704                         die("failure: fchownat");
2705                 if (fchownat(open_tree_fd2, CHRDEV1, 0, 0, 0))
2706                         die("failure: fchownat");
2707                 if (!fchownat(open_tree_fd2, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2708                         die("failure: fchownat");
2709                 if (fchownat(open_tree_fd2, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2710                         die("failure: fchownat");
2711                 if (fchownat(open_tree_fd2, DIR1, 0, 0, AT_EMPTY_PATH))
2712                         die("failure: fchownat");
2713
2714                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2715                         die("failure: expected_uid_gid");
2716                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2717                         die("failure: expected_uid_gid");
2718                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2719                         die("failure: expected_uid_gid");
2720                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2721                         die("failure: expected_uid_gid");
2722                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2723                         die("failure: expected_uid_gid");
2724                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2725                         die("failure: expected_uid_gid");
2726                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2727                         die("failure: expected_uid_gid");
2728
2729                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, t_overflowuid, t_overflowgid))
2730                         die("failure: expected_uid_gid");
2731                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, t_overflowuid, t_overflowgid))
2732                         die("failure: expected_uid_gid");
2733                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2734                         die("failure: expected_uid_gid");
2735                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2736                         die("failure: expected_uid_gid");
2737                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2738                         die("failure: expected_uid_gid");
2739                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2740                         die("failure: expected_uid_gid");
2741                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, t_overflowuid, t_overflowgid))
2742                         die("failure: expected_uid_gid");
2743
2744                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 0, 0))
2745                         die("failure: expected_uid_gid");
2746                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 0, 0))
2747                         die("failure: expected_uid_gid");
2748                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 0, 0))
2749                         die("failure: expected_uid_gid");
2750                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 0, 0))
2751                         die("failure: expected_uid_gid");
2752                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2753                         die("failure: expected_uid_gid");
2754                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 0, 0))
2755                         die("failure: expected_uid_gid");
2756                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 0, 0))
2757                         die("failure: expected_uid_gid");
2758
2759                 exit(EXIT_SUCCESS);
2760         }
2761
2762         if (wait_for_pid(pid))
2763                 goto out;
2764
2765         /* Check ownership through original mount. */
2766         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2767                 log_stderr("failure: expected_uid_gid");
2768                 goto out;
2769         }
2770         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2771                 log_stderr("failure: expected_uid_gid");
2772                 goto out;
2773         }
2774         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2775                 log_stderr("failure: expected_uid_gid");
2776                 goto out;
2777         }
2778         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2779                 log_stderr("failure: expected_uid_gid");
2780                 goto out;
2781         }
2782         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2783                 log_stderr("failure: expected_uid_gid");
2784                 goto out;
2785         }
2786         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2787                 log_stderr("failure: expected_uid_gid");
2788                 goto out;
2789         }
2790         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2791                 log_stderr("failure: expected_uid_gid");
2792                 goto out;
2793         }
2794
2795         /* Check ownership through first idmapped mount. */
2796         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2797                 log_stderr("failure: expected_uid_gid");
2798                 goto out;
2799         }
2800         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2801                 log_stderr("failure: expected_uid_gid");
2802                 goto out;
2803         }
2804         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2805                 log_stderr("failure: expected_uid_gid");
2806                 goto out;
2807         }
2808         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2809                 log_stderr("failure: expected_uid_gid");
2810                 goto out;
2811         }
2812         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2813                 log_stderr("failure: expected_uid_gid");
2814                 goto out;
2815         }
2816         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2817                 log_stderr("failure: expected_uid_gid");
2818                 goto out;
2819         }
2820         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2821                 log_stderr("failure: expected_uid_gid");
2822                 goto out;
2823         }
2824
2825         /* Check ownership through second idmapped mount. */
2826         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2827                 log_stderr("failure: expected_uid_gid");
2828                 goto out;
2829         }
2830         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2831                 log_stderr("failure: expected_uid_gid");
2832                 goto out;
2833         }
2834         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2835                 log_stderr("failure: expected_uid_gid");
2836                 goto out;
2837         }
2838         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2839                 log_stderr("failure: expected_uid_gid");
2840                 goto out;
2841         }
2842         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2843                 log_stderr("failure: expected_uid_gid");
2844                 goto out;
2845         }
2846         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2847                 log_stderr("failure: expected_uid_gid");
2848                 goto out;
2849         }
2850         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2851                 log_stderr("failure: expected_uid_gid");
2852                 goto out;
2853         }
2854
2855         fret = 0;
2856         log_debug("Ran test");
2857 out:
2858         safe_close(attr1.userns_fd);
2859         safe_close(attr2.userns_fd);
2860         safe_close(file1_fd);
2861         safe_close(open_tree_fd1);
2862         safe_close(open_tree_fd2);
2863
2864         return fret;
2865 }
2866
2867 static int fscaps(void)
2868 {
2869         int fret = -1;
2870         int file1_fd = -EBADF;
2871         struct mount_attr attr = {
2872                 .attr_set = MOUNT_ATTR_IDMAP,
2873         };
2874         pid_t pid;
2875
2876         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2877         if (file1_fd < 0) {
2878                 log_stderr("failure: openat");
2879                 goto out;
2880         }
2881
2882         /* Skip if vfs caps are unsupported. */
2883         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
2884                 return 0;
2885
2886         /* Changing mount properties on a detached mount. */
2887         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2888         if (attr.userns_fd < 0) {
2889                 log_stderr("failure: get_userns_fd");
2890                 goto out;
2891         }
2892
2893         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
2894                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2895                 goto out;
2896         }
2897
2898         pid = fork();
2899         if (pid < 0) {
2900                 log_stderr("failure: fork");
2901                 goto out;
2902         }
2903         if (pid == 0) {
2904                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2905                         die("failure: switch_userns");
2906
2907                 /*
2908                  * On kernels before 5.12 this would succeed and return the
2909                  * unconverted caps. Then - for whatever reason - this behavior
2910                  * got changed and since 5.12 EOVERFLOW is returned when the
2911                  * rootid stored alongside the vfs caps does not map to uid 0 in
2912                  * the caller's user namespace.
2913                  */
2914                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
2915                         die("failure: expected_dummy_vfs_caps_uid");
2916
2917                 exit(EXIT_SUCCESS);
2918         }
2919
2920         if (wait_for_pid(pid))
2921                 goto out;
2922
2923         if (fremovexattr(file1_fd, "security.capability")) {
2924                 log_stderr("failure: fremovexattr");
2925                 goto out;
2926         }
2927         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2928                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2929                 goto out;
2930         }
2931         if (errno != ENODATA) {
2932                 log_stderr("failure: errno");
2933                 goto out;
2934         }
2935
2936         if (set_dummy_vfs_caps(file1_fd, 0, 10000)) {
2937                 log_stderr("failure: set_dummy_vfs_caps");
2938                 goto out;
2939         }
2940
2941         if (!expected_dummy_vfs_caps_uid(file1_fd, 10000)) {
2942                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2943                 goto out;
2944         }
2945
2946         pid = fork();
2947         if (pid < 0) {
2948                 log_stderr("failure: fork");
2949                 goto out;
2950         }
2951         if (pid == 0) {
2952                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2953                         die("failure: switch_userns");
2954
2955                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0))
2956                         die("failure: expected_dummy_vfs_caps_uid");
2957
2958                 exit(EXIT_SUCCESS);
2959         }
2960
2961         if (wait_for_pid(pid))
2962                 goto out;
2963
2964         if (fremovexattr(file1_fd, "security.capability")) {
2965                 log_stderr("failure: fremovexattr");
2966                 goto out;
2967         }
2968         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2969                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2970                 goto out;
2971         }
2972         if (errno != ENODATA) {
2973                 log_stderr("failure: errno");
2974                 goto out;
2975         }
2976
2977         fret = 0;
2978         log_debug("Ran test");
2979 out:
2980         safe_close(attr.userns_fd);
2981         safe_close(file1_fd);
2982
2983         return fret;
2984 }
2985
2986 static int fscaps_idmapped_mounts(void)
2987 {
2988         int fret = -1;
2989         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
2990         struct mount_attr attr = {
2991                 .attr_set = MOUNT_ATTR_IDMAP,
2992         };
2993         pid_t pid;
2994
2995         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2996         if (file1_fd < 0) {
2997                 log_stderr("failure: openat");
2998                 goto out;
2999         }
3000
3001         /* Skip if vfs caps are unsupported. */
3002         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3003                 return 0;
3004
3005         if (fremovexattr(file1_fd, "security.capability")) {
3006                 log_stderr("failure: fremovexattr");
3007                 goto out;
3008         }
3009
3010         /* Changing mount properties on a detached mount. */
3011         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3012         if (attr.userns_fd < 0) {
3013                 log_stderr("failure: get_userns_fd");
3014                 goto out;
3015         }
3016
3017         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3018                                      AT_EMPTY_PATH |
3019                                      AT_NO_AUTOMOUNT |
3020                                      AT_SYMLINK_NOFOLLOW |
3021                                      OPEN_TREE_CLOEXEC |
3022                                      OPEN_TREE_CLONE);
3023         if (open_tree_fd < 0) {
3024                 log_stderr("failure: sys_open_tree");
3025                 goto out;
3026         }
3027
3028         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3029                 log_stderr("failure: sys_mount_setattr");
3030                 goto out;
3031         }
3032
3033         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3034         if (file1_fd2 < 0) {
3035                 log_stderr("failure: openat");
3036                 goto out;
3037         }
3038
3039         if (!set_dummy_vfs_caps(file1_fd2, 0, 1000)) {
3040                 log_stderr("failure: set_dummy_vfs_caps");
3041                 goto out;
3042         }
3043
3044         if (set_dummy_vfs_caps(file1_fd2, 0, 10000)) {
3045                 log_stderr("failure: set_dummy_vfs_caps");
3046                 goto out;
3047         }
3048
3049         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3050                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3051                 goto out;
3052         }
3053
3054         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3055                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3056                 goto out;
3057         }
3058
3059         pid = fork();
3060         if (pid < 0) {
3061                 log_stderr("failure: fork");
3062                 goto out;
3063         }
3064         if (pid == 0) {
3065                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3066                         die("failure: switch_userns");
3067
3068                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3069                         die("failure: expected_dummy_vfs_caps_uid");
3070
3071                 exit(EXIT_SUCCESS);
3072         }
3073
3074         if (wait_for_pid(pid))
3075                 goto out;
3076
3077         if (fremovexattr(file1_fd2, "security.capability")) {
3078                 log_stderr("failure: fremovexattr");
3079                 goto out;
3080         }
3081         if (expected_dummy_vfs_caps_uid(file1_fd2, -1)) {
3082                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3083                 goto out;
3084         }
3085         if (errno != ENODATA) {
3086                 log_stderr("failure: errno");
3087                 goto out;
3088         }
3089
3090         if (set_dummy_vfs_caps(file1_fd2, 0, 12000)) {
3091                 log_stderr("failure: set_dummy_vfs_caps");
3092                 goto out;
3093         }
3094
3095         if (!expected_dummy_vfs_caps_uid(file1_fd2, 12000)) {
3096                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3097                 goto out;
3098         }
3099
3100         if (!expected_dummy_vfs_caps_uid(file1_fd, 2000)) {
3101                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3102                 goto out;
3103         }
3104
3105         pid = fork();
3106         if (pid < 0) {
3107                 log_stderr("failure: fork");
3108                 goto out;
3109         }
3110         if (pid == 0) {
3111                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3112                         die("failure: switch_userns");
3113
3114                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 2000))
3115                         die("failure: expected_dummy_vfs_caps_uid");
3116
3117                 exit(EXIT_SUCCESS);
3118         }
3119
3120         if (wait_for_pid(pid))
3121                 goto out;
3122
3123         fret = 0;
3124         log_debug("Ran test");
3125 out:
3126         safe_close(attr.userns_fd);
3127         safe_close(file1_fd);
3128         safe_close(file1_fd2);
3129         safe_close(open_tree_fd);
3130
3131         return fret;
3132 }
3133
3134 static int fscaps_idmapped_mounts_in_userns(void)
3135 {
3136         int fret = -1;
3137         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3138         struct mount_attr attr = {
3139                 .attr_set = MOUNT_ATTR_IDMAP,
3140         };
3141         pid_t pid;
3142
3143         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3144         if (file1_fd < 0) {
3145                 log_stderr("failure: openat");
3146                 goto out;
3147         }
3148
3149         /* Skip if vfs caps are unsupported. */
3150         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3151                 return 0;
3152
3153         if (fremovexattr(file1_fd, "security.capability")) {
3154                 log_stderr("failure: fremovexattr");
3155                 goto out;
3156         }
3157
3158         /* Changing mount properties on a detached mount. */
3159         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3160         if (attr.userns_fd < 0) {
3161                 log_stderr("failure: get_userns_fd");
3162                 goto out;
3163         }
3164
3165         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3166                                      AT_EMPTY_PATH |
3167                                      AT_NO_AUTOMOUNT |
3168                                      AT_SYMLINK_NOFOLLOW |
3169                                      OPEN_TREE_CLOEXEC |
3170                                      OPEN_TREE_CLONE);
3171         if (open_tree_fd < 0) {
3172                 log_stderr("failure: sys_open_tree");
3173                 goto out;
3174         }
3175
3176         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3177                 log_stderr("failure: sys_mount_setattr");
3178                 goto out;
3179         }
3180
3181         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3182         if (file1_fd2 < 0) {
3183                 log_stderr("failure: openat");
3184                 goto out;
3185         }
3186
3187         pid = fork();
3188         if (pid < 0) {
3189                 log_stderr("failure: fork");
3190                 goto out;
3191         }
3192         if (pid == 0) {
3193                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3194                         die("failure: switch_userns");
3195
3196                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3197                         die("failure: expected_dummy_vfs_caps_uid");
3198                 if (errno != ENODATA)
3199                         die("failure: errno");
3200
3201                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3202                         die("failure: set_dummy_vfs_caps");
3203
3204                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3205                         die("failure: expected_dummy_vfs_caps_uid");
3206
3207                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
3208                         die("failure: expected_dummy_vfs_caps_uid");
3209
3210                 exit(EXIT_SUCCESS);
3211         }
3212
3213         if (wait_for_pid(pid))
3214                 goto out;
3215
3216         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
3217                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3218                 goto out;
3219         }
3220
3221         fret = 0;
3222         log_debug("Ran test");
3223 out:
3224         safe_close(attr.userns_fd);
3225         safe_close(file1_fd);
3226         safe_close(file1_fd2);
3227         safe_close(open_tree_fd);
3228
3229         return fret;
3230 }
3231
3232 static int fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns(void)
3233 {
3234         int fret = -1;
3235         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3236         struct mount_attr attr = {
3237                 .attr_set = MOUNT_ATTR_IDMAP,
3238         };
3239         pid_t pid;
3240
3241         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3242         if (file1_fd < 0) {
3243                 log_stderr("failure: openat");
3244                 goto out;
3245         }
3246
3247         /* Skip if vfs caps are unsupported. */
3248         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3249                 return 0;
3250
3251         if (fremovexattr(file1_fd, "security.capability")) {
3252                 log_stderr("failure: fremovexattr");
3253                 goto out;
3254         }
3255         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
3256                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3257                 goto out;
3258         }
3259         if (errno != ENODATA) {
3260                 log_stderr("failure: errno");
3261                 goto out;
3262         }
3263
3264         /* Changing mount properties on a detached mount. */
3265         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3266         if (attr.userns_fd < 0) {
3267                 log_stderr("failure: get_userns_fd");
3268                 goto out;
3269         }
3270
3271         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3272                                      AT_EMPTY_PATH |
3273                                      AT_NO_AUTOMOUNT |
3274                                      AT_SYMLINK_NOFOLLOW |
3275                                      OPEN_TREE_CLOEXEC |
3276                                      OPEN_TREE_CLONE);
3277         if (open_tree_fd < 0) {
3278                 log_stderr("failure: sys_open_tree");
3279                 goto out;
3280         }
3281
3282         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3283                 log_stderr("failure: sys_mount_setattr");
3284                 goto out;
3285         }
3286
3287         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3288         if (file1_fd2 < 0) {
3289                 log_stderr("failure: openat");
3290                 goto out;
3291         }
3292
3293         /*
3294          * Verify we can set an v3 fscap for real root this was regressed at
3295          * some point. Make sure this doesn't happen again!
3296          */
3297         pid = fork();
3298         if (pid < 0) {
3299                 log_stderr("failure: fork");
3300                 goto out;
3301         }
3302         if (pid == 0) {
3303                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3304                         die("failure: switch_userns");
3305
3306                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3307                         die("failure: expected_dummy_vfs_caps_uid");
3308                 if (errno != ENODATA)
3309                         die("failure: errno");
3310
3311                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3312                         die("failure: set_dummy_vfs_caps");
3313
3314                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3315                         die("failure: expected_dummy_vfs_caps_uid");
3316
3317                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0) && errno != EOVERFLOW)
3318                         die("failure: expected_dummy_vfs_caps_uid");
3319
3320                 exit(EXIT_SUCCESS);
3321         }
3322
3323         if (wait_for_pid(pid))
3324                 goto out;
3325
3326         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3327                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3328                 goto out;
3329         }
3330
3331         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3332                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3333                 goto out;
3334         }
3335
3336         fret = 0;
3337         log_debug("Ran test");
3338 out:
3339         safe_close(attr.userns_fd);
3340         safe_close(file1_fd);
3341         safe_close(file1_fd2);
3342         safe_close(open_tree_fd);
3343
3344         return fret;
3345 }
3346
3347 static int fscaps_idmapped_mounts_in_userns_separate_userns(void)
3348 {
3349         int fret = -1;
3350         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3351         struct mount_attr attr = {
3352                 .attr_set = MOUNT_ATTR_IDMAP,
3353         };
3354         pid_t pid;
3355
3356         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3357         if (file1_fd < 0) {
3358                 log_stderr("failure: openat");
3359                 goto out;
3360         }
3361
3362         /* Skip if vfs caps are unsupported. */
3363         if (set_dummy_vfs_caps(file1_fd, 0, 1000)) {
3364                 log_stderr("failure: set_dummy_vfs_caps");
3365                 goto out;
3366         }
3367
3368         if (fremovexattr(file1_fd, "security.capability")) {
3369                 log_stderr("failure: fremovexattr");
3370                 goto out;
3371         }
3372
3373         /* change ownership of all files to uid 0 */
3374         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
3375                 log_stderr("failure: chown_r");
3376                 goto out;
3377         }
3378
3379         /* Changing mount properties on a detached mount. */
3380         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
3381         if (attr.userns_fd < 0) {
3382                 log_stderr("failure: get_userns_fd");
3383                 goto out;
3384         }
3385
3386         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3387                                      AT_EMPTY_PATH |
3388                                      AT_NO_AUTOMOUNT |
3389                                      AT_SYMLINK_NOFOLLOW |
3390                                      OPEN_TREE_CLOEXEC |
3391                                      OPEN_TREE_CLONE);
3392         if (open_tree_fd < 0) {
3393                 log_stderr("failure: sys_open_tree");
3394                 goto out;
3395         }
3396
3397         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3398                 log_stderr("failure: sys_mount_setattr");
3399                 goto out;
3400         }
3401
3402         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3403         if (file1_fd2 < 0) {
3404                 log_stderr("failure: openat");
3405                 goto out;
3406         }
3407
3408         pid = fork();
3409         if (pid < 0) {
3410                 log_stderr("failure: fork");
3411                 goto out;
3412         }
3413         if (pid == 0) {
3414                 int userns_fd;
3415
3416                 userns_fd = get_userns_fd(0, 10000, 10000);
3417                 if (userns_fd < 0)
3418                         die("failure: get_userns_fd");
3419
3420                 if (!switch_userns(userns_fd, 0, 0, false))
3421                         die("failure: switch_userns");
3422
3423                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3424                         die("failure: set fscaps");
3425
3426                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3427                         die("failure: expected_dummy_vfs_caps_uid");
3428
3429                 if (!expected_dummy_vfs_caps_uid(file1_fd, 20000) && errno != EOVERFLOW)
3430                         die("failure: expected_dummy_vfs_caps_uid");
3431
3432                 exit(EXIT_SUCCESS);
3433         }
3434
3435         if (wait_for_pid(pid))
3436                 goto out;
3437
3438         if (!expected_dummy_vfs_caps_uid(file1_fd, 20000)) {
3439                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3440                 goto out;
3441         }
3442
3443         pid = fork();
3444         if (pid < 0) {
3445                 log_stderr("failure: fork");
3446                 goto out;
3447         }
3448         if (pid == 0) {
3449                 int userns_fd;
3450
3451                 userns_fd = get_userns_fd(0, 10000, 10000);
3452                 if (userns_fd < 0)
3453                         die("failure: get_userns_fd");
3454
3455                 if (!switch_userns(userns_fd, 0, 0, false))
3456                         die("failure: switch_userns");
3457
3458                 if (fremovexattr(file1_fd2, "security.capability"))
3459                         die("failure: fremovexattr");
3460                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3461                         die("failure: expected_dummy_vfs_caps_uid");
3462                 if (errno != ENODATA)
3463                         die("failure: errno");
3464
3465                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3466                         die("failure: set_dummy_vfs_caps");
3467
3468                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3469                         die("failure: expected_dummy_vfs_caps_uid");
3470
3471                 if (!expected_dummy_vfs_caps_uid(file1_fd, 21000) && errno != EOVERFLOW)
3472                         die("failure: expected_dummy_vfs_caps_uid");
3473
3474                 exit(EXIT_SUCCESS);
3475         }
3476
3477         if (wait_for_pid(pid))
3478                 goto out;
3479
3480         if (!expected_dummy_vfs_caps_uid(file1_fd, 21000)) {
3481                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3482                 goto out;
3483         }
3484
3485         fret = 0;
3486         log_debug("Ran test");
3487 out:
3488         safe_close(attr.userns_fd);
3489         safe_close(file1_fd);
3490         safe_close(file1_fd2);
3491         safe_close(open_tree_fd);
3492
3493         return fret;
3494 }
3495
/* Constructor used when this binary is re-executed by the setid tests.
 *
 * If IDMAP_MOUNT_TEST_RUN_SETID is not set this is a regular test-suite run
 * and nothing happens here. Otherwise the process never reaches main():
 *  - "1": exit successfully only if both the real/effective uid and the
 *    real/effective gid differ and the effective ids equal EXPECTED_EUID and
 *    EXPECTED_EGID, i.e. we were executed with setid privileges.
 *  - "0": exit successfully only if real and effective ids are identical and
 *    equal the expected ids; anything else is reported via die().
 * Any other value, or a missing EXPECTED_EUID/EXPECTED_EGID, exits with
 * EXIT_FAILURE.
 */
static void __attribute__((constructor)) setuid_rexec(void)
{
        const char *mode, *euid_env, *egid_env;
        uid_t want_euid;
        gid_t want_egid;
        bool ids_match, uid_differs, gid_differs;

        mode = getenv("IDMAP_MOUNT_TEST_RUN_SETID");
        if (!mode)
                return; /* This is a regular test-suite run. */

        euid_env = getenv("EXPECTED_EUID");
        egid_env = getenv("EXPECTED_EGID");
        if (!euid_env || !egid_env)
                exit(EXIT_FAILURE);

        want_euid = atoi(euid_env);
        want_egid = atoi(egid_env);

        ids_match = (want_euid == geteuid()) && (want_egid == getegid());
        uid_differs = getuid() != geteuid();
        gid_differs = getgid() != getegid();

        if (strcmp(mode, "1") == 0) {
                /* we're expecting to run setid */
                if (ids_match && uid_differs && gid_differs)
                        exit(EXIT_SUCCESS);
        } else if (strcmp(mode, "0") == 0) {
                /* we're expecting to not run setid */
                if (ids_match && !uid_differs && !gid_differs)
                        exit(EXIT_SUCCESS);

                die("failure: non-setid");
        }

        exit(EXIT_FAILURE);
}
3536
3537 /* Validate that setid transitions are handled correctly. */
3538 static int setid_binaries(void)
3539 {
3540         int fret = -1;
3541         int file1_fd = -EBADF, exec_fd = -EBADF;
3542         pid_t pid;
3543
3544         /* create a file to be used as setuid binary */
3545         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3546         if (file1_fd < 0) {
3547                 log_stderr("failure: openat");
3548                 goto out;
3549         }
3550
3551         /* open our own executable */
3552         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3553         if (exec_fd < 0) {
3554                 log_stderr("failure: openat");
3555                 goto out;
3556         }
3557
3558         /* copy our own executable into the file we created */
3559         if (fd_to_fd(exec_fd, file1_fd)) {
3560                 log_stderr("failure: fd_to_fd");
3561                 goto out;
3562         }
3563
3564         /* chown the file to the uid and gid we want to assume */
3565         if (fchown(file1_fd, 5000, 5000)) {
3566                 log_stderr("failure: fchown");
3567                 goto out;
3568         }
3569
3570         /* set the setid bits and grant execute permissions to the group */
3571         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3572                 log_stderr("failure: fchmod");
3573                 goto out;
3574         }
3575
3576         /* Verify that the sid bits got raised. */
3577         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3578                 log_stderr("failure: is_setid");
3579                 goto out;
3580         }
3581
3582         safe_close(exec_fd);
3583         safe_close(file1_fd);
3584
3585         /* Verify we run setid binary as uid and gid 5000 from the original
3586          * mount.
3587          */
3588         pid = fork();
3589         if (pid < 0) {
3590                 log_stderr("failure: fork");
3591                 goto out;
3592         }
3593         if (pid == 0) {
3594                 static char *envp[] = {
3595                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3596                         "EXPECTED_EUID=5000",
3597                         "EXPECTED_EGID=5000",
3598                         NULL,
3599                 };
3600                 static char *argv[] = {
3601                         NULL,
3602                 };
3603
3604                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 5000, 5000))
3605                         die("failure: expected_uid_gid");
3606
3607                 sys_execveat(t_dir1_fd, FILE1, argv, envp, 0);
3608                 die("failure: sys_execveat");
3609
3610                 exit(EXIT_FAILURE);
3611         }
3612         if (wait_for_pid(pid))
3613                 goto out;
3614
3615         fret = 0;
3616         log_debug("Ran test");
3617 out:
3618
3619         return fret;
3620 }
3621
3622 /* Validate that setid transitions are handled correctly on idmapped mounts. */
3623 static int setid_binaries_idmapped_mounts(void)
3624 {
3625         int fret = -1;
3626         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3627         struct mount_attr attr = {
3628                 .attr_set = MOUNT_ATTR_IDMAP,
3629         };
3630         pid_t pid;
3631
3632         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3633                 log_stderr("failure: mkdirat");
3634                 goto out;
3635         }
3636
3637         /* create a file to be used as setuid binary */
3638         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3639         if (file1_fd < 0) {
3640                 log_stderr("failure: openat");
3641                 goto out;
3642         }
3643
3644         /* open our own executable */
3645         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3646         if (exec_fd < 0) {
3647                 log_stderr("failure:openat ");
3648                 goto out;
3649         }
3650
3651         /* copy our own executable into the file we created */
3652         if (fd_to_fd(exec_fd, file1_fd)) {
3653                 log_stderr("failure: fd_to_fd");
3654                 goto out;
3655         }
3656
3657         /* chown the file to the uid and gid we want to assume */
3658         if (fchown(file1_fd, 5000, 5000)) {
3659                 log_stderr("failure: fchown");
3660                 goto out;
3661         }
3662
3663         /* set the setid bits and grant execute permissions to the group */
3664         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3665                 log_stderr("failure: fchmod");
3666                 goto out;
3667         }
3668
3669         /* Verify that the sid bits got raised. */
3670         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3671                 log_stderr("failure: is_setid");
3672                 goto out;
3673         }
3674
3675         safe_close(exec_fd);
3676         safe_close(file1_fd);
3677
3678         /* Changing mount properties on a detached mount. */
3679         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3680         if (attr.userns_fd < 0) {
3681                 log_stderr("failure: get_userns_fd");
3682                 goto out;
3683         }
3684
3685         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3686                                      AT_EMPTY_PATH |
3687                                      AT_NO_AUTOMOUNT |
3688                                      AT_SYMLINK_NOFOLLOW |
3689                                      OPEN_TREE_CLOEXEC |
3690                                      OPEN_TREE_CLONE);
3691         if (open_tree_fd < 0) {
3692                 log_stderr("failure: sys_open_tree");
3693                 goto out;
3694         }
3695
3696         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3697                 log_stderr("failure: sys_mount_setattr");
3698                 goto out;
3699         }
3700
3701         /* A detached mount will have an anonymous mount namespace attached to
3702          * it. This means that we can't execute setid binaries on a detached
3703          * mount because the mnt_may_suid() helper will fail the check_mount()
3704          * part of its check which compares the caller's mount namespace to the
3705          * detached mount's mount namespace. Since by definition an anonymous
3706          * mount namespace is not equale to any mount namespace currently in
3707          * use this can't work. So attach the mount to the filesystem first
3708          * before performing this check.
3709          */
3710         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3711                 log_stderr("failure: sys_move_mount");
3712                 goto out;
3713         }
3714
3715         /* Verify we run setid binary as uid and gid 10000 from idmapped mount mount. */
3716         pid = fork();
3717         if (pid < 0) {
3718                 log_stderr("failure: fork");
3719                 goto out;
3720         }
3721         if (pid == 0) {
3722                 static char *envp[] = {
3723                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3724                         "EXPECTED_EUID=15000",
3725                         "EXPECTED_EGID=15000",
3726                         NULL,
3727                 };
3728                 static char *argv[] = {
3729                         NULL,
3730                 };
3731
3732                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 15000, 15000))
3733                         die("failure: expected_uid_gid");
3734
3735                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3736                 die("failure: sys_execveat");
3737
3738                 exit(EXIT_FAILURE);
3739         }
3740
3741         if (wait_for_pid(pid))
3742                 goto out;
3743
3744         fret = 0;
3745         log_debug("Ran test");
3746 out:
3747         safe_close(exec_fd);
3748         safe_close(file1_fd);
3749         safe_close(open_tree_fd);
3750
3751         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
3752         sys_umount2(t_buf, MNT_DETACH);
3753         rm_r(t_mnt_fd, DIR1);
3754
3755         return fret;
3756 }
3757
3758 /* Validate that setid transitions are handled correctly on idmapped mounts
3759  * running in a user namespace where the uid and gid of the setid binary have no
3760  * mapping.
3761  */
3762 static int setid_binaries_idmapped_mounts_in_userns(void)
3763 {
3764         int fret = -1;
3765         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3766         struct mount_attr attr = {
3767                 .attr_set = MOUNT_ATTR_IDMAP,
3768         };
3769         pid_t pid;
3770
3771         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3772                 log_stderr("failure: ");
3773                 goto out;
3774         }
3775
3776         /* create a file to be used as setuid binary */
3777         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3778         if (file1_fd < 0) {
3779                 log_stderr("failure: openat");
3780                 goto out;
3781         }
3782
3783         /* open our own executable */
3784         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3785         if (exec_fd < 0) {
3786                 log_stderr("failure: openat");
3787                 goto out;
3788         }
3789
3790         /* copy our own executable into the file we created */
3791         if (fd_to_fd(exec_fd, file1_fd)) {
3792                 log_stderr("failure: fd_to_fd");
3793                 goto out;
3794         }
3795
3796         safe_close(exec_fd);
3797
3798         /* chown the file to the uid and gid we want to assume */
3799         if (fchown(file1_fd, 5000, 5000)) {
3800                 log_stderr("failure: fchown");
3801                 goto out;
3802         }
3803
3804         /* set the setid bits and grant execute permissions to the group */
3805         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3806                 log_stderr("failure: fchmod");
3807                 goto out;
3808         }
3809
3810         /* Verify that the sid bits got raised. */
3811         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3812                 log_stderr("failure: is_setid");
3813                 goto out;
3814         }
3815
3816         safe_close(file1_fd);
3817
3818         /* Changing mount properties on a detached mount. */
3819         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3820         if (attr.userns_fd < 0) {
3821                 log_stderr("failure: get_userns_fd");
3822                 goto out;
3823         }
3824
3825         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3826                                      AT_EMPTY_PATH |
3827                                      AT_NO_AUTOMOUNT |
3828                                      AT_SYMLINK_NOFOLLOW |
3829                                      OPEN_TREE_CLOEXEC |
3830                                      OPEN_TREE_CLONE);
3831         if (open_tree_fd < 0) {
3832                 log_stderr("failure: sys_open_tree");
3833                 goto out;
3834         }
3835
3836         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3837                 log_stderr("failure: sys_mount_setattr");
3838                 goto out;
3839         }
3840
3841         /* A detached mount will have an anonymous mount namespace attached to
3842          * it. This means that we can't execute setid binaries on a detached
3843          * mount because the mnt_may_suid() helper will fail the check_mount()
3844          * part of its check which compares the caller's mount namespace to the
3845          * detached mount's mount namespace. Since by definition an anonymous
3846          * mount namespace is not equale to any mount namespace currently in
3847          * use this can't work. So attach the mount to the filesystem first
3848          * before performing this check.
3849          */
3850         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3851                 log_stderr("failure: sys_move_mount");
3852                 goto out;
3853         }
3854
3855         pid = fork();
3856         if (pid < 0) {
3857                 log_stderr("failure: fork");
3858                 goto out;
3859         }
3860         if (pid == 0) {
3861                 static char *envp[] = {
3862                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3863                         "EXPECTED_EUID=5000",
3864                         "EXPECTED_EGID=5000",
3865                         NULL,
3866                 };
3867                 static char *argv[] = {
3868                         NULL,
3869                 };
3870
3871                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3872                         die("failure: switch_userns");
3873
3874                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
3875                         die("failure: expected_uid_gid");
3876
3877                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3878                 die("failure: sys_execveat");
3879
3880                 exit(EXIT_FAILURE);
3881         }
3882
3883         if (wait_for_pid(pid)) {
3884                 log_stderr("failure: wait_for_pid");
3885                 goto out;
3886         }
3887
3888         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3889         if (file1_fd < 0) {
3890                 log_stderr("failure: openat");
3891                 goto out;
3892         }
3893
3894         /* chown the file to the uid and gid we want to assume */
3895         if (fchown(file1_fd, 0, 0)) {
3896                 log_stderr("failure: fchown");
3897                 goto out;
3898         }
3899
3900         /* set the setid bits and grant execute permissions to the group */
3901         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3902                 log_stderr("failure: fchmod");
3903                 goto out;
3904         }
3905
3906         /* Verify that the sid bits got raised. */
3907         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3908                 log_stderr("failure: is_setid");
3909                 goto out;
3910         }
3911
3912         safe_close(file1_fd);
3913
3914         pid = fork();
3915         if (pid < 0) {
3916                 log_stderr("failure: fork");
3917                 goto out;
3918         }
3919         if (pid == 0) {
3920                 static char *envp[] = {
3921                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3922                         "EXPECTED_EUID=0",
3923                         "EXPECTED_EGID=0",
3924                         NULL,
3925                 };
3926                 static char *argv[] = {
3927                         NULL,
3928                 };
3929
3930                 if (!caps_supported()) {
3931                         log_debug("skip: capability library not installed");
3932                         exit(EXIT_SUCCESS);
3933                 }
3934
3935                 if (!switch_userns(attr.userns_fd, 5000, 5000, true))
3936                         die("failure: switch_userns");
3937
3938                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
3939                         die("failure: expected_uid_gid");
3940
3941                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3942                 die("failure: sys_execveat");
3943
3944                 exit(EXIT_FAILURE);
3945         }
3946
3947         if (wait_for_pid(pid)) {
3948                 log_stderr("failure: wait_for_pid");
3949                 goto out;
3950         }
3951
3952         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3953         if (file1_fd < 0) {
3954                 log_stderr("failure: openat");
3955                 goto out;
3956         }
3957
3958         /* chown the file to the uid and gid we want to assume */
3959         if (fchown(file1_fd, 30000, 30000)) {
3960                 log_stderr("failure: fchown");
3961                 goto out;
3962         }
3963
3964         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
3965                 log_stderr("failure: fchmod");
3966                 goto out;
3967         }
3968
3969         /* Verify that the sid bits got raised. */
3970         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3971                 log_stderr("failure: is_setid");
3972                 goto out;
3973         }
3974
3975         safe_close(file1_fd);
3976
3977         /* Verify that we can't assume a uid and gid of a setid binary for which
3978          * we have no mapping in our user namespace.
3979          */
3980         pid = fork();
3981         if (pid < 0) {
3982                 log_stderr("failure: fork");
3983                 goto out;
3984         }
3985         if (pid == 0) {
3986                 char expected_euid[100];
3987                 char expected_egid[100];
3988                 static char *envp[4] = {
3989                         NULL,
3990                         NULL,
3991                         NULL,
3992                         NULL,
3993                 };
3994                 static char *argv[] = {
3995                         NULL,
3996                 };
3997
3998                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3999                         die("failure: switch_userns");
4000
4001                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4002                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4003                 envp[1] = expected_euid;
4004                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4005                 envp[2] = expected_egid;
4006
4007                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4008                         die("failure: expected_uid_gid");
4009
4010                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4011                 die("failure: sys_execveat");
4012
4013                 exit(EXIT_FAILURE);
4014         }
4015
4016         if (wait_for_pid(pid)) {
4017                 log_stderr("failure: wait_for_pid");
4018                 goto out;
4019         }
4020
4021         fret = 0;
4022         log_debug("Ran test");
4023 out:
4024         safe_close(attr.userns_fd);
4025         safe_close(exec_fd);
4026         safe_close(file1_fd);
4027         safe_close(open_tree_fd);
4028
4029         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4030         sys_umount2(t_buf, MNT_DETACH);
4031         rm_r(t_mnt_fd, DIR1);
4032
4033         return fret;
4034 }
4035
4036 /* Validate that setid transitions are handled correctly on idmapped mounts
4037  * running in a user namespace where the uid and gid of the setid binary have no
4038  * mapping.
4039  */
4040 static int setid_binaries_idmapped_mounts_in_userns_separate_userns(void)
4041 {
4042         int fret = -1;
4043         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
4044         struct mount_attr attr = {
4045                 .attr_set = MOUNT_ATTR_IDMAP,
4046         };
4047         pid_t pid;
4048
4049         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
4050                 log_stderr("failure: mkdirat");
4051                 goto out;
4052         }
4053
4054         /* create a file to be used as setuid binary */
4055         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
4056         if (file1_fd < 0) {
4057                 log_stderr("failure: openat");
4058                 goto out;
4059         }
4060
4061         /* open our own executable */
4062         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
4063         if (exec_fd < 0) {
4064                 log_stderr("failure: openat");
4065                 goto out;
4066         }
4067
4068         /* copy our own executable into the file we created */
4069         if (fd_to_fd(exec_fd, file1_fd)) {
4070                 log_stderr("failure: fd_to_fd");
4071                 goto out;
4072         }
4073
4074         safe_close(exec_fd);
4075
4076         /* change ownership of all files to uid 0 */
4077         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
4078                 log_stderr("failure: chown_r");
4079                 goto out;
4080         }
4081
4082         /* chown the file to the uid and gid we want to assume */
4083         if (fchown(file1_fd, 25000, 25000)) {
4084                 log_stderr("failure: fchown");
4085                 goto out;
4086         }
4087
4088         /* set the setid bits and grant execute permissions to the group */
4089         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4090                 log_stderr("failure: fchmod");
4091                 goto out;
4092         }
4093
4094         /* Verify that the sid bits got raised. */
4095         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4096                 log_stderr("failure: is_setid");
4097                 goto out;
4098         }
4099
4100         safe_close(file1_fd);
4101
4102         /* Changing mount properties on a detached mount. */
4103         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
4104         if (attr.userns_fd < 0) {
4105                 log_stderr("failure: get_userns_fd");
4106                 goto out;
4107         }
4108
4109         open_tree_fd = sys_open_tree(t_dir1_fd, "",
4110                                      AT_EMPTY_PATH |
4111                                      AT_NO_AUTOMOUNT |
4112                                      AT_SYMLINK_NOFOLLOW |
4113                                      OPEN_TREE_CLOEXEC |
4114                                      OPEN_TREE_CLONE);
4115         if (open_tree_fd < 0) {
4116                 log_stderr("failure: sys_open_tree");
4117                 goto out;
4118         }
4119
4120         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4121                 log_stderr("failure: sys_mount_setattr");
4122                 goto out;
4123         }
4124
4125         /* A detached mount will have an anonymous mount namespace attached to
4126          * it. This means that we can't execute setid binaries on a detached
4127          * mount because the mnt_may_suid() helper will fail the check_mount()
4128          * part of its check which compares the caller's mount namespace to the
4129          * detached mount's mount namespace. Since by definition an anonymous
4130          * mount namespace is not equale to any mount namespace currently in
4131          * use this can't work. So attach the mount to the filesystem first
4132          * before performing this check.
4133          */
4134         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
4135                 log_stderr("failure: sys_move_mount");
4136                 goto out;
4137         }
4138
4139         pid = fork();
4140         if (pid < 0) {
4141                 log_stderr("failure: fork");
4142                 goto out;
4143         }
4144         if (pid == 0) {
4145                 int userns_fd;
4146                 static char *envp[] = {
4147                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4148                         "EXPECTED_EUID=5000",
4149                         "EXPECTED_EGID=5000",
4150                         NULL,
4151                 };
4152                 static char *argv[] = {
4153                         NULL,
4154                 };
4155
4156                 userns_fd = get_userns_fd(0, 10000, 10000);
4157                 if (userns_fd < 0)
4158                         die("failure: get_userns_fd");
4159
4160                 if (!switch_userns(userns_fd, 0, 0, false))
4161                         die("failure: switch_userns");
4162
4163                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
4164                         die("failure: expected_uid_gid");
4165
4166                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4167                 die("failure: sys_execveat");
4168
4169                 exit(EXIT_FAILURE);
4170         }
4171
4172         if (wait_for_pid(pid)) {
4173                 log_stderr("failure: wait_for_pid");
4174                 goto out;
4175         }
4176
4177         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4178         if (file1_fd < 0) {
4179                 log_stderr("failure: openat");
4180                 goto out;
4181         }
4182
4183         /* chown the file to the uid and gid we want to assume */
4184         if (fchown(file1_fd, 20000, 20000)) {
4185                 log_stderr("failure: fchown");
4186                 goto out;
4187         }
4188
4189         /* set the setid bits and grant execute permissions to the group */
4190         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4191                 log_stderr("failure: fchmod");
4192                 goto out;
4193         }
4194
4195         /* Verify that the sid bits got raised. */
4196         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4197                 log_stderr("failure: is_setid");
4198                 goto out;
4199         }
4200
4201         safe_close(file1_fd);
4202
4203         pid = fork();
4204         if (pid < 0) {
4205                 log_stderr("failure: fork");
4206                 goto out;
4207         }
4208         if (pid == 0) {
4209                 int userns_fd;
4210                 static char *envp[] = {
4211                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4212                         "EXPECTED_EUID=0",
4213                         "EXPECTED_EGID=0",
4214                         NULL,
4215                 };
4216                 static char *argv[] = {
4217                         NULL,
4218                 };
4219
4220                 userns_fd = get_userns_fd(0, 10000, 10000);
4221                 if (userns_fd < 0)
4222                         die("failure: get_userns_fd");
4223
4224                 if (!caps_supported()) {
4225                         log_debug("skip: capability library not installed");
4226                         exit(EXIT_SUCCESS);
4227                 }
4228
4229                 if (!switch_userns(userns_fd, 1000, 1000, true))
4230                         die("failure: switch_userns");
4231
4232                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
4233                         die("failure: expected_uid_gid");
4234
4235                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4236                 die("failure: sys_execveat");
4237
4238                 exit(EXIT_FAILURE);
4239         }
4240         if (wait_for_pid(pid)) {
4241                 log_stderr("failure: wait_for_pid");
4242                 goto out;
4243         }
4244
4245         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4246         if (file1_fd < 0) {
4247                 log_stderr("failure: openat");
4248                 goto out;
4249         }
4250
4251         /* chown the file to the uid and gid we want to assume */
4252         if (fchown(file1_fd, 0, 0)) {
4253                 log_stderr("failure: fchown");
4254                 goto out;
4255         }
4256
4257         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
4258                 log_stderr("failure: fchmod");
4259                 goto out;
4260         }
4261
4262         /* Verify that the sid bits got raised. */
4263         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4264                 log_stderr("failure: is_setid");
4265                 goto out;
4266         }
4267
4268         safe_close(file1_fd);
4269
4270         /* Verify that we can't assume a uid and gid of a setid binary for
4271          * which we have no mapping in our user namespace.
4272          */
4273         pid = fork();
4274         if (pid < 0) {
4275                 log_stderr("failure: fork");
4276                 goto out;
4277         }
4278         if (pid == 0) {
4279                 int userns_fd;
4280                 char expected_euid[100];
4281                 char expected_egid[100];
4282                 static char *envp[4] = {
4283                         NULL,
4284                         NULL,
4285                         NULL,
4286                         NULL,
4287                 };
4288                 static char *argv[] = {
4289                         NULL,
4290                 };
4291
4292                 userns_fd = get_userns_fd(0, 10000, 10000);
4293                 if (userns_fd < 0)
4294                         die("failure: get_userns_fd");
4295
4296                 if (!switch_userns(userns_fd, 0, 0, false))
4297                         die("failure: switch_userns");
4298
4299                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4300                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4301                 envp[1] = expected_euid;
4302                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4303                 envp[2] = expected_egid;
4304
4305                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4306                         die("failure: expected_uid_gid");
4307
4308                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4309                 die("failure: sys_execveat");
4310
4311                 exit(EXIT_FAILURE);
4312         }
4313         if (wait_for_pid(pid)) {
4314                 log_stderr("failure: wait_for_pid");
4315                 goto out;
4316         }
4317
4318         fret = 0;
4319         log_debug("Ran test");
4320 out:
4321         safe_close(attr.userns_fd);
4322         safe_close(exec_fd);
4323         safe_close(file1_fd);
4324         safe_close(open_tree_fd);
4325
4326         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4327         sys_umount2(t_buf, MNT_DETACH);
4328         rm_r(t_mnt_fd, DIR1);
4329
4330         return fret;
4331 }
4332
4333 static int sticky_bit_unlink(void)
4334 {
4335         int fret = -1;
4336         int dir_fd = -EBADF;
4337         pid_t pid;
4338
4339         if (!caps_supported())
4340                 return 0;
4341
4342         /* create directory */
4343         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4344                 log_stderr("failure: mkdirat");
4345                 goto out;
4346         }
4347
4348         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4349         if (dir_fd < 0) {
4350                 log_stderr("failure: openat");
4351                 goto out;
4352         }
4353
4354         if (fchown(dir_fd, 0, 0)) {
4355                 log_stderr("failure: fchown");
4356                 goto out;
4357         }
4358
4359         if (fchmod(dir_fd, 0777)) {
4360                 log_stderr("failure: fchmod");
4361                 goto out;
4362         }
4363
4364         /* create regular file via mknod */
4365         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4366                 log_stderr("failure: mknodat");
4367                 goto out;
4368         }
4369         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4370                 log_stderr("failure: fchownat");
4371                 goto out;
4372         }
4373         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4374                 log_stderr("failure: fchmodat");
4375                 goto out;
4376         }
4377
4378         /* create regular file via mknod */
4379         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4380                 log_stderr("failure: mknodat");
4381                 goto out;
4382         }
4383         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4384                 log_stderr("failure: fchownat");
4385                 goto out;
4386         }
4387         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4388                 log_stderr("failure: fchmodat");
4389                 goto out;
4390         }
4391
4392         /* The sticky bit is not set so we must be able to delete files not
4393          * owned by us.
4394          */
4395         pid = fork();
4396         if (pid < 0) {
4397                 log_stderr("failure: fork");
4398                 goto out;
4399         }
4400         if (pid == 0) {
4401                 if (!switch_ids(1000, 1000))
4402                         die("failure: switch_ids");
4403
4404                 if (unlinkat(dir_fd, FILE1, 0))
4405                         die("failure: unlinkat");
4406
4407                 if (unlinkat(dir_fd, FILE2, 0))
4408                         die("failure: unlinkat");
4409
4410                 exit(EXIT_SUCCESS);
4411         }
4412         if (wait_for_pid(pid)) {
4413                 log_stderr("failure: wait_for_pid");
4414                 goto out;
4415         }
4416
4417         /* set sticky bit */
4418         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4419                 log_stderr("failure: fchmod");
4420                 goto out;
4421         }
4422
4423         /* validate sticky bit is set */
4424         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4425                 log_stderr("failure: is_sticky");
4426                 goto out;
4427         }
4428
4429         /* create regular file via mknod */
4430         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4431                 log_stderr("failure: mknodat");
4432                 goto out;
4433         }
4434         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4435                 log_stderr("failure: fchownat");
4436                 goto out;
4437         }
4438         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4439                 log_stderr("failure: fchmodat");
4440                 goto out;
4441         }
4442
4443         /* create regular file via mknod */
4444         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4445                 log_stderr("failure: mknodat");
4446                 goto out;
4447         }
4448         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4449                 log_stderr("failure: fchownat");
4450                 goto out;
4451         }
4452         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4453                 log_stderr("failure: fchmodat");
4454                 goto out;
4455         }
4456
4457         /* The sticky bit is set so we must not be able to delete files not
4458          * owned by us.
4459          */
4460         pid = fork();
4461         if (pid < 0) {
4462                 log_stderr("failure: fork");
4463                 goto out;
4464         }
4465         if (pid == 0) {
4466                 if (!switch_ids(1000, 1000))
4467                         die("failure: switch_ids");
4468
4469                 if (!unlinkat(dir_fd, FILE1, 0))
4470                         die("failure: unlinkat");
4471                 if (errno != EPERM)
4472                         die("failure: errno");
4473
4474                 if (!unlinkat(dir_fd, FILE2, 0))
4475                         die("failure: unlinkat");
4476                 if (errno != EPERM)
4477                         die("failure: errno");
4478
4479                 exit(EXIT_SUCCESS);
4480         }
4481         if (wait_for_pid(pid)) {
4482                 log_stderr("failure: wait_for_pid");
4483                 goto out;
4484         }
4485
4486         /* The sticky bit is set and we own the files so we must be able to
4487          * delete the files now.
4488          */
4489         pid = fork();
4490         if (pid < 0) {
4491                 log_stderr("failure: fork");
4492                 goto out;
4493         }
4494         if (pid == 0) {
4495                 /* change ownership */
4496                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
4497                         die("failure: fchownat");
4498                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
4499                         die("failure: expected_uid_gid");
4500                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
4501                         die("failure: fchownat");
4502                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
4503                         die("failure: expected_uid_gid");
4504
4505                 if (!switch_ids(1000, 1000))
4506                         die("failure: switch_ids");
4507
4508                 if (unlinkat(dir_fd, FILE1, 0))
4509                         die("failure: unlinkat");
4510
4511                 if (unlinkat(dir_fd, FILE2, 0))
4512                         die("failure: unlinkat");
4513
4514                 exit(EXIT_SUCCESS);
4515         }
4516         if (wait_for_pid(pid)) {
4517                 log_stderr("failure: wait_for_pid");
4518                 goto out;
4519         }
4520
4521         /* change uid to unprivileged user */
4522         if (fchown(dir_fd, 1000, -1)) {
4523                 log_stderr("failure: fchown");
4524                 goto out;
4525         }
4526         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4527                 log_stderr("failure: fchmod");
4528                 goto out;
4529         }
4530         /* validate sticky bit is set */
4531         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4532                 log_stderr("failure: is_sticky");
4533                 goto out;
4534         }
4535
4536         /* create regular file via mknod */
4537         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4538                 log_stderr("failure: mknodat");
4539                 goto out;
4540         }
4541         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4542                 log_stderr("failure: fchownat");
4543                 goto out;
4544         }
4545         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4546                 log_stderr("failure: fchmodat");
4547                 goto out;
4548         }
4549
4550         /* create regular file via mknod */
4551         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4552                 log_stderr("failure: mknodat");
4553                 goto out;
4554         }
4555         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4556                 log_stderr("failure: fchownat");
4557                 goto out;
4558         }
4559         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4560                 log_stderr("failure: fchmodat");
4561                 goto out;
4562         }
4563
4564         /* The sticky bit is set and we own the directory so we must be able to
4565          * delete the files now.
4566          */
4567         pid = fork();
4568         if (pid < 0) {
4569                 log_stderr("failure: fork");
4570                 goto out;
4571         }
4572         if (pid == 0) {
4573                 if (!switch_ids(1000, 1000))
4574                         die("failure: switch_ids");
4575
4576                 if (unlinkat(dir_fd, FILE1, 0))
4577                         die("failure: unlinkat");
4578
4579                 if (unlinkat(dir_fd, FILE2, 0))
4580                         die("failure: unlinkat");
4581
4582                 exit(EXIT_SUCCESS);
4583         }
4584         if (wait_for_pid(pid)) {
4585                 log_stderr("failure: wait_for_pid");
4586                 goto out;
4587         }
4588
4589         fret = 0;
4590         log_debug("Ran test");
4591 out:
4592         safe_close(dir_fd);
4593
4594         return fret;
4595 }
4596
4597 static int sticky_bit_unlink_idmapped_mounts(void)
4598 {
4599         int fret = -1;
4600         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4601         struct mount_attr attr = {
4602                 .attr_set = MOUNT_ATTR_IDMAP,
4603         };
4604         pid_t pid;
4605
4606         if (!caps_supported())
4607                 return 0;
4608
4609         /* create directory */
4610         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4611                 log_stderr("failure: mkdirat");
4612                 goto out;
4613         }
4614
4615         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4616         if (dir_fd < 0) {
4617                 log_stderr("failure: openat");
4618                 goto out;
4619         }
4620         if (fchown(dir_fd, 10000, 10000)) {
4621                 log_stderr("failure: fchown");
4622                 goto out;
4623         }
4624         if (fchmod(dir_fd, 0777)) {
4625                 log_stderr("failure: fchmod");
4626                 goto out;
4627         }
4628
4629         /* create regular file via mknod */
4630         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4631                 log_stderr("failure: mknodat");
4632                 goto out;
4633         }
4634         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4635                 log_stderr("failure: fchownat");
4636                 goto out;
4637         }
4638         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4639                 log_stderr("failure: fchmodat");
4640                 goto out;
4641         }
4642
4643         /* create regular file via mknod */
4644         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4645                 log_stderr("failure: mknodat");
4646                 goto out;
4647         }
4648         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4649                 log_stderr("failure: fchownat");
4650                 goto out;
4651         }
4652         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4653                 log_stderr("failure: fchmodat");
4654                 goto out;
4655         }
4656
4657         /* Changing mount properties on a detached mount. */
4658         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
4659         if (attr.userns_fd < 0) {
4660                 log_stderr("failure: get_userns_fd");
4661                 goto out;
4662         }
4663
4664         open_tree_fd = sys_open_tree(dir_fd, "",
4665                                      AT_EMPTY_PATH |
4666                                      AT_NO_AUTOMOUNT |
4667                                      AT_SYMLINK_NOFOLLOW |
4668                                      OPEN_TREE_CLOEXEC |
4669                                      OPEN_TREE_CLONE);
4670         if (open_tree_fd < 0) {
4671                 log_stderr("failure: sys_open_tree");
4672                 goto out;
4673         }
4674
4675         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4676                 log_stderr("failure: sys_mount_setattr");
4677                 goto out;
4678         }
4679
4680         /* The sticky bit is not set so we must be able to delete files not
4681          * owned by us.
4682          */
4683         pid = fork();
4684         if (pid < 0) {
4685                 log_stderr("failure: fork");
4686                 goto out;
4687         }
4688         if (pid == 0) {
4689                 if (!switch_ids(1000, 1000))
4690                         die("failure: switch_ids");
4691
4692                 if (unlinkat(open_tree_fd, FILE1, 0))
4693                         die("failure: unlinkat");
4694
4695                 if (unlinkat(open_tree_fd, FILE2, 0))
4696                         die("failure: unlinkat");
4697
4698                 exit(EXIT_SUCCESS);
4699         }
4700         if (wait_for_pid(pid)) {
4701                 log_stderr("failure: wait_for_pid");
4702                 goto out;
4703         }
4704
4705         /* set sticky bit */
4706         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4707                 log_stderr("failure: fchmod");
4708                 goto out;
4709         }
4710
4711         /* validate sticky bit is set */
4712         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4713                 log_stderr("failure: is_sticky");
4714                 goto out;
4715         }
4716
4717         /* create regular file via mknod */
4718         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4719                 log_stderr("failure: mknodat");
4720                 goto out;
4721         }
4722         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4723                 log_stderr("failure: fchownat");
4724                 goto out;
4725         }
4726         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4727                 log_stderr("failure: fchmodat");
4728                 goto out;
4729         }
4730
4731         /* create regular file via mknod */
4732         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4733                 log_stderr("failure: mknodat");
4734                 goto out;
4735         }
4736         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4737                 log_stderr("failure: fchownat");
4738                 goto out;
4739         }
4740         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4741                 log_stderr("failure: fchmodat");
4742                 goto out;
4743         }
4744
4745         /* The sticky bit is set so we must not be able to delete files not
4746          * owned by us.
4747          */
4748         pid = fork();
4749         if (pid < 0) {
4750                 log_stderr("failure: fork");
4751                 goto out;
4752         }
4753         if (pid == 0) {
4754                 if (!switch_ids(1000, 1000))
4755                         die("failure: switch_ids");
4756
4757                 if (!unlinkat(open_tree_fd, FILE1, 0))
4758                         die("failure: unlinkat");
4759                 if (errno != EPERM)
4760                         die("failure: errno");
4761
4762                 if (!unlinkat(open_tree_fd, FILE2, 0))
4763                         die("failure: unlinkat");
4764                 if (errno != EPERM)
4765                         die("failure: errno");
4766
4767                 exit(EXIT_SUCCESS);
4768         }
4769         if (wait_for_pid(pid)) {
4770                 log_stderr("failure: wait_for_pid");
4771                 goto out;
4772         }
4773
4774         /* The sticky bit is set and we own the files so we must be able to
4775          * delete the files now.
4776          */
4777         pid = fork();
4778         if (pid < 0) {
4779                 log_stderr("failure: fork");
4780                 goto out;
4781         }
4782         if (pid == 0) {
4783                 /* change ownership */
4784                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
4785                         die("failure: fchownat");
4786                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
4787                         die("failure: expected_uid_gid");
4788                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
4789                         die("failure: fchownat");
4790                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
4791                         die("failure: expected_uid_gid");
4792
4793                 if (!switch_ids(1000, 1000))
4794                         die("failure: switch_ids");
4795
4796                 if (unlinkat(open_tree_fd, FILE1, 0))
4797                         die("failure: unlinkat");
4798
4799                 if (unlinkat(open_tree_fd, FILE2, 0))
4800                         die("failure: unlinkat");
4801
4802                 exit(EXIT_SUCCESS);
4803         }
4804         if (wait_for_pid(pid)) {
4805                 log_stderr("failure: wait_for_pid");
4806                 goto out;
4807         }
4808
4809         /* change uid to unprivileged user */
4810         if (fchown(dir_fd, 11000, -1)) {
4811                 log_stderr("failure: fchown");
4812                 goto out;
4813         }
4814         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4815                 log_stderr("failure: fchmod");
4816                 goto out;
4817         }
4818         /* validate sticky bit is set */
4819         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4820                 log_stderr("failure: is_sticky");
4821                 goto out;
4822         }
4823
4824         /* create regular file via mknod */
4825         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4826                 log_stderr("failure: mknodat");
4827                 goto out;
4828         }
4829         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4830                 log_stderr("failure: fchownat");
4831                 goto out;
4832         }
4833         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4834                 log_stderr("failure: fchmodat");
4835                 goto out;
4836         }
4837
4838         /* create regular file via mknod */
4839         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4840                 log_stderr("failure: mknodat");
4841                 goto out;
4842         }
4843         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4844                 log_stderr("failure: fchownat");
4845                 goto out;
4846         }
4847         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4848                 log_stderr("failure: fchmodat");
4849                 goto out;
4850         }
4851
4852         /* The sticky bit is set and we own the directory so we must be able to
4853          * delete the files now.
4854          */
4855         pid = fork();
4856         if (pid < 0) {
4857                 log_stderr("failure: fork");
4858                 goto out;
4859         }
4860         if (pid == 0) {
4861                 if (!switch_ids(1000, 1000))
4862                         die("failure: switch_ids");
4863
4864                 if (unlinkat(open_tree_fd, FILE1, 0))
4865                         die("failure: unlinkat");
4866
4867                 if (unlinkat(open_tree_fd, FILE2, 0))
4868                         die("failure: unlinkat");
4869
4870                 exit(EXIT_SUCCESS);
4871         }
4872         if (wait_for_pid(pid)) {
4873                 log_stderr("failure: wait_for_pid");
4874                 goto out;
4875         }
4876
4877         fret = 0;
4878         log_debug("Ran test");
4879 out:
4880         safe_close(attr.userns_fd);
4881         safe_close(dir_fd);
4882         safe_close(open_tree_fd);
4883
4884         return fret;
4885 }
4886
4887 /* Validate that the sticky bit behaves correctly on idmapped mounts for unlink
4888  * operations in a user namespace.
4889  */
4890 static int sticky_bit_unlink_idmapped_mounts_in_userns(void)
4891 {
4892         int fret = -1;
4893         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4894         struct mount_attr attr = {
4895                 .attr_set = MOUNT_ATTR_IDMAP,
4896         };
4897         pid_t pid;
4898
4899         if (!caps_supported())
4900                 return 0;
4901
4902         /* create directory */
4903         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4904                 log_stderr("failure: mkdirat");
4905                 goto out;
4906         }
4907
4908         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4909         if (dir_fd < 0) {
4910                 log_stderr("failure: openat");
4911                 goto out;
4912         }
4913         if (fchown(dir_fd, 0, 0)) {
4914                 log_stderr("failure: fchown");
4915                 goto out;
4916         }
4917         if (fchmod(dir_fd, 0777)) {
4918                 log_stderr("failure: fchmod");
4919                 goto out;
4920         }
4921
4922         /* create regular file via mknod */
4923         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4924                 log_stderr("failure: mknodat");
4925                 goto out;
4926         }
4927         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4928                 log_stderr("failure: fchownat");
4929                 goto out;
4930         }
4931         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4932                 log_stderr("failure: fchmodat");
4933                 goto out;
4934         }
4935
4936         /* create regular file via mknod */
4937         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4938                 log_stderr("failure: mknodat");
4939                 goto out;
4940         }
4941         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4942                 log_stderr("failure: fchownat");
4943                 goto out;
4944         }
4945         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4946                 log_stderr("failure: fchmodat");
4947                 goto out;
4948         }
4949
4950         /* Changing mount properties on a detached mount. */
4951         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
4952         if (attr.userns_fd < 0) {
4953                 log_stderr("failure: get_userns_fd");
4954                 goto out;
4955         }
4956
4957         open_tree_fd = sys_open_tree(dir_fd, "",
4958                                      AT_EMPTY_PATH |
4959                                      AT_NO_AUTOMOUNT |
4960                                      AT_SYMLINK_NOFOLLOW |
4961                                      OPEN_TREE_CLOEXEC |
4962                                      OPEN_TREE_CLONE);
4963         if (open_tree_fd < 0) {
4964                 log_stderr("failure: sys_open_tree");
4965                 goto out;
4966         }
4967
4968         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4969                 log_stderr("failure: sys_mount_setattr");
4970                 goto out;
4971         }
4972
4973         /* The sticky bit is not set so we must be able to delete files not
4974          * owned by us.
4975          */
4976         pid = fork();
4977         if (pid < 0) {
4978                 log_stderr("failure: fork");
4979                 goto out;
4980         }
4981         if (pid == 0) {
4982                 if (!caps_supported()) {
4983                         log_debug("skip: capability library not installed");
4984                         exit(EXIT_SUCCESS);
4985                 }
4986
4987                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
4988                         die("failure: switch_userns");
4989
4990                 if (unlinkat(dir_fd, FILE1, 0))
4991                         die("failure: unlinkat");
4992
4993                 if (unlinkat(dir_fd, FILE2, 0))
4994                         die("failure: unlinkat");
4995
4996                 exit(EXIT_SUCCESS);
4997         }
4998         if (wait_for_pid(pid)) {
4999                 log_stderr("failure: wait_for_pid");
5000                 goto out;
5001         }
5002
5003         /* set sticky bit */
5004         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5005                 log_stderr("failure: fchmod");
5006                 goto out;
5007         }
5008
5009         /* validate sticky bit is set */
5010         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5011                 log_stderr("failure: is_sticky");
5012                 goto out;
5013         }
5014
5015         /* create regular file via mknod */
5016         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5017                 log_stderr("failure: mknodat");
5018                 goto out;
5019         }
5020         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5021                 log_stderr("failure: fchownat");
5022                 goto out;
5023         }
5024         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5025                 log_stderr("failure: fchmodat");
5026                 goto out;
5027         }
5028
5029         /* create regular file via mknod */
5030         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5031                 log_stderr("failure: mknodat");
5032                 goto out;
5033         }
5034         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5035                 log_stderr("failure: fchownat");
5036                 goto out;
5037         }
5038         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5039                 log_stderr("failure: fchmodat");
5040                 goto out;
5041         }
5042
5043         /* The sticky bit is set so we must not be able to delete files not
5044          * owned by us.
5045          */
5046         pid = fork();
5047         if (pid < 0) {
5048                 log_stderr("failure: fork");
5049                 goto out;
5050         }
5051         if (pid == 0) {
5052                 if (!caps_supported()) {
5053                         log_debug("skip: capability library not installed");
5054                         exit(EXIT_SUCCESS);
5055                 }
5056
5057                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5058                         die("failure: switch_userns");
5059
5060                 if (!unlinkat(dir_fd, FILE1, 0))
5061                         die("failure: unlinkat");
5062                 if (errno != EPERM)
5063                         die("failure: errno");
5064
5065                 if (!unlinkat(dir_fd, FILE2, 0))
5066                         die("failure: unlinkat");
5067                 if (errno != EPERM)
5068                         die("failure: errno");
5069
5070                 if (!unlinkat(open_tree_fd, FILE1, 0))
5071                         die("failure: unlinkat");
5072                 if (errno != EPERM)
5073                         die("failure: errno");
5074
5075                 if (!unlinkat(open_tree_fd, FILE2, 0))
5076                         die("failure: unlinkat");
5077                 if (errno != EPERM)
5078                         die("failure: errno");
5079
5080                 exit(EXIT_SUCCESS);
5081         }
5082         if (wait_for_pid(pid)) {
5083                 log_stderr("failure: wait_for_pid");
5084                 goto out;
5085         }
5086
5087         /* The sticky bit is set and we own the files so we must be able to
5088          * delete the files now.
5089          */
5090         pid = fork();
5091         if (pid < 0) {
5092                 log_stderr("failure: fork");
5093                 goto out;
5094         }
5095         if (pid == 0) {
5096                 /* change ownership */
5097                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5098                         die("failure: fchownat");
5099                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5100                         die("failure: expected_uid_gid");
5101                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5102                         die("failure: fchownat");
5103                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5104                         die("failure: expected_uid_gid");
5105
5106                 if (!caps_supported()) {
5107                         log_debug("skip: capability library not installed");
5108                         exit(EXIT_SUCCESS);
5109                 }
5110
5111                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5112                         die("failure: switch_userns");
5113
5114                 if (!unlinkat(dir_fd, FILE1, 0))
5115                         die("failure: unlinkat");
5116                 if (errno != EPERM)
5117                         die("failure: errno");
5118
5119                 if (!unlinkat(dir_fd, FILE2, 0))
5120                         die("failure: unlinkat");
5121                 if (errno != EPERM)
5122                         die("failure: errno");
5123
5124                 if (unlinkat(open_tree_fd, FILE1, 0))
5125                         die("failure: unlinkat");
5126
5127                 if (unlinkat(open_tree_fd, FILE2, 0))
5128                         die("failure: unlinkat");
5129
5130                 exit(EXIT_SUCCESS);
5131         }
5132         if (wait_for_pid(pid)) {
5133                 log_stderr("failure: wait_for_pid");
5134                 goto out;
5135         }
5136
5137         /* change uid to unprivileged user */
5138         if (fchown(dir_fd, 1000, -1)) {
5139                 log_stderr("failure: fchown");
5140                 goto out;
5141         }
5142         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5143                 log_stderr("failure: fchmod");
5144                 goto out;
5145         }
5146         /* validate sticky bit is set */
5147         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5148                 log_stderr("failure: is_sticky");
5149                 goto out;
5150         }
5151
5152         /* create regular file via mknod */
5153         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5154                 log_stderr("failure: mknodat");
5155                 goto out;
5156         }
5157         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5158                 log_stderr("failure: fchownat");
5159                 goto out;
5160         }
5161         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5162                 log_stderr("failure: fchmodat");
5163                 goto out;
5164         }
5165
5166         /* create regular file via mknod */
5167         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5168                 log_stderr("failure: mknodat");
5169                 goto out;
5170         }
5171         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5172                 log_stderr("failure: fchownat");
5173                 goto out;
5174         }
5175         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5176                 log_stderr("failure: fchmodat");
5177                 goto out;
5178         }
5179
5180         /* The sticky bit is set and we own the directory so we must be able to
5181          * delete the files now.
5182          */
5183         pid = fork();
5184         if (pid < 0) {
5185                 log_stderr("failure: fork");
5186                 goto out;
5187         }
5188         if (pid == 0) {
5189                 if (!caps_supported()) {
5190                         log_debug("skip: capability library not installed");
5191                         exit(EXIT_SUCCESS);
5192                 }
5193
5194                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5195                         die("failure: switch_userns");
5196
5197                 /* we don't own the directory from the original mount */
5198                 if (!unlinkat(dir_fd, FILE1, 0))
5199                         die("failure: unlinkat");
5200                 if (errno != EPERM)
5201                         die("failure: errno");
5202
5203                 if (!unlinkat(dir_fd, FILE2, 0))
5204                         die("failure: unlinkat");
5205                 if (errno != EPERM)
5206                         die("failure: errno");
5207
5208                 /* we own the file from the idmapped mount */
5209                 if (unlinkat(open_tree_fd, FILE1, 0))
5210                         die("failure: unlinkat");
5211                 if (unlinkat(open_tree_fd, FILE2, 0))
5212                         die("failure: unlinkat");
5213
5214                 exit(EXIT_SUCCESS);
5215         }
5216         if (wait_for_pid(pid)) {
5217                 log_stderr("failure: wait_for_pid");
5218                 goto out;
5219         }
5220
5221         fret = 0;
5222         log_debug("Ran test");
5223 out:
5224         safe_close(attr.userns_fd);
5225         safe_close(dir_fd);
5226         safe_close(open_tree_fd);
5227
5228         return fret;
5229 }
5230
5231 static int sticky_bit_rename(void)
5232 {
5233         int fret = -1;
5234         int dir_fd = -EBADF;
5235         pid_t pid;
5236
5237         if (!caps_supported())
5238                 return 0;
5239
5240         /* create directory */
5241         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5242                 log_stderr("failure: mkdirat");
5243                 goto out;
5244         }
5245
5246         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5247         if (dir_fd < 0) {
5248                 log_stderr("failure: openat");
5249                 goto out;
5250         }
5251         if (fchown(dir_fd, 0, 0)) {
5252                 log_stderr("failure: fchown");
5253                 goto out;
5254         }
5255         if (fchmod(dir_fd, 0777)) {
5256                 log_stderr("failure: fchmod");
5257                 goto out;
5258         }
5259
5260         /* create regular file via mknod */
5261         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5262                 log_stderr("failure: mknodat");
5263                 goto out;
5264         }
5265         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5266                 log_stderr("failure: fchownat");
5267                 goto out;
5268         }
5269         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5270                 log_stderr("failure: fchmodat");
5271                 goto out;
5272         }
5273
5274         /* create regular file via mknod */
5275         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5276                 log_stderr("failure: mknodat");
5277                 goto out;
5278         }
5279         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5280                 log_stderr("failure: fchownat");
5281                 goto out;
5282         }
5283         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5284                 log_stderr("failure: fchmodat");
5285                 goto out;
5286         }
5287
5288         /* The sticky bit is not set so we must be able to delete files not
5289          * owned by us.
5290          */
5291         pid = fork();
5292         if (pid < 0) {
5293                 log_stderr("failure: fork");
5294                 goto out;
5295         }
5296         if (pid == 0) {
5297                 if (!switch_ids(1000, 1000))
5298                         die("failure: switch_ids");
5299
5300                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5301                         die("failure: renameat");
5302
5303                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5304                         die("failure: renameat");
5305
5306                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5307                         die("failure: renameat");
5308
5309                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5310                         die("failure: renameat");
5311
5312                 exit(EXIT_SUCCESS);
5313         }
5314         if (wait_for_pid(pid)) {
5315                 log_stderr("failure: wait_for_pid");
5316                 goto out;
5317         }
5318
5319         /* set sticky bit */
5320         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5321                 log_stderr("failure: fchmod");
5322                 goto out;
5323         }
5324
5325         /* validate sticky bit is set */
5326         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5327                 log_stderr("failure: is_sticky");
5328                 goto out;
5329         }
5330
5331         /* The sticky bit is set so we must not be able to delete files not
5332          * owned by us.
5333          */
5334         pid = fork();
5335         if (pid < 0) {
5336                 log_stderr("failure: fork");
5337                 goto out;
5338         }
5339         if (pid == 0) {
5340                 if (!switch_ids(1000, 1000))
5341                         die("failure: switch_ids");
5342
5343                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5344                         die("failure: renameat");
5345                 if (errno != EPERM)
5346                         die("failure: errno");
5347
5348                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5349                         die("failure: renameat");
5350                 if (errno != EPERM)
5351                         die("failure: errno");
5352
5353                 exit(EXIT_SUCCESS);
5354         }
5355         if (wait_for_pid(pid)) {
5356                 log_stderr("failure: wait_for_pid");
5357                 goto out;
5358         }
5359
5360         /* The sticky bit is set and we own the files so we must be able to
5361          * delete the files now.
5362          */
5363         pid = fork();
5364         if (pid < 0) {
5365                 log_stderr("failure: fork");
5366                 goto out;
5367         }
5368         if (pid == 0) {
5369                 /* change ownership */
5370                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5371                         die("failure: fchownat");
5372                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5373                         die("failure: expected_uid_gid");
5374                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5375                         die("failure: fchownat");
5376                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5377                         die("failure: expected_uid_gid");
5378
5379                 if (!switch_ids(1000, 1000))
5380                         die("failure: switch_ids");
5381
5382                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5383                         die("failure: renameat");
5384
5385                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5386                         die("failure: renameat");
5387
5388                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5389                         die("failure: renameat");
5390
5391                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5392                         die("failure: renameat");
5393
5394                 exit(EXIT_SUCCESS);
5395         }
5396         if (wait_for_pid(pid)) {
5397                 log_stderr("failure: wait_for_pid");
5398                 goto out;
5399         }
5400
5401         /* change uid to unprivileged user */
5402         if (fchown(dir_fd, 1000, -1)) {
5403                 log_stderr("failure: fchown");
5404                 goto out;
5405         }
5406         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5407                 log_stderr("failure: fchmod");
5408                 goto out;
5409         }
5410         /* validate sticky bit is set */
5411         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5412                 log_stderr("failure: is_sticky");
5413                 goto out;
5414         }
5415
5416
5417         /* The sticky bit is set and we own the directory so we must be able to
5418          * delete the files now.
5419          */
5420         pid = fork();
5421         if (pid < 0) {
5422                 log_stderr("failure: fork");
5423                 goto out;
5424         }
5425         if (pid == 0) {
5426                 if (!switch_ids(1000, 1000))
5427                         die("failure: switch_ids");
5428
5429                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5430                         die("failure: renameat");
5431
5432                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5433                         die("failure: renameat");
5434
5435                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5436                         die("failure: renameat");
5437
5438                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5439                         die("failure: renameat");
5440
5441                 exit(EXIT_SUCCESS);
5442         }
5443         if (wait_for_pid(pid)) {
5444                 log_stderr("failure: wait_for_pid");
5445                 goto out;
5446         }
5447
5448         fret = 0;
5449         log_debug("Ran test");
5450 out:
5451         safe_close(dir_fd);
5452
5453         return fret;
5454 }
5455
5456 static int sticky_bit_rename_idmapped_mounts(void)
5457 {
5458         int fret = -1;
5459         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5460         struct mount_attr attr = {
5461                 .attr_set = MOUNT_ATTR_IDMAP,
5462         };
5463         pid_t pid;
5464
5465         if (!caps_supported())
5466                 return 0;
5467
5468         /* create directory */
5469         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5470                 log_stderr("failure: mkdirat");
5471                 goto out;
5472         }
5473
5474         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5475         if (dir_fd < 0) {
5476                 log_stderr("failure: openat");
5477                 goto out;
5478         }
5479
5480         if (fchown(dir_fd, 10000, 10000)) {
5481                 log_stderr("failure: fchown");
5482                 goto out;
5483         }
5484
5485         if (fchmod(dir_fd, 0777)) {
5486                 log_stderr("failure: fchmod");
5487                 goto out;
5488         }
5489
5490         /* create regular file via mknod */
5491         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5492                 log_stderr("failure: mknodat");
5493                 goto out;
5494         }
5495         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
5496                 log_stderr("failure: fchownat");
5497                 goto out;
5498         }
5499         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5500                 log_stderr("failure: fchmodat");
5501                 goto out;
5502         }
5503
5504         /* create regular file via mknod */
5505         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5506                 log_stderr("failure: mknodat");
5507                 goto out;
5508         }
5509         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
5510                 log_stderr("failure: fchownat");
5511                 goto out;
5512         }
5513         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5514                 log_stderr("failure: fchmodat");
5515                 goto out;
5516         }
5517
5518         /* Changing mount properties on a detached mount. */
5519         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
5520         if (attr.userns_fd < 0) {
5521                 log_stderr("failure: get_userns_fd");
5522                 goto out;
5523         }
5524
5525         open_tree_fd = sys_open_tree(dir_fd, "",
5526                                      AT_EMPTY_PATH |
5527                                      AT_NO_AUTOMOUNT |
5528                                      AT_SYMLINK_NOFOLLOW |
5529                                      OPEN_TREE_CLOEXEC |
5530                                      OPEN_TREE_CLONE);
5531         if (open_tree_fd < 0) {
5532                 log_stderr("failure: sys_open_tree");
5533                 goto out;
5534         }
5535
5536         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5537                 log_stderr("failure: sys_mount_setattr");
5538                 goto out;
5539         }
5540
5541         /* The sticky bit is not set so we must be able to delete files not
5542          * owned by us.
5543          */
5544         pid = fork();
5545         if (pid < 0) {
5546                 log_stderr("failure: fork");
5547                 goto out;
5548         }
5549         if (pid == 0) {
5550                 if (!switch_ids(1000, 1000))
5551                         die("failure: switch_ids");
5552
5553                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5554                         die("failure: renameat");
5555
5556                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5557                         die("failure: renameat");
5558
5559                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5560                         die("failure: renameat");
5561
5562                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5563                         die("failure: renameat");
5564
5565                 exit(EXIT_SUCCESS);
5566         }
5567         if (wait_for_pid(pid)) {
5568                 log_stderr("failure: wait_for_pid");
5569                 goto out;
5570         }
5571
5572         /* set sticky bit */
5573         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5574                 log_stderr("failure: fchmod");
5575                 goto out;
5576         }
5577
5578         /* validate sticky bit is set */
5579         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5580                 log_stderr("failure: is_sticky");
5581                 goto out;
5582         }
5583
5584         /* The sticky bit is set so we must not be able to delete files not
5585          * owned by us.
5586          */
5587         pid = fork();
5588         if (pid < 0) {
5589                 log_stderr("failure: fork");
5590                 goto out;
5591         }
5592         if (pid == 0) {
5593                 if (!switch_ids(1000, 1000))
5594                         die("failure: switch_ids");
5595
5596                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5597                         die("failure: renameat");
5598                 if (errno != EPERM)
5599                         die("failure: errno");
5600
5601                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5602                         die("failure: renameat");
5603                 if (errno != EPERM)
5604                         die("failure: errno");
5605
5606                 exit(EXIT_SUCCESS);
5607         }
5608         if (wait_for_pid(pid)) {
5609                 log_stderr("failure: wait_for_pid");
5610                 goto out;
5611         }
5612
5613         /* The sticky bit is set and we own the files so we must be able to
5614          * delete the files now.
5615          */
5616         pid = fork();
5617         if (pid < 0) {
5618                 log_stderr("failure: fork");
5619                 goto out;
5620         }
5621         if (pid == 0) {
5622                 /* change ownership */
5623                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
5624                         die("failure: fchownat");
5625                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
5626                         die("failure: expected_uid_gid");
5627                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
5628                         die("failure: fchownat");
5629                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
5630                         die("failure: expected_uid_gid");
5631
5632                 if (!switch_ids(1000, 1000))
5633                         die("failure: switch_ids");
5634
5635                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5636                         die("failure: renameat");
5637
5638                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5639                         die("failure: renameat");
5640
5641                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5642                         die("failure: renameat");
5643
5644                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5645                         die("failure: renameat");
5646
5647                 exit(EXIT_SUCCESS);
5648         }
5649         if (wait_for_pid(pid)) {
5650                 log_stderr("failure: wait_for_pid");
5651                 goto out;
5652         }
5653
5654         /* change uid to unprivileged user */
5655         if (fchown(dir_fd, 11000, -1)) {
5656                 log_stderr("failure: fchown");
5657                 goto out;
5658         }
5659         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5660                 log_stderr("failure: fchmod");
5661                 goto out;
5662         }
5663         /* validate sticky bit is set */
5664         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5665                 log_stderr("failure: is_sticky");
5666                 goto out;
5667         }
5668
5669         /* The sticky bit is set and we own the directory so we must be able to
5670          * delete the files now.
5671          */
5672         pid = fork();
5673         if (pid < 0) {
5674                 log_stderr("failure: fork");
5675                 goto out;
5676         }
5677         if (pid == 0) {
5678                 if (!switch_ids(1000, 1000))
5679                         die("failure: switch_ids");
5680
5681                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5682                         die("failure: renameat");
5683
5684                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5685                         die("failure: renameat");
5686
5687                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5688                         die("failure: renameat");
5689
5690                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5691                         die("failure: renameat");
5692
5693                 exit(EXIT_SUCCESS);
5694         }
5695         if (wait_for_pid(pid)) {
5696                 log_stderr("failure: wait_for_pid");
5697                 goto out;
5698         }
5699
5700         fret = 0;
5701         log_debug("Ran test");
5702 out:
5703         safe_close(attr.userns_fd);
5704         safe_close(dir_fd);
5705         safe_close(open_tree_fd);
5706
5707         return fret;
5708 }
5709
/* Validate that the sticky bit behaves correctly on idmapped mounts for rename
 * operations in a user namespace.
 */
5713 static int sticky_bit_rename_idmapped_mounts_in_userns(void)
5714 {
5715         int fret = -1;
5716         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5717         struct mount_attr attr = {
5718                 .attr_set = MOUNT_ATTR_IDMAP,
5719         };
5720         pid_t pid;
5721
5722         if (!caps_supported())
5723                 return 0;
5724
5725         /* create directory */
5726         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5727                 log_stderr("failure: mkdirat");
5728                 goto out;
5729         }
5730
5731         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5732         if (dir_fd < 0) {
5733                 log_stderr("failure: openat");
5734                 goto out;
5735         }
5736         if (fchown(dir_fd, 0, 0)) {
5737                 log_stderr("failure: fchown");
5738                 goto out;
5739         }
5740         if (fchmod(dir_fd, 0777)) {
5741                 log_stderr("failure: fchmod");
5742                 goto out;
5743         }
5744
5745         /* create regular file via mknod */
5746         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5747                 log_stderr("failure: mknodat");
5748                 goto out;
5749         }
5750         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5751                 log_stderr("failure: fchownat");
5752                 goto out;
5753         }
5754         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5755                 log_stderr("failure: fchmodat");
5756                 goto out;
5757         }
5758
5759         /* create regular file via mknod */
5760         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5761                 log_stderr("failure: mknodat");
5762                 goto out;
5763         }
5764         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5765                 log_stderr("failure: fchownat");
5766                 goto out;
5767         }
5768         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5769                 log_stderr("failure: fchmodat");
5770                 goto out;
5771         }
5772
5773         /* Changing mount properties on a detached mount. */
5774         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
5775         if (attr.userns_fd < 0) {
5776                 log_stderr("failure: get_userns_fd");
5777                 goto out;
5778         }
5779
5780         open_tree_fd = sys_open_tree(dir_fd, "",
5781                                      AT_EMPTY_PATH |
5782                                      AT_NO_AUTOMOUNT |
5783                                      AT_SYMLINK_NOFOLLOW |
5784                                      OPEN_TREE_CLOEXEC |
5785                                      OPEN_TREE_CLONE);
5786         if (open_tree_fd < 0) {
5787                 log_stderr("failure: sys_open_tree");
5788                 goto out;
5789         }
5790
5791         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5792                 log_stderr("failure: sys_mount_setattr");
5793                 goto out;
5794         }
5795
5796         /* The sticky bit is not set so we must be able to delete files not
5797          * owned by us.
5798          */
5799         pid = fork();
5800         if (pid < 0) {
5801                 log_stderr("failure: fork");
5802                 goto out;
5803         }
5804         if (pid == 0) {
5805                 if (!caps_supported()) {
5806                         log_debug("skip: capability library not installed");
5807                         exit(EXIT_SUCCESS);
5808                 }
5809
5810                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5811                         die("failure: switch_userns");
5812
5813                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5814                         die("failure: renameat");
5815
5816                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5817                         die("failure: renameat");
5818
5819                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5820                         die("failure: renameat");
5821
5822                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5823                         die("failure: renameat");
5824
5825                 exit(EXIT_SUCCESS);
5826         }
5827         if (wait_for_pid(pid)) {
5828                 log_stderr("failure: wait_for_pid");
5829                 goto out;
5830         }
5831
5832         /* set sticky bit */
5833         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5834                 log_stderr("failure: fchmod");
5835                 goto out;
5836         }
5837
5838         /* validate sticky bit is set */
5839         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5840                 log_stderr("failure: is_sticky");
5841                 goto out;
5842         }
5843
5844         /* The sticky bit is set so we must not be able to delete files not
5845          * owned by us.
5846          */
5847         pid = fork();
5848         if (pid < 0) {
5849                 log_stderr("failure: fork");
5850                 goto out;
5851         }
5852         if (pid == 0) {
5853                 if (!caps_supported()) {
5854                         log_debug("skip: capability library not installed");
5855                         exit(EXIT_SUCCESS);
5856                 }
5857
5858                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5859                         die("failure: switch_userns");
5860
5861                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5862                         die("failure: renameat");
5863                 if (errno != EPERM)
5864                         die("failure: errno");
5865
5866                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5867                         die("failure: renameat");
5868                 if (errno != EPERM)
5869                         die("failure: errno");
5870
5871                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5872                         die("failure: renameat");
5873                 if (errno != EPERM)
5874                         die("failure: errno");
5875
5876                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5877                         die("failure: renameat");
5878                 if (errno != EPERM)
5879                         die("failure: errno");
5880
5881                 exit(EXIT_SUCCESS);
5882         }
5883         if (wait_for_pid(pid)) {
5884                 log_stderr("failure: wait_for_pid");
5885                 goto out;
5886         }
5887
5888         /* The sticky bit is set and we own the files so we must be able to
5889          * delete the files now.
5890          */
5891         pid = fork();
5892         if (pid < 0) {
5893                 log_stderr("failure: fork");
5894                 goto out;
5895         }
5896         if (pid == 0) {
5897                 /* change ownership */
5898                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5899                         die("failure: fchownat");
5900                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5901                         die("failure: expected_uid_gid");
5902                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5903                         die("failure: fchownat");
5904                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5905                         die("failure: expected_uid_gid");
5906
5907                 if (!caps_supported()) {
5908                         log_debug("skip: capability library not installed");
5909                         exit(EXIT_SUCCESS);
5910                 }
5911
5912                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5913                         die("failure: switch_userns");
5914
5915                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5916                         die("failure: renameat");
5917                 if (errno != EPERM)
5918                         die("failure: errno");
5919
5920                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5921                         die("failure: renameat");
5922                 if (errno != EPERM)
5923                         die("failure: errno");
5924
5925                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5926                         die("failure: renameat");
5927
5928                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5929                         die("failure: renameat");
5930
5931                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5932                         die("failure: renameat");
5933
5934                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5935                         die("failure: renameat");
5936
5937                 exit(EXIT_SUCCESS);
5938         }
5939         if (wait_for_pid(pid)) {
5940                 log_stderr("failure: wait_for_pid");
5941                 goto out;
5942         }
5943
5944         /* change uid to unprivileged user */
5945         if (fchown(dir_fd, 1000, -1)) {
5946                 log_stderr("failure: fchown");
5947                 goto out;
5948         }
5949         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5950                 log_stderr("failure: fchmod");
5951                 goto out;
5952         }
5953         /* validate sticky bit is set */
5954         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5955                 log_stderr("failure: is_sticky");
5956                 goto out;
5957         }
5958
5959         /* The sticky bit is set and we own the directory so we must be able to
5960          * delete the files now.
5961          */
5962         pid = fork();
5963         if (pid < 0) {
5964                 log_stderr("failure: fork");
5965                 goto out;
5966         }
5967         if (pid == 0) {
5968                 if (!caps_supported()) {
5969                         log_debug("skip: capability library not installed");
5970                         exit(EXIT_SUCCESS);
5971                 }
5972
5973                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5974                         die("failure: switch_userns");
5975
5976                 /* we don't own the directory from the original mount */
5977                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5978                         die("failure: renameat");
5979                 if (errno != EPERM)
5980                         die("failure: errno");
5981
5982                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5983                         die("failure: renameat");
5984                 if (errno != EPERM)
5985                         die("failure: errno");
5986
5987                 /* we own the file from the idmapped mount */
5988                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5989                         die("failure: renameat");
5990
5991                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5992                         die("failure: renameat");
5993
5994                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5995                         die("failure: renameat");
5996
5997                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5998                         die("failure: renameat");
5999
6000                 exit(EXIT_SUCCESS);
6001         }
6002         if (wait_for_pid(pid)) {
6003                 log_stderr("failure: wait_for_pid");
6004                 goto out;
6005         }
6006
6007         fret = 0;
6008         log_debug("Ran test");
6009 out:
6010         safe_close(open_tree_fd);
6011         safe_close(attr.userns_fd);
6012         safe_close(dir_fd);
6013
6014         return fret;
6015 }
6016
6017 /* Validate that protected symlinks work correctly. */
6018 static int protected_symlinks(void)
6019 {
6020         int fret = -1;
6021         int dir_fd = -EBADF, fd = -EBADF;
6022         pid_t pid;
6023
6024         if (!protected_symlinks_enabled())
6025                 return 0;
6026
6027         if (!caps_supported())
6028                 return 0;
6029
6030         /* create directory */
6031         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6032                 log_stderr("failure: mkdirat");
6033                 goto out;
6034         }
6035
6036         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6037         if (dir_fd < 0) {
6038                 log_stderr("failure: openat");
6039                 goto out;
6040         }
6041         if (fchown(dir_fd, 0, 0)) {
6042                 log_stderr("failure: fchown");
6043                 goto out;
6044         }
6045         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6046                 log_stderr("failure: fchmod");
6047                 goto out;
6048         }
6049         /* validate sticky bit is set */
6050         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6051                 log_stderr("failure: is_sticky");
6052                 goto out;
6053         }
6054
6055         /* create regular file via mknod */
6056         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6057                 log_stderr("failure: mknodat");
6058                 goto out;
6059         }
6060         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6061                 log_stderr("failure: fchownat");
6062                 goto out;
6063         }
6064         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6065                 log_stderr("failure: fchmodat");
6066                 goto out;
6067         }
6068
6069         /* create symlinks */
6070         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6071                 log_stderr("failure: symlinkat");
6072                 goto out;
6073         }
6074         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6075                 log_stderr("failure: fchownat");
6076                 goto out;
6077         }
6078         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6079                 log_stderr("failure: expected_uid_gid");
6080                 goto out;
6081         }
6082         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6083                 log_stderr("failure: expected_uid_gid");
6084                 goto out;
6085         }
6086
6087         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6088                 log_stderr("failure: symlinkat");
6089                 goto out;
6090         }
6091         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6092                 log_stderr("failure: fchownat");
6093                 goto out;
6094         }
6095         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6096                 log_stderr("failure: expected_uid_gid");
6097                 goto out;
6098         }
6099         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6100                 log_stderr("failure: expected_uid_gid");
6101                 goto out;
6102         }
6103
6104         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6105                 log_stderr("failure: symlinkat");
6106                 goto out;
6107         }
6108         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6109                 log_stderr("failure: fchownat");
6110                 goto out;
6111         }
6112         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6113                 log_stderr("failure: expected_uid_gid");
6114                 goto out;
6115         }
6116         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6117                 log_stderr("failure: expected_uid_gid");
6118                 goto out;
6119         }
6120
6121         /* validate file can be directly read */
6122         fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6123         if (fd < 0) {
6124                 log_stderr("failure: openat");
6125                 goto out;
6126         }
6127         safe_close(fd);
6128
6129         /* validate file can be read through own symlink */
6130         fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6131         if (fd < 0) {
6132                 log_stderr("failure: openat");
6133                 goto out;
6134         }
6135         safe_close(fd);
6136
6137         pid = fork();
6138         if (pid < 0) {
6139                 log_stderr("failure: fork");
6140                 goto out;
6141         }
6142         if (pid == 0) {
6143                 if (!switch_ids(1000, 1000))
6144                         die("failure: switch_ids");
6145
6146                 /* validate file can be directly read */
6147                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6148                 if (fd < 0)
6149                         die("failure: openat");
6150                 safe_close(fd);
6151
6152                 /* validate file can be read through own symlink */
6153                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6154                 if (fd < 0)
6155                         die("failure: openat");
6156                 safe_close(fd);
6157
6158                 /* validate file can be read through root symlink */
6159                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6160                 if (fd < 0)
6161                         die("failure: openat");
6162                 safe_close(fd);
6163
6164                 /* validate file can't be read through other users symlink */
6165                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6166                 if (fd >= 0)
6167                         die("failure: openat");
6168                 if (errno != EACCES)
6169                         die("failure: errno");
6170
6171                 exit(EXIT_SUCCESS);
6172         }
6173         if (wait_for_pid(pid)) {
6174                 log_stderr("failure: wait_for_pid");
6175                 goto out;
6176         }
6177
6178         pid = fork();
6179         if (pid < 0) {
6180                 log_stderr("failure: fork");
6181                 goto out;
6182         }
6183         if (pid == 0) {
6184                 if (!switch_ids(2000, 2000))
6185                         die("failure: switch_ids");
6186
6187                 /* validate file can be directly read */
6188                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6189                 if (fd < 0)
6190                         die("failure: openat");
6191                 safe_close(fd);
6192
6193                 /* validate file can be read through own symlink */
6194                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6195                 if (fd < 0)
6196                         die("failure: openat");
6197                 safe_close(fd);
6198
6199                 /* validate file can be read through root symlink */
6200                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6201                 if (fd < 0)
6202                         die("failure: openat");
6203                 safe_close(fd);
6204
6205                 /* validate file can't be read through other users symlink */
6206                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6207                 if (fd >= 0)
6208                         die("failure: openat");
6209                 if (errno != EACCES)
6210                         die("failure: errno");
6211
6212                 exit(EXIT_SUCCESS);
6213         }
6214         if (wait_for_pid(pid)) {
6215                 log_stderr("failure: wait_for_pid");
6216                 goto out;
6217         }
6218
6219         fret = 0;
6220         log_debug("Ran test");
6221 out:
6222         safe_close(fd);
6223         safe_close(dir_fd);
6224
6225         return fret;
6226 }
6227
6228 /* Validate that protected symlinks work correctly on idmapped mounts. */
6229 static int protected_symlinks_idmapped_mounts(void)
6230 {
6231         int fret = -1;
6232         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6233         struct mount_attr attr = {
6234                 .attr_set = MOUNT_ATTR_IDMAP,
6235         };
6236         pid_t pid;
6237
6238         if (!protected_symlinks_enabled())
6239                 return 0;
6240
6241         if (!caps_supported())
6242                 return 0;
6243
6244         /* create directory */
6245         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6246                 log_stderr("failure: mkdirat");
6247                 goto out;
6248         }
6249
6250         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6251         if (dir_fd < 0) {
6252                 log_stderr("failure: openat");
6253                 goto out;
6254         }
6255         if (fchown(dir_fd, 10000, 10000)) {
6256                 log_stderr("failure: fchown");
6257                 goto out;
6258         }
6259         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6260                 log_stderr("failure: fchmod");
6261                 goto out;
6262         }
6263         /* validate sticky bit is set */
6264         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6265                 log_stderr("failure: is_sticky");
6266                 goto out;
6267         }
6268
6269         /* create regular file via mknod */
6270         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6271                 log_stderr("failure: mknodat");
6272                 goto out;
6273         }
6274         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
6275                 log_stderr("failure: fchownat");
6276                 goto out;
6277         }
6278         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6279                 log_stderr("failure: fchmodat");
6280                 goto out;
6281         }
6282
6283         /* create symlinks */
6284         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6285                 log_stderr("failure: symlinkat");
6286                 goto out;
6287         }
6288         if (fchownat(dir_fd, SYMLINK_USER1, 10000, 10000, AT_SYMLINK_NOFOLLOW)) {
6289                 log_stderr("failure: fchownat");
6290                 goto out;
6291         }
6292         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
6293                 log_stderr("failure: expected_uid_gid");
6294                 goto out;
6295         }
6296         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6297                 log_stderr("failure: expected_uid_gid");
6298                 goto out;
6299         }
6300
6301         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6302                 log_stderr("failure: symlinkat");
6303                 goto out;
6304         }
6305         if (fchownat(dir_fd, SYMLINK_USER2, 11000, 11000, AT_SYMLINK_NOFOLLOW)) {
6306                 log_stderr("failure: fchownat");
6307                 goto out;
6308         }
6309         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 11000, 11000)) {
6310                 log_stderr("failure: expected_uid_gid");
6311                 goto out;
6312         }
6313         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6314                 log_stderr("failure: expected_uid_gid");
6315                 goto out;
6316         }
6317
6318         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6319                 log_stderr("failure: symlinkat");
6320                 goto out;
6321         }
6322         if (fchownat(dir_fd, SYMLINK_USER3, 12000, 12000, AT_SYMLINK_NOFOLLOW)) {
6323                 log_stderr("failure: fchownat");
6324                 goto out;
6325         }
6326         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
6327                 log_stderr("failure: expected_uid_gid");
6328                 goto out;
6329         }
6330         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6331                 log_stderr("failure: expected_uid_gid");
6332                 goto out;
6333         }
6334
6335         /* Changing mount properties on a detached mount. */
6336         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
6337         if (attr.userns_fd < 0) {
6338                 log_stderr("failure: get_userns_fd");
6339                 goto out;
6340         }
6341
6342         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6343                                      AT_EMPTY_PATH |
6344                                      AT_NO_AUTOMOUNT |
6345                                      AT_SYMLINK_NOFOLLOW |
6346                                      OPEN_TREE_CLOEXEC |
6347                                      OPEN_TREE_CLONE);
6348         if (open_tree_fd < 0) {
6349                 log_stderr("failure: open_tree_fd");
6350                 goto out;
6351         }
6352
6353         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6354                 log_stderr("failure: sys_mount_setattr");
6355                 goto out;
6356         }
6357
6358         /* validate file can be directly read */
6359         fd = openat(open_tree_fd, DIR1 "/"  FILE1, O_RDONLY | O_CLOEXEC, 0);
6360         if (fd < 0) {
6361                 log_stderr("failure: openat");
6362                 goto out;
6363         }
6364         safe_close(fd);
6365
6366         /* validate file can be read through own symlink */
6367         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6368         if (fd < 0) {
6369                 log_stderr("failure: openat");
6370                 goto out;
6371         }
6372         safe_close(fd);
6373
6374         pid = fork();
6375         if (pid < 0) {
6376                 log_stderr("failure: fork");
6377                 goto out;
6378         }
6379         if (pid == 0) {
6380                 if (!switch_ids(1000, 1000))
6381                         die("failure: switch_ids");
6382
6383                 /* validate file can be directly read */
6384                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6385                 if (fd < 0)
6386                         die("failure: openat");
6387                 safe_close(fd);
6388
6389                 /* validate file can be read through own symlink */
6390                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6391                 if (fd < 0)
6392                         die("failure: openat");
6393                 safe_close(fd);
6394
6395                 /* validate file can be read through root symlink */
6396                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6397                 if (fd < 0)
6398                         die("failure: openat");
6399                 safe_close(fd);
6400
6401                 /* validate file can't be read through other users symlink */
6402                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6403                 if (fd >= 0)
6404                         die("failure: openat");
6405                 if (errno != EACCES)
6406                         die("failure: errno");
6407
6408                 exit(EXIT_SUCCESS);
6409         }
6410         if (wait_for_pid(pid)) {
6411                 log_stderr("failure: wait_for_pid");
6412                 goto out;
6413         }
6414
6415         pid = fork();
6416         if (pid < 0) {
6417                 log_stderr("failure: fork");
6418                 goto out;
6419         }
6420         if (pid == 0) {
6421                 if (!switch_ids(2000, 2000))
6422                         die("failure: switch_ids");
6423
6424                 /* validate file can be directly read */
6425                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6426                 if (fd < 0)
6427                         die("failure: openat");
6428                 safe_close(fd);
6429
6430                 /* validate file can be read through own symlink */
6431                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6432                 if (fd < 0)
6433                         die("failure: openat");
6434                 safe_close(fd);
6435
6436                 /* validate file can be read through root symlink */
6437                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6438                 if (fd < 0)
6439                         die("failure: openat");
6440                 safe_close(fd);
6441
6442                 /* validate file can't be read through other users symlink */
6443                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6444                 if (fd >= 0)
6445                         die("failure: openat");
6446                 if (errno != EACCES)
6447                         die("failure: errno");
6448
6449                 exit(EXIT_SUCCESS);
6450         }
6451         if (wait_for_pid(pid)) {
6452                 log_stderr("failure: wait_for_pid");
6453                 goto out;
6454         }
6455
6456         fret = 0;
6457         log_debug("Ran test");
6458 out:
6459         safe_close(attr.userns_fd);
6460         safe_close(fd);
6461         safe_close(dir_fd);
6462         safe_close(open_tree_fd);
6463
6464         return fret;
6465 }
6466
6467 /* Validate that protected symlinks work correctly on idmapped mounts inside a
6468  * user namespace.
6469  */
6470 static int protected_symlinks_idmapped_mounts_in_userns(void)
6471 {
6472         int fret = -1;
6473         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6474         struct mount_attr attr = {
6475                 .attr_set = MOUNT_ATTR_IDMAP,
6476         };
6477         pid_t pid;
6478
6479         if (!protected_symlinks_enabled())
6480                 return 0;
6481
6482         if (!caps_supported())
6483                 return 0;
6484
6485         /* create directory */
6486         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6487                 log_stderr("failure: mkdirat");
6488                 goto out;
6489         }
6490
6491         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6492         if (dir_fd < 0) {
6493                 log_stderr("failure: openat");
6494                 goto out;
6495         }
6496         if (fchown(dir_fd, 0, 0)) {
6497                 log_stderr("failure: fchown");
6498                 goto out;
6499         }
6500         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6501                 log_stderr("failure: fchmod");
6502                 goto out;
6503         }
6504         /* validate sticky bit is set */
6505         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6506                 log_stderr("failure: is_sticky");
6507                 goto out;
6508         }
6509
6510         /* create regular file via mknod */
6511         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6512                 log_stderr("failure: mknodat");
6513                 goto out;
6514         }
6515         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6516                 log_stderr("failure: fchownat");
6517                 goto out;
6518         }
6519         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6520                 log_stderr("failure: fchmodat");
6521                 goto out;
6522         }
6523
6524         /* create symlinks */
6525         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6526                 log_stderr("failure: symlinkat");
6527                 goto out;
6528         }
6529         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6530                 log_stderr("failure: fchownat");
6531                 goto out;
6532         }
6533         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6534                 log_stderr("failure: expected_uid_gid");
6535                 goto out;
6536         }
6537         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6538                 log_stderr("failure: expected_uid_gid");
6539                 goto out;
6540         }
6541
6542         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6543                 log_stderr("failure: symlinkat");
6544                 goto out;
6545         }
6546         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6547                 log_stderr("failure: fchownat");
6548                 goto out;
6549         }
6550         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6551                 log_stderr("failure: expected_uid_gid");
6552                 goto out;
6553         }
6554         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6555                 log_stderr("failure: expected_uid_gid");
6556                 goto out;
6557         }
6558
6559         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6560                 log_stderr("failure: symlinkat");
6561                 goto out;
6562         }
6563         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6564                 log_stderr("failure: fchownat");
6565                 goto out;
6566         }
6567         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6568                 log_stderr("failure: expected_uid_gid");
6569                 goto out;
6570         }
6571         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6572                 log_stderr("failure: expected_uid_gid");
6573                 goto out;
6574         }
6575
6576         /* Changing mount properties on a detached mount. */
6577         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
6578         if (attr.userns_fd < 0) {
6579                 log_stderr("failure: get_userns_fd");
6580                 goto out;
6581         }
6582
6583         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6584                                      AT_EMPTY_PATH |
6585                                      AT_NO_AUTOMOUNT |
6586                                      AT_SYMLINK_NOFOLLOW |
6587                                      OPEN_TREE_CLOEXEC |
6588                                      OPEN_TREE_CLONE);
6589         if (open_tree_fd < 0) {
6590                 log_stderr("failure: sys_open_tree");
6591                 goto out;
6592         }
6593
6594         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6595                 log_stderr("failure: sys_mount_setattr");
6596                 goto out;
6597         }
6598
6599         /* validate file can be directly read */
6600         fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6601         if (fd < 0) {
6602                 log_stderr("failure: openat");
6603                 goto out;
6604         }
6605         safe_close(fd);
6606
6607         /* validate file can be read through own symlink */
6608         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6609         if (fd < 0) {
6610                 log_stderr("failure: openat");
6611                 goto out;
6612         }
6613         safe_close(fd);
6614
6615         pid = fork();
6616         if (pid < 0) {
6617                 log_stderr("failure: fork");
6618                 goto out;
6619         }
6620         if (pid == 0) {
6621                 if (!caps_supported()) {
6622                         log_debug("skip: capability library not installed");
6623                         exit(EXIT_SUCCESS);
6624                 }
6625
6626                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
6627                         die("failure: switch_userns");
6628
6629                 /* validate file can be directly read */
6630                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6631                 if (fd < 0)
6632                         die("failure: openat");
6633                 safe_close(fd);
6634
6635                 /* validate file can be read through own symlink */
6636                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6637                 if (fd < 0)
6638                         die("failure: openat");
6639                 safe_close(fd);
6640
6641                 /* validate file can be read through root symlink */
6642                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6643                 if (fd < 0)
6644                         die("failure: openat");
6645                 safe_close(fd);
6646
6647                 /* validate file can't be read through other users symlink */
6648                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6649                 if (fd >= 0)
6650                         die("failure: openat");
6651                 if (errno != EACCES)
6652                         die("failure: errno");
6653
6654                 exit(EXIT_SUCCESS);
6655         }
6656         if (wait_for_pid(pid)) {
6657                 log_stderr("failure: wait_for_pid");
6658                 goto out;
6659         }
6660
6661         pid = fork();
6662         if (pid < 0) {
6663                 log_stderr("failure: fork");
6664                 goto out;
6665         }
6666         if (pid == 0) {
6667                 if (!caps_supported()) {
6668                         log_debug("skip: capability library not installed");
6669                         exit(EXIT_SUCCESS);
6670                 }
6671
6672                 if (!switch_userns(attr.userns_fd, 2000, 2000, true))
6673                         die("failure: switch_userns");
6674
6675                 /* validate file can be directly read */
6676                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6677                 if (fd < 0)
6678                         die("failure: openat");
6679                 safe_close(fd);
6680
6681                 /* validate file can be read through own symlink */
6682                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6683                 if (fd < 0)
6684                         die("failure: openat");
6685                 safe_close(fd);
6686
6687                 /* validate file can be read through root symlink */
6688                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6689                 if (fd < 0)
6690                         die("failure: openat");
6691                 safe_close(fd);
6692
6693                 /* validate file can't be read through other users symlink */
6694                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6695                 if (fd >= 0)
6696                         die("failure: openat");
6697                 if (errno != EACCES)
6698                         die("failure: errno");
6699
6700                 exit(EXIT_SUCCESS);
6701         }
6702         if (wait_for_pid(pid)) {
6703                 log_stderr("failure: wait_for_pid");
6704                 goto out;
6705         }
6706
6707         fret = 0;
6708         log_debug("Ran test");
6709 out:
6710         safe_close(dir_fd);
6711         safe_close(open_tree_fd);
6712         safe_close(attr.userns_fd);
6713
6714         return fret;
6715 }
6716
6717 static int acls(void)
6718 {
6719         int fret = -1;
6720         int dir1_fd = -EBADF, open_tree_fd = -EBADF;
6721         struct mount_attr attr = {
6722                 .attr_set = MOUNT_ATTR_IDMAP,
6723         };
6724         pid_t pid;
6725
6726         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
6727                 log_stderr("failure: mkdirat");
6728                 goto out;
6729         }
6730         if (fchmodat(t_dir1_fd, DIR1, 0777, 0)) {
6731                 log_stderr("failure: fchmodat");
6732                 goto out;
6733         }
6734
6735         if (mkdirat(t_dir1_fd, DIR2, 0777)) {
6736                 log_stderr("failure: mkdirat");
6737                 goto out;
6738         }
6739         if (fchmodat(t_dir1_fd, DIR2, 0777, 0)) {
6740                 log_stderr("failure: fchmodat");
6741                 goto out;
6742         }
6743
6744         /* Changing mount properties on a detached mount. */
6745         attr.userns_fd = get_userns_fd(100010, 100020, 5);
6746         if (attr.userns_fd < 0) {
6747                 log_stderr("failure: get_userns_fd");
6748                 goto out;
6749         }
6750
6751         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
6752                                      AT_NO_AUTOMOUNT |
6753                                      AT_SYMLINK_NOFOLLOW |
6754                                      OPEN_TREE_CLOEXEC |
6755                                      OPEN_TREE_CLONE);
6756         if (open_tree_fd < 0) {
6757                 log_stderr("failure: sys_open_tree");
6758                 goto out;
6759         }
6760
6761         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6762                 log_stderr("failure: sys_mount_setattr");
6763                 goto out;
6764         }
6765
6766         if (sys_move_mount(open_tree_fd, "", t_dir1_fd, DIR2, MOVE_MOUNT_F_EMPTY_PATH)) {
6767                 log_stderr("failure: sys_move_mount");
6768                 goto out;
6769         }
6770
6771         dir1_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6772         if (dir1_fd < 0) {
6773                 log_stderr("failure: openat");
6774                 goto out;
6775         }
6776
6777         if (mkdirat(dir1_fd, DIR3, 0000)) {
6778                 log_stderr("failure: mkdirat");
6779                 goto out;
6780         }
6781         if (fchown(dir1_fd, 100010, 100010)) {
6782                 log_stderr("failure: fchown");
6783                 goto out;
6784         }
6785         if (fchmod(dir1_fd, 0777)) {
6786                 log_stderr("failure: fchmod");
6787                 goto out;
6788         }
6789
6790         snprintf(t_buf, sizeof(t_buf), "setfacl -m u:100010:rwx %s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6791         if (system(t_buf)) {
6792                 log_stderr("failure: system");
6793                 goto out;
6794         }
6795
6796         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100010:rwx", t_mountpoint, T_DIR1, DIR1, DIR3);
6797         if (system(t_buf)) {
6798                 log_stderr("failure: system");
6799                 goto out;
6800         }
6801
6802         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100020:rwx", t_mountpoint, T_DIR1, DIR2, DIR3);
6803         if (system(t_buf)) {
6804                 log_stderr("failure: system");
6805                 goto out;
6806         }
6807
6808         pid = fork();
6809         if (pid < 0) {
6810                 log_stderr("failure: fork");
6811                 goto out;
6812         }
6813         if (pid == 0) {
6814                 if (!caps_supported()) {
6815                         log_debug("skip: capability library not installed");
6816                         exit(EXIT_SUCCESS);
6817                 }
6818
6819                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6820                         die("failure: switch_userns");
6821
6822                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6823                          t_mountpoint, T_DIR1, DIR1, DIR3, 4294967295LU);
6824                 if (system(t_buf))
6825                         die("failure: system");
6826
6827                 exit(EXIT_SUCCESS);
6828         }
6829         if (wait_for_pid(pid)) {
6830                 log_stderr("failure: wait_for_pid");
6831                 goto out;
6832         }
6833
6834         pid = fork();
6835         if (pid < 0) {
6836                 log_stderr("failure: fork");
6837                 goto out;
6838         }
6839         if (pid == 0) {
6840                 if (!caps_supported()) {
6841                         log_debug("skip: capability library not installed");
6842                         exit(EXIT_SUCCESS);
6843                 }
6844
6845                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6846                         die("failure: switch_userns");
6847
6848                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6849                          t_mountpoint, T_DIR1, DIR2, DIR3, 100010LU);
6850                 if (system(t_buf))
6851                         die("failure: system");
6852
6853                 exit(EXIT_SUCCESS);
6854         }
6855         if (wait_for_pid(pid)) {
6856                 log_stderr("failure: wait_for_pid");
6857                 goto out;
6858         }
6859
6860         /* Now, dir is owned by someone else in the user namespace, but we can
6861          * still read it because of acls.
6862          */
6863         if (fchown(dir1_fd, 100012, 100012)) {
6864                 log_stderr("failure: fchown");
6865                 goto out;
6866         }
6867
6868         pid = fork();
6869         if (pid < 0) {
6870                 log_stderr("failure: fork");
6871                 goto out;
6872         }
6873         if (pid == 0) {
6874                 int fd;
6875
6876                 if (!caps_supported()) {
6877                         log_debug("skip: capability library not installed");
6878                         exit(EXIT_SUCCESS);
6879                 }
6880
6881                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6882                         die("failure: switch_userns");
6883
6884                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6885                 if (fd < 0)
6886                         die("failure: openat");
6887
6888                 exit(EXIT_SUCCESS);
6889         }
6890         if (wait_for_pid(pid)) {
6891                 log_stderr("failure: wait_for_pid");
6892                 goto out;
6893         }
6894
6895         /* if we delete the acls, the ls should fail because it's 700. */
6896         snprintf(t_buf, sizeof(t_buf), "%s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6897         if (removexattr(t_buf, "system.posix_acl_access")) {
6898                 log_stderr("failure: removexattr");
6899                 goto out;
6900         }
6901
6902         pid = fork();
6903         if (pid < 0) {
6904                 log_stderr("failure: fork");
6905                 goto out;
6906         }
6907         if (pid == 0) {
6908                 int fd;
6909
6910                 if (!caps_supported()) {
6911                         log_debug("skip: capability library not installed");
6912                         exit(EXIT_SUCCESS);
6913                 }
6914
6915                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6916                         die("failure: switch_userns");
6917
6918                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6919                 if (fd >= 0)
6920                         die("failure: openat");
6921
6922                 exit(EXIT_SUCCESS);
6923         }
6924         if (wait_for_pid(pid)) {
6925                 log_stderr("failure: wait_for_pid");
6926                 goto out;
6927         }
6928
6929         snprintf(t_buf, sizeof(t_buf), "%s/" T_DIR1 "/" DIR2, t_mountpoint);
6930         sys_umount2(t_buf, MNT_DETACH);
6931
6932         fret = 0;
6933         log_debug("Ran test");
6934 out:
6935         safe_close(attr.userns_fd);
6936         safe_close(dir1_fd);
6937         safe_close(open_tree_fd);
6938
6939         return fret;
6940 }
6941
6942 #ifdef HAVE_LIBURING_H
6943 static int io_uring_openat_with_creds(struct io_uring *ring, int dfd, const char *path, int cred_id,
6944                                       bool with_link, int *ret_cqe)
6945 {
6946         struct io_uring_cqe *cqe;
6947         struct io_uring_sqe *sqe;
6948         int ret, i, to_submit = 1;
6949
6950         if (with_link) {
6951                 sqe = io_uring_get_sqe(ring);
6952                 if (!sqe)
6953                         return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6954                 io_uring_prep_nop(sqe);
6955                 sqe->flags |= IOSQE_IO_LINK;
6956                 sqe->user_data = 1;
6957                 to_submit++;
6958         }
6959
6960         sqe = io_uring_get_sqe(ring);
6961         if (!sqe)
6962                 return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6963         io_uring_prep_openat(sqe, dfd, path, O_RDONLY | O_CLOEXEC, 0);
6964         sqe->user_data = 2;
6965
6966         if (cred_id != -1)
6967                 sqe->personality = cred_id;
6968
6969         ret = io_uring_submit(ring);
6970         if (ret != to_submit) {
6971                 log_stderr("failure: io_uring_submit");
6972                 goto out;
6973         }
6974
6975         for (i = 0; i < to_submit; i++) {
6976                 ret = io_uring_wait_cqe(ring, &cqe);
6977                 if (ret < 0) {
6978                         log_stderr("failure: io_uring_wait_cqe");
6979                         goto out;
6980                 }
6981
6982                 ret = cqe->res;
6983                 /*
6984                  * Make sure caller can identify that this is a proper io_uring
6985                  * failure and not some earlier error.
6986                  */
6987                 if (ret_cqe)
6988                         *ret_cqe = ret;
6989                 io_uring_cqe_seen(ring, cqe);
6990         }
6991         log_debug("Ran test");
6992 out:
6993         return ret;
6994 }
6995
6996 static int io_uring(void)
6997 {
6998         int fret = -1;
6999         int file1_fd = -EBADF;
7000         struct io_uring *ring;
7001         int cred_id, ret, ret_cqe;
7002         pid_t pid;
7003
7004         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7005                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7006         if (!ring)
7007                 return log_errno(-1, "failure: io_uring_queue_init");
7008
7009         ret = io_uring_queue_init(8, ring, 0);
7010         if (ret) {
7011                 log_stderr("failure: io_uring_queue_init");
7012                 goto out_unmap;
7013         }
7014
7015         ret = io_uring_register_personality(ring);
7016         if (ret < 0) {
7017                 fret = 0;
7018                 goto out_unmap; /* personalities not supported */
7019         }
7020         cred_id = ret;
7021
7022         /* create file only owner can open */
7023         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7024         if (file1_fd < 0) {
7025                 log_stderr("failure: openat");
7026                 goto out;
7027         }
7028         if (fchown(file1_fd, 0, 0)) {
7029                 log_stderr("failure: fchown");
7030                 goto out;
7031         }
7032         if (fchmod(file1_fd, 0600)) {
7033                 log_stderr("failure: fchmod");
7034                 goto out;
7035         }
7036         safe_close(file1_fd);
7037
7038         pid = fork();
7039         if (pid < 0) {
7040                 log_stderr("failure: fork");
7041                 goto out;
7042         }
7043         if (pid == 0) {
7044                 /* Verify we can open it with our current credentials. */
7045                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7046                                                       -1, false, NULL);
7047                 if (file1_fd < 0)
7048                         die("failure: io_uring_open_file");
7049
7050                 exit(EXIT_SUCCESS);
7051         }
7052         if (wait_for_pid(pid)) {
7053                 log_stderr("failure: wait_for_pid");
7054                 goto out;
7055         }
7056
7057         pid = fork();
7058         if (pid < 0) {
7059                 log_stderr("failure: fork");
7060                 goto out;
7061         }
7062         if (pid == 0) {
7063                 if (!switch_ids(1000, 1000))
7064                         die("failure: switch_ids");
7065
7066                 /* Verify we can't open it with our current credentials. */
7067                 ret_cqe = 0;
7068                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7069                                                       -1, false, &ret_cqe);
7070                 if (file1_fd >= 0)
7071                         die("failure: io_uring_open_file");
7072                 if (ret_cqe == 0)
7073                         die("failure: non-open() related io_uring_open_file failure %d", ret_cqe);
7074                 if (ret_cqe != -EACCES)
7075                         die("failure: errno(%d)", abs(ret_cqe));
7076
7077                 exit(EXIT_SUCCESS);
7078         }
7079         if (wait_for_pid(pid)) {
7080                 log_stderr("failure: wait_for_pid");
7081                 goto out;
7082         }
7083
7084         pid = fork();
7085         if (pid < 0) {
7086                 log_stderr("failure: fork");
7087                 goto out;
7088         }
7089         if (pid == 0) {
7090                 if (!switch_ids(1000, 1000))
7091                         die("failure: switch_ids");
7092
7093                 /* Verify we can open it with the registered credentials. */
7094                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7095                                                       cred_id, false, NULL);
7096                 if (file1_fd < 0)
7097                         die("failure: io_uring_open_file");
7098
7099                 /* Verify we can open it with the registered credentials and as
7100                  * a link.
7101                  */
7102                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7103                                                       cred_id, true, NULL);
7104                 if (file1_fd < 0)
7105                         die("failure: io_uring_open_file");
7106
7107                 exit(EXIT_SUCCESS);
7108         }
7109         if (wait_for_pid(pid)) {
7110                 log_stderr("failure: wait_for_pid");
7111                 goto out;
7112         }
7113
7114         fret = 0;
7115         log_debug("Ran test");
7116 out:
7117         ret = io_uring_unregister_personality(ring, cred_id);
7118         if (ret)
7119                 log_stderr("failure: io_uring_unregister_personality");
7120
7121 out_unmap:
7122         munmap(ring, sizeof(struct io_uring));
7123
7124         safe_close(file1_fd);
7125
7126         return fret;
7127 }
7128
7129 static int io_uring_userns(void)
7130 {
7131         int fret = -1;
7132         int file1_fd = -EBADF, userns_fd = -EBADF;
7133         struct io_uring *ring;
7134         int cred_id, ret, ret_cqe;
7135         pid_t pid;
7136
7137         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7138                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7139         if (!ring)
7140                 return log_errno(-1, "failure: io_uring_queue_init");
7141
7142         ret = io_uring_queue_init(8, ring, 0);
7143         if (ret) {
7144                 log_stderr("failure: io_uring_queue_init");
7145                 goto out_unmap;
7146         }
7147
7148         ret = io_uring_register_personality(ring);
7149         if (ret < 0) {
7150                 fret = 0;
7151                 goto out_unmap; /* personalities not supported */
7152         }
7153         cred_id = ret;
7154
7155         /* create file only owner can open */
7156         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7157         if (file1_fd < 0) {
7158                 log_stderr("failure: openat");
7159                 goto out;
7160         }
7161         if (fchown(file1_fd, 0, 0)) {
7162                 log_stderr("failure: fchown");
7163                 goto out;
7164         }
7165         if (fchmod(file1_fd, 0600)) {
7166                 log_stderr("failure: fchmod");
7167                 goto out;
7168         }
7169         safe_close(file1_fd);
7170
7171         userns_fd = get_userns_fd(0, 10000, 10000);
7172         if (userns_fd < 0) {
7173                 log_stderr("failure: get_userns_fd");
7174                 goto out;
7175         }
7176
7177         pid = fork();
7178         if (pid < 0) {
7179                 log_stderr("failure: fork");
7180                 goto out;
7181         }
7182         if (pid == 0) {
7183                 /* Verify we can open it with our current credentials. */
7184                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7185                                                       -1, false, NULL);
7186                 if (file1_fd < 0)
7187                         die("failure: io_uring_open_file");
7188
7189                 exit(EXIT_SUCCESS);
7190         }
7191         if (wait_for_pid(pid)) {
7192                 log_stderr("failure: wait_for_pid");
7193                 goto out;
7194         }
7195
7196         pid = fork();
7197         if (pid < 0) {
7198                 log_stderr("failure: fork");
7199                 goto out;
7200         }
7201         if (pid == 0) {
7202                 if (!switch_userns(userns_fd, 0, 0, false))
7203                         die("failure: switch_userns");
7204
7205                 /* Verify we can't open it with our current credentials. */
7206                 ret_cqe = 0;
7207                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7208                                                       -1, false, &ret_cqe);
7209                 if (file1_fd >= 0)
7210                         die("failure: io_uring_open_file");
7211                 if (ret_cqe == 0)
7212                         die("failure: non-open() related io_uring_open_file failure");
7213                 if (ret_cqe != -EACCES)
7214                         die("failure: errno(%d)", abs(ret_cqe));
7215
7216                 exit(EXIT_SUCCESS);
7217         }
7218         if (wait_for_pid(pid)) {
7219                 log_stderr("failure: wait_for_pid");
7220                 goto out;
7221         }
7222
7223         pid = fork();
7224         if (pid < 0) {
7225                 log_stderr("failure: fork");
7226                 goto out;
7227         }
7228         if (pid == 0) {
7229                 if (!switch_userns(userns_fd, 0, 0, false))
7230                         die("failure: switch_userns");
7231
7232                 /* Verify we can open it with the registered credentials. */
7233                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7234                                                       cred_id, false, NULL);
7235                 if (file1_fd < 0)
7236                         die("failure: io_uring_open_file");
7237
7238                 /* Verify we can open it with the registered credentials and as
7239                  * a link.
7240                  */
7241                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7242                                                       cred_id, true, NULL);
7243                 if (file1_fd < 0)
7244                         die("failure: io_uring_open_file");
7245
7246                 exit(EXIT_SUCCESS);
7247         }
7248         if (wait_for_pid(pid)) {
7249                 log_stderr("failure: wait_for_pid");
7250                 goto out;
7251         }
7252
7253         fret = 0;
7254         log_debug("Ran test");
7255 out:
7256         ret = io_uring_unregister_personality(ring, cred_id);
7257         if (ret)
7258                 log_stderr("failure: io_uring_unregister_personality");
7259
7260 out_unmap:
7261         munmap(ring, sizeof(struct io_uring));
7262
7263         safe_close(file1_fd);
7264         safe_close(userns_fd);
7265
7266         return fret;
7267 }
7268
7269 static int io_uring_idmapped(void)
7270 {
7271         int fret = -1;
7272         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7273         struct io_uring *ring;
7274         struct mount_attr attr = {
7275                 .attr_set = MOUNT_ATTR_IDMAP,
7276         };
7277         int cred_id, ret;
7278         pid_t pid;
7279
7280         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7281                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7282         if (!ring)
7283                 return log_errno(-1, "failure: io_uring_queue_init");
7284
7285         ret = io_uring_queue_init(8, ring, 0);
7286         if (ret) {
7287                 log_stderr("failure: io_uring_queue_init");
7288                 goto out_unmap;
7289         }
7290
7291         ret = io_uring_register_personality(ring);
7292         if (ret < 0) {
7293                 fret = 0;
7294                 goto out_unmap; /* personalities not supported */
7295         }
7296         cred_id = ret;
7297
7298         /* create file only owner can open */
7299         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7300         if (file1_fd < 0) {
7301                 log_stderr("failure: openat");
7302                 goto out;
7303         }
7304         if (fchown(file1_fd, 0, 0)) {
7305                 log_stderr("failure: fchown");
7306                 goto out;
7307         }
7308         if (fchmod(file1_fd, 0600)) {
7309                 log_stderr("failure: fchmod");
7310                 goto out;
7311         }
7312         safe_close(file1_fd);
7313
7314         /* Changing mount properties on a detached mount. */
7315         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7316         if (attr.userns_fd < 0)
7317                 return log_errno(-1, "failure: create user namespace");
7318
7319         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7320                                      AT_EMPTY_PATH |
7321                                      AT_NO_AUTOMOUNT |
7322                                      AT_SYMLINK_NOFOLLOW |
7323                                      OPEN_TREE_CLOEXEC |
7324                                      OPEN_TREE_CLONE);
7325         if (open_tree_fd < 0)
7326                 return log_errno(-1, "failure: create detached mount");
7327
7328         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7329                 return log_errno(-1, "failure: set mount attributes");
7330
7331         pid = fork();
7332         if (pid < 0) {
7333                 log_stderr("failure: fork");
7334                 goto out;
7335         }
7336         if (pid == 0) {
7337                 if (!switch_ids(10000, 10000))
7338                         die("failure: switch_ids");
7339
7340                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7341                                                       -1, false, NULL);
7342                 if (file1_fd < 0)
7343                         die("failure: io_uring_open_file");
7344
7345                 exit(EXIT_SUCCESS);
7346         }
7347         if (wait_for_pid(pid)) {
7348                 log_stderr("failure: wait_for_pid");
7349                 goto out;
7350         }
7351
7352         pid = fork();
7353         if (pid < 0) {
7354                 log_stderr("failure: fork");
7355                 goto out;
7356         }
7357         if (pid == 0) {
7358                 if (!switch_ids(10001, 10001))
7359                         die("failure: switch_ids");
7360
7361                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7362                                                       cred_id, false, NULL);
7363                 if (file1_fd < 0)
7364                         die("failure: io_uring_open_file");
7365
7366                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7367                                                       cred_id, true, NULL);
7368                 if (file1_fd < 0)
7369                         die("failure: io_uring_open_file");
7370
7371                 exit(EXIT_SUCCESS);
7372         }
7373         if (wait_for_pid(pid)) {
7374                 log_stderr("failure: wait_for_pid");
7375                 goto out;
7376         }
7377
7378         fret = 0;
7379         log_debug("Ran test");
7380 out:
7381         ret = io_uring_unregister_personality(ring, cred_id);
7382         if (ret)
7383                 log_stderr("failure: io_uring_unregister_personality");
7384
7385 out_unmap:
7386         munmap(ring, sizeof(struct io_uring));
7387
7388         safe_close(attr.userns_fd);
7389         safe_close(file1_fd);
7390         safe_close(open_tree_fd);
7391
7392         return fret;
7393 }
7394
7395 /*
7396  * Create an idmapped mount where the we leave the owner of the file unmapped.
7397  * In no circumstances, even with recorded credentials can it be allowed to
7398  * open the file.
7399  */
7400 static int io_uring_idmapped_unmapped(void)
7401 {
7402         int fret = -1;
7403         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7404         struct io_uring *ring;
7405         struct mount_attr attr = {
7406                 .attr_set = MOUNT_ATTR_IDMAP,
7407         };
7408         int cred_id, ret, ret_cqe;
7409         pid_t pid;
7410
7411         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7412                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7413         if (!ring)
7414                 return log_errno(-1, "failure: io_uring_queue_init");
7415
7416         ret = io_uring_queue_init(8, ring, 0);
7417         if (ret) {
7418                 log_stderr("failure: io_uring_queue_init");
7419                 goto out_unmap;
7420         }
7421
7422         ret = io_uring_register_personality(ring);
7423         if (ret < 0) {
7424                 fret = 0;
7425                 goto out_unmap; /* personalities not supported */
7426         }
7427         cred_id = ret;
7428
7429         /* create file only owner can open */
7430         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7431         if (file1_fd < 0) {
7432                 log_stderr("failure: openat");
7433                 goto out;
7434         }
7435         if (fchown(file1_fd, 0, 0)) {
7436                 log_stderr("failure: fchown");
7437                 goto out;
7438         }
7439         if (fchmod(file1_fd, 0600)) {
7440                 log_stderr("failure: fchmod");
7441                 goto out;
7442         }
7443         safe_close(file1_fd);
7444
7445         /* Changing mount properties on a detached mount. */
7446         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7447         if (attr.userns_fd < 0)
7448                 return log_errno(-1, "failure: create user namespace");
7449
7450         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7451                                      AT_EMPTY_PATH |
7452                                      AT_NO_AUTOMOUNT |
7453                                      AT_SYMLINK_NOFOLLOW |
7454                                      OPEN_TREE_CLOEXEC |
7455                                      OPEN_TREE_CLONE);
7456         if (open_tree_fd < 0)
7457                 return log_errno(-1, "failure: create detached mount");
7458
7459         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7460                 return log_errno(-1, "failure: set mount attributes");
7461
7462         pid = fork();
7463         if (pid < 0) {
7464                 log_stderr("failure: fork");
7465                 goto out;
7466         }
7467         if (pid == 0) {
7468                 if (!switch_ids(10000, 10000))
7469                         die("failure: switch_ids");
7470
7471                 ret_cqe = 0;
7472                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7473                                                       cred_id, false, &ret_cqe);
7474                 if (file1_fd >= 0)
7475                         die("failure: io_uring_open_file");
7476                 if (ret_cqe == 0)
7477                         die("failure: non-open() related io_uring_open_file failure");
7478                 if (ret_cqe != -EACCES)
7479                         die("failure: errno(%d)", abs(ret_cqe));
7480
7481                 ret_cqe = 0;
7482                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7483                                                       cred_id, true, &ret_cqe);
7484                 if (file1_fd >= 0)
7485                         die("failure: io_uring_open_file");
7486                 if (ret_cqe == 0)
7487                         die("failure: non-open() related io_uring_open_file failure");
7488                 if (ret_cqe != -EACCES)
7489                         die("failure: errno(%d)", abs(ret_cqe));
7490
7491                 exit(EXIT_SUCCESS);
7492         }
7493         if (wait_for_pid(pid)) {
7494                 log_stderr("failure: wait_for_pid");
7495                 goto out;
7496         }
7497
7498         fret = 0;
7499         log_debug("Ran test");
7500 out:
7501         ret = io_uring_unregister_personality(ring, cred_id);
7502         if (ret)
7503                 log_stderr("failure: io_uring_unregister_personality");
7504
7505 out_unmap:
7506         munmap(ring, sizeof(struct io_uring));
7507
7508         safe_close(attr.userns_fd);
7509         safe_close(file1_fd);
7510         safe_close(open_tree_fd);
7511
7512         return fret;
7513 }
7514
7515 static int io_uring_idmapped_userns(void)
7516 {
7517         int fret = -1;
7518         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7519         struct io_uring *ring;
7520         struct mount_attr attr = {
7521                 .attr_set = MOUNT_ATTR_IDMAP,
7522         };
7523         int cred_id, ret, ret_cqe;
7524         pid_t pid;
7525
7526         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7527                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7528         if (!ring)
7529                 return log_errno(-1, "failure: io_uring_queue_init");
7530
7531         ret = io_uring_queue_init(8, ring, 0);
7532         if (ret) {
7533                 log_stderr("failure: io_uring_queue_init");
7534                 goto out_unmap;
7535         }
7536
7537         ret = io_uring_register_personality(ring);
7538         if (ret < 0) {
7539                 fret = 0;
7540                 goto out_unmap; /* personalities not supported */
7541         }
7542         cred_id = ret;
7543
7544         /* create file only owner can open */
7545         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7546         if (file1_fd < 0) {
7547                 log_stderr("failure: openat");
7548                 goto out;
7549         }
7550         if (fchown(file1_fd, 0, 0)) {
7551                 log_stderr("failure: fchown");
7552                 goto out;
7553         }
7554         if (fchmod(file1_fd, 0600)) {
7555                 log_stderr("failure: fchmod");
7556                 goto out;
7557         }
7558         safe_close(file1_fd);
7559
7560         /* Changing mount properties on a detached mount. */
7561         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7562         if (attr.userns_fd < 0)
7563                 return log_errno(-1, "failure: create user namespace");
7564
7565         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7566                                      AT_EMPTY_PATH |
7567                                      AT_NO_AUTOMOUNT |
7568                                      AT_SYMLINK_NOFOLLOW |
7569                                      OPEN_TREE_CLOEXEC |
7570                                      OPEN_TREE_CLONE);
7571         if (open_tree_fd < 0)
7572                 return log_errno(-1, "failure: create detached mount");
7573
7574         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7575                 return log_errno(-1, "failure: set mount attributes");
7576
7577         pid = fork();
7578         if (pid < 0) {
7579                 log_stderr("failure: fork");
7580                 goto out;
7581         }
7582         if (pid == 0) {
7583                 if (!switch_userns(attr.userns_fd, 0, 0, false))
7584                         die("failure: switch_userns");
7585
7586                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7587                                                       -1, false, NULL);
7588                 if (file1_fd < 0)
7589                         die("failure: io_uring_open_file");
7590
7591                 exit(EXIT_SUCCESS);
7592         }
7593         if (wait_for_pid(pid)) {
7594                 log_stderr("failure: wait_for_pid");
7595                 goto out;
7596         }
7597
7598         pid = fork();
7599         if (pid < 0) {
7600                 log_stderr("failure: fork");
7601                 goto out;
7602         }
7603         if (pid == 0) {
7604                 if (!caps_supported()) {
7605                         log_debug("skip: capability library not installed");
7606                         exit(EXIT_SUCCESS);
7607                 }
7608
7609                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
7610                         die("failure: switch_userns");
7611
7612                 ret_cqe = 0;
7613                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7614                                                       -1, false, &ret_cqe);
7615                 if (file1_fd >= 0)
7616                         die("failure: io_uring_open_file");
7617                 if (ret_cqe == 0)
7618                         die("failure: non-open() related io_uring_open_file failure");
7619                 if (ret_cqe != -EACCES)
7620                         die("failure: errno(%d)", abs(ret_cqe));
7621
7622                 ret_cqe = 0;
7623                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7624                                                       -1, true, &ret_cqe);
7625                 if (file1_fd >= 0)
7626                         die("failure: io_uring_open_file");
7627                 if (ret_cqe == 0)
7628                         die("failure: non-open() related io_uring_open_file failure");
7629                 if (ret_cqe != -EACCES)
7630                         die("failure: errno(%d)", abs(ret_cqe));
7631
7632                 ret_cqe = 0;
7633                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7634                                                       -1, false, &ret_cqe);
7635                 if (file1_fd >= 0)
7636                         die("failure: io_uring_open_file");
7637                 if (ret_cqe == 0)
7638                         die("failure: non-open() related io_uring_open_file failure");
7639                 if (ret_cqe != -EACCES)
7640                         die("failure: errno(%d)", abs(ret_cqe));
7641
7642                 ret_cqe = 0;
7643                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7644                                                       -1, true, &ret_cqe);
7645                 if (file1_fd >= 0)
7646                         die("failure: io_uring_open_file");
7647                 if (ret_cqe == 0)
7648                         die("failure: non-open() related io_uring_open_file failure");
7649                 if (ret_cqe != -EACCES)
7650                         die("failure: errno(%d)", abs(ret_cqe));
7651
7652                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7653                                                       cred_id, false, NULL);
7654                 if (file1_fd < 0)
7655                         die("failure: io_uring_open_file");
7656
7657                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7658                                                       cred_id, true, NULL);
7659                 if (file1_fd < 0)
7660                         die("failure: io_uring_open_file");
7661
7662                 exit(EXIT_SUCCESS);
7663         }
7664         if (wait_for_pid(pid)) {
7665                 log_stderr("failure: wait_for_pid");
7666                 goto out;
7667         }
7668
7669         fret = 0;
7670         log_debug("Ran test");
7671 out:
7672         ret = io_uring_unregister_personality(ring, cred_id);
7673         if (ret)
7674                 log_stderr("failure: io_uring_unregister_personality");
7675
7676 out_unmap:
7677         munmap(ring, sizeof(struct io_uring));
7678
7679         safe_close(attr.userns_fd);
7680         safe_close(file1_fd);
7681         safe_close(open_tree_fd);
7682
7683         return fret;
7684 }
7685
7686 static int io_uring_idmapped_unmapped_userns(void)
7687 {
7688         int fret = -1;
7689         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7690         struct io_uring *ring;
7691         struct mount_attr attr = {
7692                 .attr_set = MOUNT_ATTR_IDMAP,
7693         };
7694         int cred_id, ret, ret_cqe;
7695         pid_t pid;
7696
7697         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7698                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7699         if (!ring)
7700                 return log_errno(-1, "failure: io_uring_queue_init");
7701
7702         ret = io_uring_queue_init(8, ring, 0);
7703         if (ret) {
7704                 log_stderr("failure: io_uring_queue_init");
7705                 goto out_unmap;
7706         }
7707
7708         ret = io_uring_register_personality(ring);
7709         if (ret < 0) {
7710                 fret = 0;
7711                 goto out_unmap; /* personalities not supported */
7712         }
7713         cred_id = ret;
7714
7715         /* create file only owner can open */
7716         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7717         if (file1_fd < 0) {
7718                 log_stderr("failure: openat");
7719                 goto out;
7720         }
7721         if (fchown(file1_fd, 0, 0)) {
7722                 log_stderr("failure: fchown");
7723                 goto out;
7724         }
7725         if (fchmod(file1_fd, 0600)) {
7726                 log_stderr("failure: fchmod");
7727                 goto out;
7728         }
7729         safe_close(file1_fd);
7730
7731         /* Changing mount properties on a detached mount. */
7732         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7733         if (attr.userns_fd < 0)
7734                 return log_errno(-1, "failure: create user namespace");
7735
7736         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7737                                      AT_EMPTY_PATH |
7738                                      AT_NO_AUTOMOUNT |
7739                                      AT_SYMLINK_NOFOLLOW |
7740                                      OPEN_TREE_CLOEXEC |
7741                                      OPEN_TREE_CLONE);
7742         if (open_tree_fd < 0)
7743                 return log_errno(-1, "failure: create detached mount");
7744
7745         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7746                 return log_errno(-1, "failure: set mount attributes");
7747
7748         pid = fork();
7749         if (pid < 0) {
7750                 log_stderr("failure: fork");
7751                 goto out;
7752         }
7753         if (pid == 0) {
7754                 if (!caps_supported()) {
7755                         log_debug("skip: capability library not installed");
7756                         exit(EXIT_SUCCESS);
7757                 }
7758
7759                 if (!switch_userns(attr.userns_fd, 10000, 10000, true))
7760                         die("failure: switch_ids");
7761
7762                 ret_cqe = 0;
7763                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7764                                                       cred_id, false, &ret_cqe);
7765                 if (file1_fd >= 0)
7766                         die("failure: io_uring_open_file");
7767                 if (ret_cqe == 0)
7768                         die("failure: non-open() related io_uring_open_file failure");
7769                 if (ret_cqe != -EACCES)
7770                         die("failure: errno(%d)", abs(ret_cqe));
7771
7772                 ret_cqe = 0;
7773                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7774                                                       cred_id, true, &ret_cqe);
7775                 if (file1_fd >= 0)
7776                         die("failure: io_uring_open_file");
7777                 if (ret_cqe == 0)
7778                         die("failure: non-open() related io_uring_open_file failure");
7779                 if (ret_cqe != -EACCES)
7780                         die("failure: errno(%d)", abs(ret_cqe));
7781
7782                 exit(EXIT_SUCCESS);
7783         }
7784         if (wait_for_pid(pid)) {
7785                 log_stderr("failure: wait_for_pid");
7786                 goto out;
7787         }
7788
7789         fret = 0;
7790         log_debug("Ran test");
7791 out:
7792         ret = io_uring_unregister_personality(ring, cred_id);
7793         if (ret)
7794                 log_stderr("failure: io_uring_unregister_personality");
7795
7796 out_unmap:
7797         munmap(ring, sizeof(struct io_uring));
7798
7799         safe_close(attr.userns_fd);
7800         safe_close(file1_fd);
7801         safe_close(open_tree_fd);
7802
7803         return fret;
7804 }
7805 #endif /* HAVE_LIBURING_H */
7806
/* The following tests are concerned with setgid inheritance. The behavior can
 * be filesystem-type specific. For xfs, if a new file or directory is created
 * within a setgid directory and irix_sgid_inherit is set, then the setgid bit
 * is inherited if the caller is in the group of the directory.
 */
7812 static int setgid_create(void)
7813 {
7814         int fret = -1;
7815         int file1_fd = -EBADF;
7816         pid_t pid;
7817
7818         if (!caps_supported())
7819                 return 0;
7820
7821         if (fchmod(t_dir1_fd, S_IRUSR |
7822                               S_IWUSR |
7823                               S_IRGRP |
7824                               S_IWGRP |
7825                               S_IROTH |
7826                               S_IWOTH |
7827                               S_IXUSR |
7828                               S_IXGRP |
7829                               S_IXOTH |
7830                               S_ISGID), 0) {
7831                 log_stderr("failure: fchmod");
7832                 goto out;
7833         }
7834
7835         /* Verify that the setgid bit got raised. */
7836         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7837                 log_stderr("failure: is_setgid");
7838                 goto out;
7839         }
7840
7841         pid = fork();
7842         if (pid < 0) {
7843                 log_stderr("failure: fork");
7844                 goto out;
7845         }
7846         if (pid == 0) {
7847                 /* create regular file via open() */
7848                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7849                 if (file1_fd < 0)
7850                         die("failure: create");
7851
7852                 /* We're capable_wrt_inode_uidgid() and also our fsgid matches
7853                  * the directories gid.
7854                  */
7855                 if (!is_setgid(t_dir1_fd, FILE1, 0))
7856                         die("failure: is_setgid");
7857
7858                 /* create directory */
7859                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7860                         die("failure: create");
7861
7862                 /* Directories always inherit the setgid bit. */
7863                 if (!is_setgid(t_dir1_fd, DIR1, 0))
7864                         die("failure: is_setgid");
7865
7866                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0))
7867                         die("failure: check ownership");
7868
7869                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0))
7870                         die("failure: check ownership");
7871
7872                 if (unlinkat(t_dir1_fd, FILE1, 0))
7873                         die("failure: delete");
7874
7875                 if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR))
7876                         die("failure: delete");
7877
7878                 exit(EXIT_SUCCESS);
7879         }
7880         if (wait_for_pid(pid))
7881                 goto out;
7882
7883         pid = fork();
7884         if (pid < 0) {
7885                 log_stderr("failure: fork");
7886                 goto out;
7887         }
7888         if (pid == 0) {
7889                 if (!switch_ids(0, 10000))
7890                         die("failure: switch_ids");
7891
7892                 if (!caps_down())
7893                         die("failure: caps_down");
7894
7895                 /* create regular file via open() */
7896                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7897                 if (file1_fd < 0)
7898                         die("failure: create");
7899
7900                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
7901                  * bit needs to be stripped.
7902                  */
7903                 if (is_setgid(t_dir1_fd, FILE1, 0))
7904                         die("failure: is_setgid");
7905
7906                 /* create directory */
7907                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7908                         die("failure: create");
7909
7910                 if (xfs_irix_sgid_inherit_enabled()) {
7911                         /* We're not in_group_p(). */
7912                         if (is_setgid(t_dir1_fd, DIR1, 0))
7913                                 die("failure: is_setgid");
7914                 } else {
7915                         /* Directories always inherit the setgid bit. */
7916                         if (!is_setgid(t_dir1_fd, DIR1, 0))
7917                                 die("failure: is_setgid");
7918                 }
7919
7920                 /*
7921                  * In setgid directories newly created files always inherit the
7922                  * gid from the parent directory. Verify that the file is owned
7923                  * by gid 0, not by gid 10000.
7924                  */
7925                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0))
7926                         die("failure: check ownership");
7927
7928                 /*
7929                  * In setgid directories newly created directories always
7930                  * inherit the gid from the parent directory. Verify that the
7931                  * directory is owned by gid 0, not by gid 10000.
7932                  */
7933                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0))
7934                         die("failure: check ownership");
7935
7936                 exit(EXIT_SUCCESS);
7937         }
7938         if (wait_for_pid(pid))
7939                 goto out;
7940
7941         fret = 0;
7942         log_debug("Ran test");
7943 out:
7944         safe_close(file1_fd);
7945
7946         return fret;
7947 }
7948
7949 static int setgid_create_idmapped(void)
7950 {
7951         int fret = -1;
7952         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7953         struct mount_attr attr = {
7954                 .attr_set = MOUNT_ATTR_IDMAP,
7955         };
7956         pid_t pid;
7957
7958         if (!caps_supported())
7959                 return 0;
7960
7961         if (fchmod(t_dir1_fd, S_IRUSR |
7962                               S_IWUSR |
7963                               S_IRGRP |
7964                               S_IWGRP |
7965                               S_IROTH |
7966                               S_IWOTH |
7967                               S_IXUSR |
7968                               S_IXGRP |
7969                               S_IXOTH |
7970                               S_ISGID), 0) {
7971                 log_stderr("failure: fchmod");
7972                 goto out;
7973         }
7974
7975         /* Verify that the sid bits got raised. */
7976         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7977                 log_stderr("failure: is_setgid");
7978                 goto out;
7979         }
7980
7981         /* Changing mount properties on a detached mount. */
7982         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7983         if (attr.userns_fd < 0) {
7984                 log_stderr("failure: get_userns_fd");
7985                 goto out;
7986         }
7987
7988         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7989                                      AT_EMPTY_PATH |
7990                                      AT_NO_AUTOMOUNT |
7991                                      AT_SYMLINK_NOFOLLOW |
7992                                      OPEN_TREE_CLOEXEC |
7993                                      OPEN_TREE_CLONE);
7994         if (open_tree_fd < 0) {
7995                 log_stderr("failure: sys_open_tree");
7996                 goto out;
7997         }
7998
7999         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8000                 log_stderr("failure: sys_mount_setattr");
8001                 goto out;
8002         }
8003
8004         pid = fork();
8005         if (pid < 0) {
8006                 log_stderr("failure: fork");
8007                 goto out;
8008         }
8009         if (pid == 0) {
8010                 if (!switch_ids(10000, 11000))
8011                         die("failure: switch fsids");
8012
8013                 /* create regular file via open() */
8014                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8015                 if (file1_fd < 0)
8016                         die("failure: create");
8017
8018                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8019                  * bit needs to be stripped.
8020                  */
8021                 if (is_setgid(open_tree_fd, FILE1, 0))
8022                         die("failure: is_setgid");
8023
8024                 /* create directory */
8025                 if (mkdirat(open_tree_fd, DIR1, 0000))
8026                         die("failure: create");
8027
8028                 if (xfs_irix_sgid_inherit_enabled()) {
8029                         /* We're not in_group_p(). */
8030                         if (is_setgid(open_tree_fd, DIR1, 0))
8031                                 die("failure: is_setgid");
8032                 } else {
8033                         /* Directories always inherit the setgid bit. */
8034                         if (!is_setgid(open_tree_fd, DIR1, 0))
8035                                 die("failure: is_setgid");
8036                 }
8037
8038                 /*
8039                  * In setgid directories newly created files always inherit the
8040                  * gid from the parent directory. Verify that the file is owned
8041                  * by gid 10000, not by gid 11000.
8042                  */
8043                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8044                         die("failure: check ownership");
8045
8046                 /*
8047                  * In setgid directories newly created directories always
8048                  * inherit the gid from the parent directory. Verify that the
8049                  * directory is owned by gid 10000, not by gid 11000.
8050                  */
8051                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000))
8052                         die("failure: check ownership");
8053
8054                 exit(EXIT_SUCCESS);
8055         }
8056         if (wait_for_pid(pid))
8057                 goto out;
8058
8059         fret = 0;
8060         log_debug("Ran test");
8061 out:
8062         safe_close(attr.userns_fd);
8063         safe_close(file1_fd);
8064         safe_close(open_tree_fd);
8065
8066         return fret;
8067 }
8068
8069 static int setgid_create_idmapped_in_userns(void)
8070 {
8071         int fret = -1;
8072         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8073         struct mount_attr attr = {
8074                 .attr_set = MOUNT_ATTR_IDMAP,
8075         };
8076         pid_t pid;
8077
8078         if (!caps_supported())
8079                 return 0;
8080
8081         if (fchmod(t_dir1_fd, S_IRUSR |
8082                               S_IWUSR |
8083                               S_IRGRP |
8084                               S_IWGRP |
8085                               S_IROTH |
8086                               S_IWOTH |
8087                               S_IXUSR |
8088                               S_IXGRP |
8089                               S_IXOTH |
8090                               S_ISGID), 0) {
8091                 log_stderr("failure: fchmod");
8092                 goto out;
8093         }
8094
8095         /* Verify that the sid bits got raised. */
8096         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
8097                 log_stderr("failure: is_setgid");
8098                 goto out;
8099         }
8100
8101         /* Changing mount properties on a detached mount. */
8102         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8103         if (attr.userns_fd < 0) {
8104                 log_stderr("failure: get_userns_fd");
8105                 goto out;
8106         }
8107
8108         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8109                                      AT_EMPTY_PATH |
8110                                      AT_NO_AUTOMOUNT |
8111                                      AT_SYMLINK_NOFOLLOW |
8112                                      OPEN_TREE_CLOEXEC |
8113                                      OPEN_TREE_CLONE);
8114         if (open_tree_fd < 0) {
8115                 log_stderr("failure: sys_open_tree");
8116                 goto out;
8117         }
8118
8119         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8120                 log_stderr("failure: sys_mount_setattr");
8121                 goto out;
8122         }
8123
8124         pid = fork();
8125         if (pid < 0) {
8126                 log_stderr("failure: fork");
8127                 goto out;
8128         }
8129         if (pid == 0) {
8130                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8131                         die("failure: switch_userns");
8132
8133                 /* create regular file via open() */
8134                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8135                 if (file1_fd < 0)
8136                         die("failure: create");
8137
8138                 /* We're in_group_p() and capable_wrt_inode_uidgid() so setgid
8139                  * bit needs to be set.
8140                  */
8141                 if (!is_setgid(open_tree_fd, FILE1, 0))
8142                         die("failure: is_setgid");
8143
8144                 /* create directory */
8145                 if (mkdirat(open_tree_fd, DIR1, 0000))
8146                         die("failure: create");
8147
8148                 /* Directories always inherit the setgid bit. */
8149                 if (!is_setgid(open_tree_fd, DIR1, 0))
8150                         die("failure: is_setgid");
8151
8152                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8153                         die("failure: check ownership");
8154
8155                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8156                         die("failure: check ownership");
8157
8158                 if (unlinkat(open_tree_fd, FILE1, 0))
8159                         die("failure: delete");
8160
8161                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8162                         die("failure: delete");
8163
8164                 exit(EXIT_SUCCESS);
8165         }
8166         if (wait_for_pid(pid))
8167                 goto out;
8168
8169         /*
8170          * Below we verify that setgid inheritance for a newly created file or
8171          * directory works correctly. As part of this we need to verify that
8172          * newly created files or directories inherit their gid from their
8173          * parent directory. So we change the parent directorie's gid to 1000
8174          * and create a file with fs{g,u}id 0 and verify that the newly created
8175          * file and directory inherit gid 1000, not 0.
8176          */
8177         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8178                 log_stderr("failure: fchownat");
8179                 goto out;
8180         }
8181
8182         pid = fork();
8183         if (pid < 0) {
8184                 log_stderr("failure: fork");
8185                 goto out;
8186         }
8187         if (pid == 0) {
8188                 if (!caps_supported()) {
8189                         log_debug("skip: capability library not installed");
8190                         exit(EXIT_SUCCESS);
8191                 }
8192
8193                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8194                         die("failure: switch_userns");
8195
8196                 /* create regular file via open() */
8197                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8198                 if (file1_fd < 0)
8199                         die("failure: create");
8200
8201                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8202                  * bit needs to be stripped.
8203                  */
8204                 if (is_setgid(open_tree_fd, FILE1, 0))
8205                         die("failure: is_setgid");
8206
8207                 /* create directory */
8208                 if (mkdirat(open_tree_fd, DIR1, 0000))
8209                         die("failure: create");
8210
8211                 if (xfs_irix_sgid_inherit_enabled()) {
8212                         /* We're not in_group_p(). */
8213                         if (is_setgid(open_tree_fd, DIR1, 0))
8214                                 die("failure: is_setgid");
8215                 } else {
8216                         /* Directories always inherit the setgid bit. */
8217                         if (!is_setgid(open_tree_fd, DIR1, 0))
8218                                 die("failure: is_setgid");
8219                 }
8220
8221                 /*
8222                  * In setgid directories newly created files always inherit the
8223                  * gid from the parent directory. Verify that the file is owned
8224                  * by gid 1000, not by gid 0.
8225                  */
8226                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8227                         die("failure: check ownership");
8228
8229                 /*
8230                  * In setgid directories newly created directories always
8231                  * inherit the gid from the parent directory. Verify that the
8232                  * directory is owned by gid 1000, not by gid 0.
8233                  */
8234                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 1000))
8235                         die("failure: check ownership");
8236
8237                 if (unlinkat(open_tree_fd, FILE1, 0))
8238                         die("failure: delete");
8239
8240                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8241                         die("failure: delete");
8242
8243                 exit(EXIT_SUCCESS);
8244         }
8245         if (wait_for_pid(pid))
8246                 goto out;
8247
8248         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8249                 log_stderr("failure: fchownat");
8250                 goto out;
8251         }
8252
8253         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8254                 log_stderr("failure: fchownat");
8255                 goto out;
8256         }
8257
8258         pid = fork();
8259         if (pid < 0) {
8260                 log_stderr("failure: fork");
8261                 goto out;
8262         }
8263         if (pid == 0) {
8264                 if (!caps_supported()) {
8265                         log_debug("skip: capability library not installed");
8266                         exit(EXIT_SUCCESS);
8267                 }
8268
8269                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8270                         die("failure: switch_userns");
8271
8272                 /* create regular file via open() */
8273                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8274                 if (file1_fd < 0)
8275                         die("failure: create");
8276
8277                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8278                  * bit needs to be stripped.
8279                  */
8280                 if (is_setgid(open_tree_fd, FILE1, 0))
8281                         die("failure: is_setgid");
8282
8283                 /* create directory */
8284                 if (mkdirat(open_tree_fd, DIR1, 0000))
8285                         die("failure: create");
8286
8287                 /* Directories always inherit the setgid bit. */
8288                 if (xfs_irix_sgid_inherit_enabled()) {
8289                         /* We're not in_group_p(). */
8290                         if (is_setgid(open_tree_fd, DIR1, 0))
8291                                 die("failure: is_setgid");
8292                 } else {
8293                         /* Directories always inherit the setgid bit. */
8294                         if (!is_setgid(open_tree_fd, DIR1, 0))
8295                                 die("failure: is_setgid");
8296                 }
8297
8298                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8299                         die("failure: check ownership");
8300
8301                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8302                         die("failure: check ownership");
8303
8304                 exit(EXIT_SUCCESS);
8305         }
8306         if (wait_for_pid(pid))
8307                 goto out;
8308
8309         fret = 0;
8310         log_debug("Ran test");
8311 out:
8312         safe_close(attr.userns_fd);
8313         safe_close(file1_fd);
8314         safe_close(open_tree_fd);
8315
8316         return fret;
8317 }
8318
/*
 * Round-trip an int through a void * (going via intptr_t), e.g. to pass it
 * as a thread start argument or exit value.
 */
#define PTR_TO_INT(p) ((int)(intptr_t)(p))
#define INT_TO_PTR(u) ((void *)(intptr_t)(u))
8321
8322 static void *idmapped_mount_create_cb(void *data)
8323 {
8324         int fret = EXIT_FAILURE, open_tree_fd = PTR_TO_INT(data);
8325         struct mount_attr attr = {
8326                 .attr_set = MOUNT_ATTR_IDMAP,
8327         };
8328
8329         /* Changing mount properties on a detached mount. */
8330         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8331         if (attr.userns_fd < 0) {
8332                 log_stderr("failure: get_userns_fd");
8333                 goto out;
8334         }
8335
8336         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8337                 log_stderr("failure: sys_mount_setattr");
8338                 goto out;
8339         }
8340
8341         fret = EXIT_SUCCESS;
8342
8343 out:
8344         safe_close(attr.userns_fd);
8345         pthread_exit(INT_TO_PTR(fret));
8346 }
8347
/* This tries to verify that we never see inconsistent ownership on-disk and
 * can't write invalid ids to disk. To do this we create a race between
 * idmapping a mount and creating files on it.
 * Note, while it is perfectly fine to see overflowuid and overflowgid as owner
 * if we create files through the open_tree_fd before the mount is idmapped but
 * look at the files after the mount has been idmapped, in this test it can
 * never be the case that we see overflowuid and overflowgid when we access the
 * file through a non-idmapped mount (in the initial user namespace).
 */
8357 static void *idmapped_mount_operations_cb(void *data)
8358 {
8359         int file1_fd = -EBADF, file2_fd = -EBADF, dir1_fd = -EBADF,
8360             dir1_fd2 = -EBADF, fret = EXIT_FAILURE,
8361             open_tree_fd = PTR_TO_INT(data);
8362
8363         if (!switch_fsids(10000, 10000)) {
8364                 log_stderr("failure: switch fsids");
8365                 goto out;
8366         }
8367
8368         file1_fd = openat(open_tree_fd, FILE1,
8369                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8370         if (file1_fd < 0) {
8371                 log_stderr("failure: openat");
8372                 goto out;
8373         }
8374
8375         file2_fd = openat(open_tree_fd, FILE2,
8376                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8377         if (file2_fd < 0) {
8378                 log_stderr("failure: openat");
8379                 goto out;
8380         }
8381
8382         if (mkdirat(open_tree_fd, DIR1, 0777)) {
8383                 log_stderr("failure: mkdirat");
8384                 goto out;
8385         }
8386
8387         dir1_fd = openat(open_tree_fd, DIR1,
8388                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8389         if (dir1_fd < 0) {
8390                 log_stderr("failure: openat");
8391                 goto out;
8392         }
8393
8394         if (!__expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0, false) &&
8395             !__expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000, false) &&
8396             !__expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid, false)) {
8397                 log_stderr("failure: expected_uid_gid");
8398                 goto out;
8399         }
8400
8401         if (!__expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0, false) &&
8402             !__expected_uid_gid(open_tree_fd, FILE2, 0, 10000, 10000, false) &&
8403             !__expected_uid_gid(open_tree_fd, FILE2, 0, t_overflowuid, t_overflowgid, false)) {
8404                 log_stderr("failure: expected_uid_gid");
8405                 goto out;
8406         }
8407
8408         if (!__expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0, false) &&
8409             !__expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000, false) &&
8410             !__expected_uid_gid(open_tree_fd, DIR1, 0, t_overflowuid, t_overflowgid, false)) {
8411                 log_stderr("failure: expected_uid_gid");
8412                 goto out;
8413         }
8414
8415         if (!__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 0, 0, false) &&
8416             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 10000, 10000, false) &&
8417             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, t_overflowuid, t_overflowgid, false)) {
8418                 log_stderr("failure: expected_uid_gid");
8419                 goto out;
8420         }
8421
8422         dir1_fd2 = openat(t_dir1_fd, DIR1,
8423                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8424         if (dir1_fd2 < 0) {
8425                 log_stderr("failure: openat");
8426                 goto out;
8427         }
8428
8429         if (!__expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0, false) &&
8430             !__expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000, false)) {
8431                 log_stderr("failure: expected_uid_gid");
8432                 goto out;
8433         }
8434
8435         if (!__expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0, false) &&
8436             !__expected_uid_gid(t_dir1_fd, FILE2, 0, 10000, 10000, false)) {
8437                 log_stderr("failure: expected_uid_gid");
8438                 goto out;
8439         }
8440
8441         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8442             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8443                 log_stderr("failure: expected_uid_gid");
8444                 goto out;
8445         }
8446
8447         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8448             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8449                 log_stderr("failure: expected_uid_gid");
8450                 goto out;
8451         }
8452
8453         if (!__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 0, 0, false) &&
8454             !__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 10000, 10000, false)) {
8455                 log_stderr("failure: expected_uid_gid");
8456                 goto out;
8457         }
8458
8459         fret = EXIT_SUCCESS;
8460
8461 out:
8462         safe_close(file1_fd);
8463         safe_close(file2_fd);
8464         safe_close(dir1_fd);
8465         safe_close(dir1_fd2);
8466
8467         pthread_exit(INT_TO_PTR(fret));
8468 }
8469
8470 static int threaded_idmapped_mount_interactions(void)
8471 {
8472         int i;
8473         int fret = -1;
8474         pid_t pid;
8475         pthread_attr_t thread_attr;
8476         pthread_t threads[2];
8477
8478         pthread_attr_init(&thread_attr);
8479
8480         for (i = 0; i < 1000; i++) {
8481                 int ret1 = 0, ret2 = 0, tret1 = 0, tret2 = 0;
8482
8483                 pid = fork();
8484                 if (pid < 0) {
8485                         log_stderr("failure: fork");
8486                         goto out;
8487                 }
8488                 if (pid == 0) {
8489                         int open_tree_fd = -EBADF;
8490
8491                         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8492                                                      AT_EMPTY_PATH |
8493                                                      AT_NO_AUTOMOUNT |
8494                                                      AT_SYMLINK_NOFOLLOW |
8495                                                      OPEN_TREE_CLOEXEC |
8496                                                      OPEN_TREE_CLONE);
8497                         if (open_tree_fd < 0)
8498                                 die("failure: sys_open_tree");
8499
8500                         if (pthread_create(&threads[0], &thread_attr,
8501                                            idmapped_mount_create_cb,
8502                                            INT_TO_PTR(open_tree_fd)))
8503                                 die("failure: pthread_create");
8504
8505                         if (pthread_create(&threads[1], &thread_attr,
8506                                            idmapped_mount_operations_cb,
8507                                            INT_TO_PTR(open_tree_fd)))
8508                                 die("failure: pthread_create");
8509
8510                         ret1 = pthread_join(threads[0], INT_TO_PTR(tret1));
8511                         ret2 = pthread_join(threads[1], INT_TO_PTR(tret2));
8512
8513                         if (ret1) {
8514                                 errno = ret1;
8515                                 die("failure: pthread_join");
8516                         }
8517
8518                         if (ret2) {
8519                                 errno = ret2;
8520                                 die("failure: pthread_join");
8521                         }
8522
8523                         if (tret1 || tret2)
8524                                 exit(EXIT_FAILURE);
8525
8526                         exit(EXIT_SUCCESS);
8527
8528                 }
8529
8530                 if (wait_for_pid(pid)) {
8531                         log_stderr("failure: iteration %d", i);
8532                         goto out;
8533                 }
8534
8535                 rm_r(t_dir1_fd, ".");
8536
8537         }
8538
8539         fret = 0;
8540         log_debug("Ran test");
8541
8542 out:
8543         return fret;
8544 }
8545
8546 static int setattr_truncate(void)
8547 {
8548         int fret = -1;
8549         int file1_fd = -EBADF;
8550
8551         /* create regular file via open() */
8552         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8553         if (file1_fd < 0) {
8554                 log_stderr("failure: create");
8555                 goto out;
8556         }
8557
8558         if (ftruncate(file1_fd, 10000)) {
8559                 log_stderr("failure: ftruncate");
8560                 goto out;
8561         }
8562
8563         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8564                 log_stderr("failure: check ownership");
8565                 goto out;
8566         }
8567
8568         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 10000)) {
8569                 log_stderr("failure: expected_file_size");
8570                 goto out;
8571         }
8572
8573         if (ftruncate(file1_fd, 0)) {
8574                 log_stderr("failure: ftruncate");
8575                 goto out;
8576         }
8577
8578         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8579                 log_stderr("failure: check ownership");
8580                 goto out;
8581         }
8582
8583         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 0)) {
8584                 log_stderr("failure: expected_file_size");
8585                 goto out;
8586         }
8587
8588         if (unlinkat(t_dir1_fd, FILE1, 0)) {
8589                 log_stderr("failure: remove");
8590                 goto out;
8591         }
8592
8593         fret = 0;
8594         log_debug("Ran test");
8595 out:
8596         safe_close(file1_fd);
8597
8598         return fret;
8599 }
8600
8601 static int setattr_truncate_idmapped(void)
8602 {
8603         int fret = -1;
8604         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8605         pid_t pid;
8606         struct mount_attr attr = {
8607                 .attr_set = MOUNT_ATTR_IDMAP,
8608         };
8609
8610         /* Changing mount properties on a detached mount. */
8611         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8612         if (attr.userns_fd < 0) {
8613                 log_stderr("failure: get_userns_fd");
8614                 goto out;
8615         }
8616
8617         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8618                                      AT_EMPTY_PATH |
8619                                      AT_NO_AUTOMOUNT |
8620                                      AT_SYMLINK_NOFOLLOW |
8621                                      OPEN_TREE_CLOEXEC |
8622                                      OPEN_TREE_CLONE);
8623         if (open_tree_fd < 0) {
8624                 log_stderr("failure: sys_open_tree");
8625                 goto out;
8626         }
8627
8628         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8629                 log_stderr("failure: sys_mount_setattr");
8630                 goto out;
8631         }
8632
8633         pid = fork();
8634         if (pid < 0) {
8635                 log_stderr("failure: fork");
8636                 goto out;
8637         }
8638         if (pid == 0) {
8639                 if (!switch_ids(10000, 10000))
8640                         die("failure: switch_ids");
8641
8642                 /* create regular file via open() */
8643                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8644                 if (file1_fd < 0)
8645                         die("failure: create");
8646
8647                 if (ftruncate(file1_fd, 10000))
8648                         die("failure: ftruncate");
8649
8650                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8651                         die("failure: check ownership");
8652
8653                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8654                         die("failure: expected_file_size");
8655
8656                 if (ftruncate(file1_fd, 0))
8657                         die("failure: ftruncate");
8658
8659                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8660                         die("failure: check ownership");
8661
8662                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8663                         die("failure: expected_file_size");
8664
8665                 exit(EXIT_SUCCESS);
8666         }
8667         if (wait_for_pid(pid))
8668                 goto out;
8669
8670         pid = fork();
8671         if (pid < 0) {
8672                 log_stderr("failure: fork");
8673                 goto out;
8674         }
8675         if (pid == 0) {
8676                 int file1_fd2 = -EBADF;
8677
8678                 /* create regular file via open() */
8679                 file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8680                 if (file1_fd2 < 0)
8681                         die("failure: create");
8682
8683                 if (ftruncate(file1_fd2, 10000))
8684                         die("failure: ftruncate");
8685
8686                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8687                         die("failure: check ownership");
8688
8689                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8690                         die("failure: expected_file_size");
8691
8692                 if (ftruncate(file1_fd2, 0))
8693                         die("failure: ftruncate");
8694
8695                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8696                         die("failure: check ownership");
8697
8698                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8699                         die("failure: expected_file_size");
8700
8701                 exit(EXIT_SUCCESS);
8702         }
8703         if (wait_for_pid(pid))
8704                 goto out;
8705
8706         fret = 0;
8707         log_debug("Ran test");
8708 out:
8709         safe_close(file1_fd);
8710         safe_close(open_tree_fd);
8711
8712         return fret;
8713 }
8714
8715 static int setattr_truncate_idmapped_in_userns(void)
8716 {
8717         int fret = -1;
8718         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8719         struct mount_attr attr = {
8720                 .attr_set = MOUNT_ATTR_IDMAP,
8721         };
8722         pid_t pid;
8723
8724         /* Changing mount properties on a detached mount. */
8725         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8726         if (attr.userns_fd < 0) {
8727                 log_stderr("failure: get_userns_fd");
8728                 goto out;
8729         }
8730
8731         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8732                                      AT_EMPTY_PATH |
8733                                      AT_NO_AUTOMOUNT |
8734                                      AT_SYMLINK_NOFOLLOW |
8735                                      OPEN_TREE_CLOEXEC |
8736                                      OPEN_TREE_CLONE);
8737         if (open_tree_fd < 0) {
8738                 log_stderr("failure: sys_open_tree");
8739                 goto out;
8740         }
8741
8742         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8743                 log_stderr("failure: sys_mount_setattr");
8744                 goto out;
8745         }
8746
8747         pid = fork();
8748         if (pid < 0) {
8749                 log_stderr("failure: fork");
8750                 goto out;
8751         }
8752         if (pid == 0) {
8753                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8754                         die("failure: switch_userns");
8755
8756                 /* create regular file via open() */
8757                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8758                 if (file1_fd < 0)
8759                         die("failure: create");
8760
8761                 if (ftruncate(file1_fd, 10000))
8762                         die("failure: ftruncate");
8763
8764                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8765                         die("failure: check ownership");
8766
8767                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8768                         die("failure: expected_file_size");
8769
8770                 if (ftruncate(file1_fd, 0))
8771                         die("failure: ftruncate");
8772
8773                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8774                         die("failure: check ownership");
8775
8776                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8777                         die("failure: expected_file_size");
8778
8779                 if (unlinkat(open_tree_fd, FILE1, 0))
8780                         die("failure: delete");
8781
8782                 exit(EXIT_SUCCESS);
8783         }
8784         if (wait_for_pid(pid))
8785                 goto out;
8786
8787         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8788                 log_stderr("failure: fchownat");
8789                 goto out;
8790         }
8791
8792         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8793                 log_stderr("failure: fchownat");
8794                 goto out;
8795         }
8796
8797         pid = fork();
8798         if (pid < 0) {
8799                 log_stderr("failure: fork");
8800                 goto out;
8801         }
8802         if (pid == 0) {
8803                 if (!caps_supported()) {
8804                         log_debug("skip: capability library not installed");
8805                         exit(EXIT_SUCCESS);
8806                 }
8807
8808                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8809                         die("failure: switch_userns");
8810
8811                 /* create regular file via open() */
8812                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8813                 if (file1_fd < 0)
8814                         die("failure: create");
8815
8816                 if (ftruncate(file1_fd, 10000))
8817                         die("failure: ftruncate");
8818
8819                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8820                         die("failure: check ownership");
8821
8822                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8823                         die("failure: expected_file_size");
8824
8825                 if (ftruncate(file1_fd, 0))
8826                         die("failure: ftruncate");
8827
8828                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8829                         die("failure: check ownership");
8830
8831                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8832                         die("failure: expected_file_size");
8833
8834                 if (unlinkat(open_tree_fd, FILE1, 0))
8835                         die("failure: delete");
8836
8837                 exit(EXIT_SUCCESS);
8838         }
8839         if (wait_for_pid(pid))
8840                 goto out;
8841
8842         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8843                 log_stderr("failure: fchownat");
8844                 goto out;
8845         }
8846
8847         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8848                 log_stderr("failure: fchownat");
8849                 goto out;
8850         }
8851
8852         pid = fork();
8853         if (pid < 0) {
8854                 log_stderr("failure: fork");
8855                 goto out;
8856         }
8857         if (pid == 0) {
8858                 if (!caps_supported()) {
8859                         log_debug("skip: capability library not installed");
8860                         exit(EXIT_SUCCESS);
8861                 }
8862
8863                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8864                         die("failure: switch_userns");
8865
8866                 /* create regular file via open() */
8867                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8868                 if (file1_fd < 0)
8869                         die("failure: create");
8870
8871                 if (ftruncate(file1_fd, 10000))
8872                         die("failure: ftruncate");
8873
8874                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8875                         die("failure: check ownership");
8876
8877                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8878                         die("failure: expected_file_size");
8879
8880                 if (ftruncate(file1_fd, 0))
8881                         die("failure: ftruncate");
8882
8883                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8884                         die("failure: check ownership");
8885
8886                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8887                         die("failure: expected_file_size");
8888
8889                 if (unlinkat(open_tree_fd, FILE1, 0))
8890                         die("failure: delete");
8891
8892                 exit(EXIT_SUCCESS);
8893         }
8894         if (wait_for_pid(pid))
8895                 goto out;
8896
8897         fret = 0;
8898         log_debug("Ran test");
8899 out:
8900         safe_close(attr.userns_fd);
8901         safe_close(file1_fd);
8902         safe_close(open_tree_fd);
8903
8904         return fret;
8905 }
8906
8907 static int nested_userns(void)
8908 {
8909         int fret = -1;
8910         int ret;
8911         pid_t pid;
8912         unsigned int id;
8913         struct list *it, *next;
8914         struct userns_hierarchy hierarchy[] = {
8915                 { .level = 1, .fd_userns = -EBADF, },
8916                 { .level = 2, .fd_userns = -EBADF, },
8917                 { .level = 3, .fd_userns = -EBADF, },
8918                 { .level = 4, .fd_userns = -EBADF, },
8919                 /* Dummy entry that marks the end. */
8920                 { .level = MAX_USERNS_LEVEL, .fd_userns = -EBADF, },
8921         };
8922         struct mount_attr attr_level1 = {
8923                 .attr_set       = MOUNT_ATTR_IDMAP,
8924                 .userns_fd      = -EBADF,
8925         };
8926         struct mount_attr attr_level2 = {
8927                 .attr_set       = MOUNT_ATTR_IDMAP,
8928                 .userns_fd      = -EBADF,
8929         };
8930         struct mount_attr attr_level3 = {
8931                 .attr_set       = MOUNT_ATTR_IDMAP,
8932                 .userns_fd      = -EBADF,
8933         };
8934         struct mount_attr attr_level4 = {
8935                 .attr_set       = MOUNT_ATTR_IDMAP,
8936                 .userns_fd      = -EBADF,
8937         };
8938         int fd_dir1 = -EBADF,
8939             fd_open_tree_level1 = -EBADF,
8940             fd_open_tree_level2 = -EBADF,
8941             fd_open_tree_level3 = -EBADF,
8942             fd_open_tree_level4 = -EBADF;
8943         const unsigned int id_file_range = 10000;
8944
8945         list_init(&hierarchy[0].id_map);
8946         list_init(&hierarchy[1].id_map);
8947         list_init(&hierarchy[2].id_map);
8948         list_init(&hierarchy[3].id_map);
8949
8950         /*
8951          * Give a large map to the outermost user namespace so we can create
8952          * comfortable nested maps.
8953          */
8954         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_UID);
8955         if (ret) {
8956                 log_stderr("failure: adding uidmap for userns at level 1");
8957                 goto out;
8958         }
8959
8960         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_GID);
8961         if (ret) {
8962                 log_stderr("failure: adding gidmap for userns at level 1");
8963                 goto out;
8964         }
8965
8966         /* This is uid:0->2000000:100000000 in init userns. */
8967         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_UID);
8968         if (ret) {
8969                 log_stderr("failure: adding uidmap for userns at level 2");
8970                 goto out;
8971         }
8972
8973         /* This is gid:0->2000000:100000000 in init userns. */
8974         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_GID);
8975         if (ret) {
8976                 log_stderr("failure: adding gidmap for userns at level 2");
8977                 goto out;
8978         }
8979
8980         /* This is uid:0->3000000:999 in init userns. */
8981         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_UID);
8982         if (ret) {
8983                 log_stderr("failure: adding uidmap for userns at level 3");
8984                 goto out;
8985         }
8986
8987         /* This is gid:0->3000000:999 in the init userns. */
8988         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_GID);
8989         if (ret) {
8990                 log_stderr("failure: adding gidmap for userns at level 3");
8991                 goto out;
8992         }
8993
8994         /* id 999 will remain unmapped. */
8995
8996         /* This is uid:1000->2001000:1 in init userns. */
8997         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_UID);
8998         if (ret) {
8999                 log_stderr("failure: adding uidmap for userns at level 3");
9000                 goto out;
9001         }
9002
9003         /* This is gid:1000->2001000:1 in init userns. */
9004         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_GID);
9005         if (ret) {
9006                 log_stderr("failure: adding gidmap for userns at level 3");
9007                 goto out;
9008         }
9009
9010         /* This is uid:1001->3001001:10000 in init userns. */
9011         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_UID);
9012         if (ret) {
9013                 log_stderr("failure: adding uidmap for userns at level 3");
9014                 goto out;
9015         }
9016
9017         /* This is gid:1001->3001001:10000 in init userns. */
9018         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_GID);
9019         if (ret) {
9020                 log_stderr("failure: adding gidmap for userns at level 3");
9021                 goto out;
9022         }
9023
9024         /* Don't write a mapping in the 4th userns. */
9025         list_empty(&hierarchy[4].id_map);
9026
9027         /* Create the actual userns hierarchy. */
9028         ret = create_userns_hierarchy(hierarchy);
9029         if (ret) {
9030                 log_stderr("failure: create userns hierarchy");
9031                 goto out;
9032         }
9033
9034         attr_level1.userns_fd = hierarchy[0].fd_userns;
9035         attr_level2.userns_fd = hierarchy[1].fd_userns;
9036         attr_level3.userns_fd = hierarchy[2].fd_userns;
9037         attr_level4.userns_fd = hierarchy[3].fd_userns;
9038
9039         /*
9040          * Create one directory where we create files for each uid/gid within
9041          * the first userns.
9042          */
9043         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
9044                 log_stderr("failure: mkdirat");
9045                 goto out;
9046         }
9047
9048         fd_dir1 = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
9049         if (fd_dir1 < 0) {
9050                 log_stderr("failure: openat");
9051                 goto out;
9052         }
9053
9054         for (id = 0; id <= id_file_range; id++) {
9055                 char file[256];
9056
9057                 snprintf(file, sizeof(file), DIR1 "/" FILE1 "_%u", id);
9058
9059                 if (mknodat(t_dir1_fd, file, S_IFREG | 0644, 0)) {
9060                         log_stderr("failure: create %s", file);
9061                         goto out;
9062                 }
9063
9064                 if (fchownat(t_dir1_fd, file, id, id, AT_SYMLINK_NOFOLLOW)) {
9065                         log_stderr("failure: fchownat %s", file);
9066                         goto out;
9067                 }
9068
9069                 if (!expected_uid_gid(t_dir1_fd, file, 0, id, id)) {
9070                         log_stderr("failure: check ownership %s", file);
9071                         goto out;
9072                 }
9073         }
9074
9075         /* Create detached mounts for all the user namespaces. */
9076         fd_open_tree_level1 = sys_open_tree(t_dir1_fd, DIR1,
9077                                             AT_NO_AUTOMOUNT |
9078                                             AT_SYMLINK_NOFOLLOW |
9079                                             OPEN_TREE_CLOEXEC |
9080                                             OPEN_TREE_CLONE);
9081         if (fd_open_tree_level1 < 0) {
9082                 log_stderr("failure: sys_open_tree");
9083                 goto out;
9084         }
9085
9086         fd_open_tree_level2 = sys_open_tree(t_dir1_fd, DIR1,
9087                                             AT_NO_AUTOMOUNT |
9088                                             AT_SYMLINK_NOFOLLOW |
9089                                             OPEN_TREE_CLOEXEC |
9090                                             OPEN_TREE_CLONE);
9091         if (fd_open_tree_level2 < 0) {
9092                 log_stderr("failure: sys_open_tree");
9093                 goto out;
9094         }
9095
9096         fd_open_tree_level3 = sys_open_tree(t_dir1_fd, DIR1,
9097                                             AT_NO_AUTOMOUNT |
9098                                             AT_SYMLINK_NOFOLLOW |
9099                                             OPEN_TREE_CLOEXEC |
9100                                             OPEN_TREE_CLONE);
9101         if (fd_open_tree_level3 < 0) {
9102                 log_stderr("failure: sys_open_tree");
9103                 goto out;
9104         }
9105
9106         fd_open_tree_level4 = sys_open_tree(t_dir1_fd, DIR1,
9107                                             AT_NO_AUTOMOUNT |
9108                                             AT_SYMLINK_NOFOLLOW |
9109                                             OPEN_TREE_CLOEXEC |
9110                                             OPEN_TREE_CLONE);
9111         if (fd_open_tree_level4 < 0) {
9112                 log_stderr("failure: sys_open_tree");
9113                 goto out;
9114         }
9115
9116         /* Turn detached mounts into detached idmapped mounts. */
9117         if (sys_mount_setattr(fd_open_tree_level1, "", AT_EMPTY_PATH,
9118                               &attr_level1, sizeof(attr_level1))) {
9119                 log_stderr("failure: sys_mount_setattr");
9120                 goto out;
9121         }
9122
9123         if (sys_mount_setattr(fd_open_tree_level2, "", AT_EMPTY_PATH,
9124                               &attr_level2, sizeof(attr_level2))) {
9125                 log_stderr("failure: sys_mount_setattr");
9126                 goto out;
9127         }
9128
9129         if (sys_mount_setattr(fd_open_tree_level3, "", AT_EMPTY_PATH,
9130                               &attr_level3, sizeof(attr_level3))) {
9131                 log_stderr("failure: sys_mount_setattr");
9132                 goto out;
9133         }
9134
9135         if (sys_mount_setattr(fd_open_tree_level4, "", AT_EMPTY_PATH,
9136                               &attr_level4, sizeof(attr_level4))) {
9137                 log_stderr("failure: sys_mount_setattr");
9138                 goto out;
9139         }
9140
9141         /* Verify that ownership looks correct for callers in the init userns. */
9142         for (id = 0; id <= id_file_range; id++) {
9143                 bool bret;
9144                 unsigned int id_level1, id_level2, id_level3;
9145                 char file[256];
9146
9147                 snprintf(file, sizeof(file), FILE1 "_%u", id);
9148
9149                 id_level1 = id + 1000000;
9150                 if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1)) {
9151                         log_stderr("failure: check ownership %s", file);
9152                         goto out;
9153                 }
9154
9155                 id_level2 = id + 2000000;
9156                 if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2)) {
9157                         log_stderr("failure: check ownership %s", file);
9158                         goto out;
9159                 }
9160
9161                 if (id == 999) {
9162                         /* This id is unmapped. */
9163                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9164                 } else if (id == 1000) {
9165                         id_level3 = id + 2000000; /* We punched a hole in the map at 1000. */
9166                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9167                 } else {
9168                         id_level3 = id + 3000000; /* Rest is business as usual. */
9169                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9170                 }
9171                 if (!bret) {
9172                         log_stderr("failure: check ownership %s", file);
9173                         goto out;
9174                 }
9175
9176                 if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid)) {
9177                         log_stderr("failure: check ownership %s", file);
9178                         goto out;
9179                 }
9180         }
9181
9182         /* Verify that ownership looks correct for callers in the first userns. */
9183         pid = fork();
9184         if (pid < 0) {
9185                 log_stderr("failure: fork");
9186                 goto out;
9187         }
9188         if (pid == 0) {
9189                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9190                         die("failure: switch_userns");
9191
9192                 for (id = 0; id <= id_file_range; id++) {
9193                         bool bret;
9194                         unsigned int id_level1, id_level2, id_level3;
9195                         char file[256];
9196
9197                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9198
9199                         id_level1 = id;
9200                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9201                                 die("failure: check ownership %s", file);
9202
9203                         id_level2 = id + 1000000;
9204                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9205                                 die("failure: check ownership %s", file);
9206
9207                         if (id == 999) {
9208                                 /* This id is unmapped. */
9209                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9210                         } else if (id == 1000) {
9211                                 id_level3 = id + 1000000; /* We punched a hole in the map at 1000. */
9212                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9213                         } else {
9214                                 id_level3 = id + 2000000; /* Rest is business as usual. */
9215                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9216                         }
9217                         if (!bret)
9218                                 die("failure: check ownership %s", file);
9219
9220                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9221                                 die("failure: check ownership %s", file);
9222                 }
9223
9224                 exit(EXIT_SUCCESS);
9225         }
9226         if (wait_for_pid(pid))
9227                 goto out;
9228
9229         /* Verify that ownership looks correct for callers in the second userns. */
9230         pid = fork();
9231         if (pid < 0) {
9232                 log_stderr("failure: fork");
9233                 goto out;
9234         }
9235         if (pid == 0) {
9236                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9237                         die("failure: switch_userns");
9238
9239                 for (id = 0; id <= id_file_range; id++) {
9240                         bool bret;
9241                         unsigned int id_level2, id_level3;
9242                         char file[256];
9243
9244                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9245
9246                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9247                                 die("failure: check ownership %s", file);
9248
9249                         id_level2 = id;
9250                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9251                                 die("failure: check ownership %s", file);
9252
9253                         if (id == 999) {
9254                                 /* This id is unmapped. */
9255                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9256                         } else if (id == 1000) {
9257                                 id_level3 = id; /* We punched a hole in the map at 1000. */
9258                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9259                         } else {
9260                                 id_level3 = id + 1000000; /* Rest is business as usual. */
9261                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9262                         }
9263                         if (!bret)
9264                                 die("failure: check ownership %s", file);
9265
9266                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9267                                 die("failure: check ownership %s", file);
9268                 }
9269
9270                 exit(EXIT_SUCCESS);
9271         }
9272         if (wait_for_pid(pid))
9273                 goto out;
9274
9275         /* Verify that ownership looks correct for callers in the third userns. */
9276         pid = fork();
9277         if (pid < 0) {
9278                 log_stderr("failure: fork");
9279                 goto out;
9280         }
9281         if (pid == 0) {
9282                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9283                         die("failure: switch_userns");
9284
9285                 for (id = 0; id <= id_file_range; id++) {
9286                         bool bret;
9287                         unsigned int id_level2, id_level3;
9288                         char file[256];
9289
9290                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9291
9292                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9293                                 die("failure: check ownership %s", file);
9294
9295                         if (id == 1000) {
9296                                 /*
9297                                  * The idmapping of the third userns has a hole
9298                                  * at uid/gid 1000. That means:
9299                                  * - 1000->userns_0(2000000) // init userns
9300                                  * - 1000->userns_1(2000000) // level 1
9301                                  * - 1000->userns_2(1000000) // level 2
9302                                  * - 1000->userns_3(1000)    // level 3 (because level 3 has a hole)
9303                                  */
9304                                 id_level2 = id;
9305                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2);
9306                         } else {
9307                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid);
9308                         }
9309                         if (!bret)
9310                                 die("failure: check ownership %s", file);
9311
9312
9313                         if (id == 999) {
9314                                 /* This id is unmapped. */
9315                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9316                         } else {
9317                                 id_level3 = id; /* Rest is business as usual. */
9318                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9319                         }
9320                         if (!bret)
9321                                 die("failure: check ownership %s", file);
9322
9323                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9324                                 die("failure: check ownership %s", file);
9325                 }
9326
9327                 exit(EXIT_SUCCESS);
9328         }
9329         if (wait_for_pid(pid))
9330                 goto out;
9331
9332         /* Verify that ownership looks correct for callers in the fourth userns. */
9333         pid = fork();
9334         if (pid < 0) {
9335                 log_stderr("failure: fork");
9336                 goto out;
9337         }
9338         if (pid == 0) {
9339                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9340                         die("failure: switch_userns");
9341
9342                 for (id = 0; id <= id_file_range; id++) {
9343                         char file[256];
9344
9345                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9346
9347                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9348                                 die("failure: check ownership %s", file);
9349
9350                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9351                                 die("failure: check ownership %s", file);
9352
9353                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9354                                 die("failure: check ownership %s", file);
9355
9356                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9357                                 die("failure: check ownership %s", file);
9358                 }
9359
9360                 exit(EXIT_SUCCESS);
9361         }
9362         if (wait_for_pid(pid))
9363                 goto out;
9364
9365         /* Verify that chown works correctly for callers in the first userns. */
9366         pid = fork();
9367         if (pid < 0) {
9368                 log_stderr("failure: fork");
9369                 goto out;
9370         }
9371         if (pid == 0) {
9372                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9373                         die("failure: switch_userns");
9374
9375                 for (id = 0; id <= id_file_range; id++) {
9376                         bool bret;
9377                         unsigned int id_level1, id_level2, id_level3, id_new;
9378                         char file[256];
9379
9380                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9381
9382                         id_new = id + 1;
9383                         if (fchownat(fd_open_tree_level1, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9384                                 die("failure: fchownat %s", file);
9385
9386                         id_level1 = id_new;
9387                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9388                                 die("failure: check ownership %s", file);
9389
9390                         id_level2 = id_new + 1000000;
9391                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9392                                 die("failure: check ownership %s", file);
9393
9394                         if (id_new == 999) {
9395                                 /* This id is unmapped. */
9396                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9397                         } else if (id_new == 1000) {
9398                                 id_level3 = id_new + 1000000; /* We punched a hole in the map at 1000. */
9399                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9400                         } else {
9401                                 id_level3 = id_new + 2000000; /* Rest is business as usual. */
9402                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9403                         }
9404                         if (!bret)
9405                                 die("failure: check ownership %s", file);
9406
9407                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9408                                 die("failure: check ownership %s", file);
9409
9410                         /* Revert ownership. */
9411                         if (fchownat(fd_open_tree_level1, file, id, id, AT_SYMLINK_NOFOLLOW))
9412                                 die("failure: fchownat %s", file);
9413                 }
9414
9415                 exit(EXIT_SUCCESS);
9416         }
9417         if (wait_for_pid(pid))
9418                 goto out;
9419
9420         /* Verify that chown works correctly for callers in the second userns. */
9421         pid = fork();
9422         if (pid < 0) {
9423                 log_stderr("failure: fork");
9424                 goto out;
9425         }
9426         if (pid == 0) {
9427                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9428                         die("failure: switch_userns");
9429
9430                 for (id = 0; id <= id_file_range; id++) {
9431                         bool bret;
9432                         unsigned int id_level2, id_level3, id_new;
9433                         char file[256];
9434
9435                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9436
9437                         id_new = id + 1;
9438                         if (fchownat(fd_open_tree_level2, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9439                                 die("failure: fchownat %s", file);
9440
9441                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9442                                 die("failure: check ownership %s", file);
9443
9444                         id_level2 = id_new;
9445                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9446                                 die("failure: check ownership %s", file);
9447
9448                         if (id_new == 999) {
9449                                 /* This id is unmapped. */
9450                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9451                         } else if (id_new == 1000) {
9452                                 id_level3 = id_new; /* We punched a hole in the map at 1000. */
9453                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9454                         } else {
9455                                 id_level3 = id_new + 1000000; /* Rest is business as usual. */
9456                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9457                         }
9458                         if (!bret)
9459                                 die("failure: check ownership %s", file);
9460
9461                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9462                                 die("failure: check ownership %s", file);
9463
9464                         /* Revert ownership. */
9465                         if (fchownat(fd_open_tree_level2, file, id, id, AT_SYMLINK_NOFOLLOW))
9466                                 die("failure: fchownat %s", file);
9467                 }
9468
9469                 exit(EXIT_SUCCESS);
9470         }
9471         if (wait_for_pid(pid))
9472                 goto out;
9473
9474         /* Verify that chown works correctly for callers in the third userns. */
9475         pid = fork();
9476         if (pid < 0) {
9477                 log_stderr("failure: fork");
9478                 goto out;
9479         }
9480         if (pid == 0) {
9481                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9482                         die("failure: switch_userns");
9483
9484                 for (id = 0; id <= id_file_range; id++) {
9485                         unsigned int id_new;
9486                         char file[256];
9487
9488                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9489
9490                         id_new = id + 1;
9491                         if (id_new == 999 || id_new == 1000) {
9492                                 /*
9493                                  * We can't change ownership as we can't
9494                                  * chown from or to an unmapped id.
9495                                  */
9496                                 if (!fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9497                                         die("failure: fchownat %s", file);
9498                         } else {
9499                                 if (fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9500                                         die("failure: fchownat %s", file);
9501                         }
9502
9503                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9504                                 die("failure: check ownership %s", file);
9505
9506                         /* There's no id 1000 anymore as we changed ownership for id 1000 to 1001 above. */
9507                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9508                                 die("failure: check ownership %s", file);
9509
9510                         if (id_new == 999) {
9511                                 /*
9512                                  * We did not change ownership as we can't
9513                                  * chown to an unmapped id.
9514                                  */
9515                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id, id))
9516                                         die("failure: check ownership %s", file);
9517                         } else if (id_new == 1000) {
9518                                 /*
9519                                  * We did not change ownership as we can't
9520                                  * chown from an unmapped id.
9521                                  */
9522                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9523                                         die("failure: check ownership %s", file);
9524                         } else {
9525                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id_new, id_new))
9526                                         die("failure: check ownership %s", file);
9527                         }
9528
9529                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9530                                 die("failure: check ownership %s", file);
9531
9532                         /* Revert ownership. */
9533                         if (id_new != 999 && id_new != 1000) {
9534                                 if (fchownat(fd_open_tree_level3, file, id, id, AT_SYMLINK_NOFOLLOW))
9535                                         die("failure: fchownat %s", file);
9536                         }
9537                 }
9538
9539                 exit(EXIT_SUCCESS);
9540         }
9541         if (wait_for_pid(pid))
9542                 goto out;
9543
9544         /* Verify that chown works correctly for callers in the fourth userns. */
9545         pid = fork();
9546         if (pid < 0) {
9547                 log_stderr("failure: fork");
9548                 goto out;
9549         }
9550         if (pid == 0) {
9551                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9552                         die("failure: switch_userns");
9553
9554                 for (id = 0; id <= id_file_range; id++) {
9555                         char file[256];
9556                         unsigned long id_new;
9557
9558                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9559
9560                         id_new = id + 1;
9561                         if (!fchownat(fd_open_tree_level4, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9562                                 die("failure: fchownat %s", file);
9563
9564                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9565                                 die("failure: check ownership %s", file);
9566
9567                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9568                                 die("failure: check ownership %s", file);
9569
9570                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9571                                 die("failure: check ownership %s", file);
9572
9573                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9574                                 die("failure: check ownership %s", file);
9575
9576                 }
9577
9578                 exit(EXIT_SUCCESS);
9579         }
9580         if (wait_for_pid(pid))
9581                 goto out;
9582
9583         fret = 0;
9584         log_debug("Ran test");
9585
9586 out:
9587         list_for_each_safe(it, &hierarchy[0].id_map, next) {
9588                 list_del(it);
9589                 free(it->elem);
9590                 free(it);
9591         }
9592
9593         list_for_each_safe(it, &hierarchy[1].id_map, next) {
9594                 list_del(it);
9595                 free(it->elem);
9596                 free(it);
9597         }
9598
9599         list_for_each_safe(it, &hierarchy[2].id_map, next) {
9600                 list_del(it);
9601                 free(it->elem);
9602                 free(it);
9603         }
9604
9605         safe_close(hierarchy[0].fd_userns);
9606         safe_close(hierarchy[1].fd_userns);
9607         safe_close(hierarchy[2].fd_userns);
9608         safe_close(fd_dir1);
9609         safe_close(fd_open_tree_level1);
9610         safe_close(fd_open_tree_level2);
9611         safe_close(fd_open_tree_level3);
9612         safe_close(fd_open_tree_level4);
9613         return fret;
9614 }
9615
/*
 * Fallback definition of struct btrfs_ioctl_vol_args, compiled in only when
 * HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS is not defined. It is the argument type
 * of BTRFS_IOC_SNAP_DESTROY as defined in this file. With BTRFS_PATH_NAME_MAX
 * at 4087, the 8-byte fd plus the 4088-byte name add up to 4096 bytes.
 */
9616 #ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS
9617
9618 #ifndef BTRFS_PATH_NAME_MAX
9619 #define BTRFS_PATH_NAME_MAX 4087
9620 #endif
9621
9622 struct btrfs_ioctl_vol_args {
9623         __s64 fd; /* never assigned by btrfs_delete_subvolume(); stays zero there */
9624         char name[BTRFS_PATH_NAME_MAX + 1]; /* NUL-terminated name; btrfs_delete_subvolume() copies the subvolume name here */
9625 };
9626 #endif
9627
/*
 * Fallback definition of struct btrfs_qgroup_limit, compiled in only when
 * HAVE_STRUCT_BTRFS_QGROUP_LIMIT is not defined. In this file it is only
 * needed as the type of the `lim` member of struct btrfs_qgroup_inherit.
 * NOTE(review): field meanings presumably follow the kernel's linux/btrfs.h;
 * nothing in this part of the file reads or writes them -- confirm there.
 */
9628 #ifndef HAVE_STRUCT_BTRFS_QGROUP_LIMIT
9629 struct btrfs_qgroup_limit {
9630         __u64 flags;
9631         __u64 max_rfer;
9632         __u64 max_excl;
9633         __u64 rsv_rfer;
9634         __u64 rsv_excl;
9635 };
9636 #endif
9637
/*
 * Fallback definition of struct btrfs_qgroup_inherit, compiled in only when
 * HAVE_STRUCT_BTRFS_QGROUP_INHERIT is not defined. struct
 * btrfs_ioctl_vol_args_v2 refers to it through its qgroup_inherit pointer.
 * NOTE(review): the helpers in this part of the file never fill this struct
 * in; field meanings presumably follow linux/btrfs.h -- confirm there.
 */
9638 #ifndef HAVE_STRUCT_BTRFS_QGROUP_INHERIT
9639 struct btrfs_qgroup_inherit {
9640         __u64 flags;
9641         __u64 num_qgroups;
9642         __u64 num_ref_copies;
9643         __u64 num_excl_copies;
9644         struct btrfs_qgroup_limit lim;
9645         __u64 qgroups[0]; /* zero-length trailing array: any entries live directly after the struct */
9646 };
9647 #endif
9648
/*
 * Fallback definition of struct btrfs_ioctl_vol_args_v2. It is compiled in
 * when the struct is missing altogether or when the available one lacks the
 * subvolid member (for that case the top of this file renames the header's
 * struct to override_btrfs_ioctl_vol_args_v2 while including linux/btrfs.h).
 *
 * This is the argument type of the BTRFS_IOC_SNAP_DESTROY_V2,
 * BTRFS_IOC_SNAP_CREATE_V2 and BTRFS_IOC_SUBVOL_CREATE_V2 ioctls as defined
 * in this file. With BTRFS_SUBVOL_NAME_MAX at 4039 the members add up to
 * 4096 bytes: 3 * 8 bytes, a 32-byte union and a 4040-byte union.
 */
9649 #if !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2) || !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID)
9650
9651 #ifndef BTRFS_SUBVOL_NAME_MAX
9652 #define BTRFS_SUBVOL_NAME_MAX 4039
9653 #endif
9654
9655 struct btrfs_ioctl_vol_args_v2 {
9656         __s64 fd; /* btrfs_create_snapshot() stores its `fd` argument here */
9657         __u64 transid; /* not assigned by the helpers in this part of the file */
9658         __u64 flags; /* helpers store BTRFS_SUBVOL_RDONLY or BTRFS_SUBVOL_SPEC_BY_ID here */
9659         union {
9660                 struct {
9661                         __u64 size;
9662                         struct btrfs_qgroup_inherit *qgroup_inherit;
9663                 };
9664                 __u64 unused[4]; /* fixes the size of this union at 32 bytes */
9665         };
9666         union {
9667                 char name[BTRFS_SUBVOL_NAME_MAX + 1]; /* NUL-terminated subvolume/snapshot name */
9668                 __u64 devid;
9669                 __u64 subvolid; /* set by btrfs_delete_subvolume_id() together with BTRFS_SUBVOL_SPEC_BY_ID */
9670         };
9671 };
9672 #endif
9673
/*
 * Fallback definition of struct btrfs_ioctl_ino_lookup_args, compiled in only
 * when HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS is not defined. It is the
 * argument type of BTRFS_IOC_INO_LOOKUP as defined in this file. With
 * BTRFS_INO_LOOKUP_PATH_MAX at 4080 the members add up to 4096 bytes.
 */
9674 #ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS
9675
9676 #ifndef BTRFS_INO_LOOKUP_PATH_MAX
9677 #define BTRFS_INO_LOOKUP_PATH_MAX 4080
9678 #endif
9679 struct btrfs_ioctl_ino_lookup_args {
9680         __u64 treeid; /* btrfs_get_subvolume_id() passes 0 and reads the value back after the ioctl */
9681         __u64 objectid; /* btrfs_get_subvolume_id() passes BTRFS_FIRST_FREE_OBJECTID */
9682         char name[BTRFS_INO_LOOKUP_PATH_MAX]; /* not read by the helpers in this part of the file */
9683 };
9684 #endif
9685
/*
 * Fallback definition of struct btrfs_ioctl_ino_lookup_user_args, compiled in
 * only when HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS is not defined. It is
 * the argument type of BTRFS_IOC_INO_LOOKUP_USER as defined in this file.
 * The path buffer is sized so that name and path together take 4080 bytes,
 * which makes the whole struct 4096 bytes.
 * NOTE(review): no code in this part of the file fills the struct in; field
 * meanings presumably follow linux/btrfs.h -- confirm there.
 */
9686 #ifndef HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS
9687
9688 #ifndef BTRFS_VOL_NAME_MAX
9689 #define BTRFS_VOL_NAME_MAX 255
9690 #endif
9691
9692 #ifndef BTRFS_INO_LOOKUP_USER_PATH_MAX
9693 #define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
9694 #endif
9695
9696 struct btrfs_ioctl_ino_lookup_user_args {
9697         __u64 dirid;
9698         __u64 treeid;
9699         char name[BTRFS_VOL_NAME_MAX + 1];
9700         char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
9701 };
9702 #endif
9703
/*
 * Fallback definition of struct btrfs_ioctl_get_subvol_rootref_args, compiled
 * in only when HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS is not defined.
 * It is the argument type of BTRFS_IOC_GET_SUBVOL_ROOTREF as defined in this
 * file and is embedded in struct btrfs_stack. The members add up to 4096
 * bytes: 8 + 255 * 16 + 1 + 7.
 */
9704 #ifndef HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS
9705
9706 #ifndef BTRFS_MAX_ROOTREF_BUFFER_NUM
9707 #define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
9708 #endif
9709
9710 struct btrfs_ioctl_get_subvol_rootref_args {
9711         __u64 min_treeid;
9712         struct {
9713                 __u64 treeid;
9714                 __u64 dirid;
9715         } rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];
9716         __u8 num_items; /* presumably the number of valid rootref[] entries -- TODO confirm against linux/btrfs.h */
9717         __u8 align[7]; /* pads the struct out to a multiple of 8 bytes */
9718 };
9719 #endif
9720
/*
 * Fallback btrfs ioctl numbers and constants. Every definition is guarded by
 * #ifndef, so it only takes effect when nothing included earlier already
 * provides that name.
 */
9721 #ifndef BTRFS_IOCTL_MAGIC
9722 #define BTRFS_IOCTL_MAGIC 0x94
9723 #endif
9724
/* Takes struct btrfs_ioctl_vol_args; issued by btrfs_delete_subvolume(). */
9725 #ifndef BTRFS_IOC_SNAP_DESTROY
9726 #define BTRFS_IOC_SNAP_DESTROY \
9727         _IOW(BTRFS_IOCTL_MAGIC, 15, struct btrfs_ioctl_vol_args)
9728 #endif
9729
/* Takes struct btrfs_ioctl_vol_args_v2; issued by btrfs_delete_subvolume_id(). */
9730 #ifndef BTRFS_IOC_SNAP_DESTROY_V2
9731 #define BTRFS_IOC_SNAP_DESTROY_V2 \
9732         _IOW(BTRFS_IOCTL_MAGIC, 63, struct btrfs_ioctl_vol_args_v2)
9733 #endif
9734
/* Takes struct btrfs_ioctl_vol_args_v2; issued by btrfs_create_snapshot(). */
9735 #ifndef BTRFS_IOC_SNAP_CREATE_V2
9736 #define BTRFS_IOC_SNAP_CREATE_V2 \
9737         _IOW(BTRFS_IOCTL_MAGIC, 23, struct btrfs_ioctl_vol_args_v2)
9738 #endif
9739
/* Takes struct btrfs_ioctl_vol_args_v2; issued by btrfs_create_subvolume(). */
9740 #ifndef BTRFS_IOC_SUBVOL_CREATE_V2
9741 #define BTRFS_IOC_SUBVOL_CREATE_V2 \
9742         _IOW(BTRFS_IOCTL_MAGIC, 24, struct btrfs_ioctl_vol_args_v2)
9743 #endif
9744
/* Reads a 64-bit flags word; issued by btrfs_get_subvolume_ro() and btrfs_set_subvolume_ro(). */
9745 #ifndef BTRFS_IOC_SUBVOL_GETFLAGS
9746 #define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
9747 #endif
9748
/* Writes a 64-bit flags word; issued by btrfs_set_subvolume_ro(). */
9749 #ifndef BTRFS_IOC_SUBVOL_SETFLAGS
9750 #define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
9751 #endif
9752
/* Takes struct btrfs_ioctl_ino_lookup_args; issued by btrfs_get_subvolume_id(). */
9753 #ifndef BTRFS_IOC_INO_LOOKUP
9754 #define BTRFS_IOC_INO_LOOKUP \
9755         _IOWR(BTRFS_IOCTL_MAGIC, 18, struct btrfs_ioctl_ino_lookup_args)
9756 #endif
9757
/* Takes struct btrfs_ioctl_ino_lookup_user_args; no caller is visible in this part of the file. */
9758 #ifndef BTRFS_IOC_INO_LOOKUP_USER
9759 #define BTRFS_IOC_INO_LOOKUP_USER \
9760         _IOWR(BTRFS_IOCTL_MAGIC, 62, struct btrfs_ioctl_ino_lookup_user_args)
9761 #endif
9762
/* Takes struct btrfs_ioctl_get_subvol_rootref_args; no caller is visible in this part of the file. */
9763 #ifndef BTRFS_IOC_GET_SUBVOL_ROOTREF
9764 #define BTRFS_IOC_GET_SUBVOL_ROOTREF \
9765         _IOWR(BTRFS_IOCTL_MAGIC, 61, struct btrfs_ioctl_get_subvol_rootref_args)
9766 #endif
9767
/* Flag bit tested and toggled by the read-only helpers and accepted by btrfs_create_snapshot(). */
9768 #ifndef BTRFS_SUBVOL_RDONLY
9769 #define BTRFS_SUBVOL_RDONLY (1ULL << 1)
9770 #endif
9771
/* Flag bit btrfs_delete_subvolume_id() sets when it fills in subvolid instead of name. */
9772 #ifndef BTRFS_SUBVOL_SPEC_BY_ID
9773 #define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
9774 #endif
9775
/* Object id btrfs_get_subvolume_id() passes as objectid to BTRFS_IOC_INO_LOOKUP. */
9776 #ifndef BTRFS_FIRST_FREE_OBJECTID
9777 #define BTRFS_FIRST_FREE_OBJECTID 256ULL
9778 #endif
9779
9780 static int btrfs_delete_subvolume(int parent_fd, const char *name)
9781 {
9782         struct btrfs_ioctl_vol_args args = {};
9783         size_t len;
9784         int ret;
9785
9786         len = strlen(name);
9787         if (len >= sizeof(args.name))
9788                 return -ENAMETOOLONG;
9789
9790         memcpy(args.name, name, len);
9791         args.name[len] = '\0';
9792
9793         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY, &args);
9794         if (ret < 0)
9795                 return -1;
9796
9797         return 0;
9798 }
9799
9800 static int btrfs_delete_subvolume_id(int parent_fd, uint64_t subvolid)
9801 {
9802         struct btrfs_ioctl_vol_args_v2 args = {};
9803         int ret;
9804
9805         args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
9806         args.subvolid = subvolid;
9807
9808         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY_V2, &args);
9809         if (ret < 0)
9810                 return -1;
9811
9812         return 0;
9813 }
9814
9815 static int btrfs_create_subvolume(int parent_fd, const char *name)
9816 {
9817         struct btrfs_ioctl_vol_args_v2 args = {};
9818         size_t len;
9819         int ret;
9820
9821         len = strlen(name);
9822         if (len >= sizeof(args.name))
9823                 return -ENAMETOOLONG;
9824
9825         memcpy(args.name, name, len);
9826         args.name[len] = '\0';
9827
9828         ret = ioctl(parent_fd, BTRFS_IOC_SUBVOL_CREATE_V2, &args);
9829         if (ret < 0)
9830                 return -1;
9831
9832         return 0;
9833 }
9834
9835 static int btrfs_create_snapshot(int fd, int parent_fd, const char *name,
9836                                  int flags)
9837 {
9838         struct btrfs_ioctl_vol_args_v2 args = {
9839                 .fd = fd,
9840         };
9841         size_t len;
9842         int ret;
9843
9844         if (flags & ~BTRFS_SUBVOL_RDONLY)
9845                 return -EINVAL;
9846
9847         len = strlen(name);
9848         if (len >= sizeof(args.name))
9849                 return -ENAMETOOLONG;
9850         memcpy(args.name, name, len);
9851         args.name[len] = '\0';
9852
9853         if (flags & BTRFS_SUBVOL_RDONLY)
9854                 args.flags |= BTRFS_SUBVOL_RDONLY;
9855         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_CREATE_V2, &args);
9856         if (ret < 0)
9857                 return -1;
9858
9859         return 0;
9860 }
9861
9862 static int btrfs_get_subvolume_ro(int fd, bool *read_only_ret)
9863 {
9864         uint64_t flags;
9865         int ret;
9866
9867         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9868         if (ret < 0)
9869                 return -1;
9870
9871         *read_only_ret = flags & BTRFS_SUBVOL_RDONLY;
9872         return 0;
9873 }
9874
9875 static int btrfs_set_subvolume_ro(int fd, bool read_only)
9876 {
9877         uint64_t flags;
9878         int ret;
9879
9880         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9881         if (ret < 0)
9882                 return -1;
9883
9884         if (read_only)
9885                 flags |= BTRFS_SUBVOL_RDONLY;
9886         else
9887                 flags &= ~BTRFS_SUBVOL_RDONLY;
9888
9889         ret = ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
9890         if (ret < 0)
9891                 return -1;
9892
9893         return 0;
9894 }
9895
9896 static int btrfs_get_subvolume_id(int fd, uint64_t *id_ret)
9897 {
9898         struct btrfs_ioctl_ino_lookup_args args = {
9899             .treeid = 0,
9900             .objectid = BTRFS_FIRST_FREE_OBJECTID,
9901         };
9902         int ret;
9903
9904         ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
9905         if (ret < 0)
9906                 return -1;
9907
9908         *id_ret = args.treeid;
9909
9910         return 0;
9911 }
9912
/*
 * The following helpers are adapted from the libbtrfsutil library. We can't
 * use the library directly since we need full control over how the subvolume
 * iteration happens. We need to be able to check whether unprivileged
 * subvolume iteration is possible, i.e. whether BTRFS_IOC_INO_LOOKUP_USER is
 * available, and also to ensure that it is actually used when looking up
 * paths.
 */
9920 struct btrfs_stack {
9921         uint64_t tree_id;
9922         struct btrfs_ioctl_get_subvol_rootref_args rootref_args;
9923         size_t items_pos;
9924         size_t path_len;
9925 };
9926
/*
 * State of a subvolume iteration.
 *
 * fd                - file descriptor the iteration was started on
 * cur_fd            - file descriptor of the level currently being walked;
 *                     identical to fd until the iteration descends
 * search_stack      - array of stack_capacity levels of which the first
 *                     stack_len are in use
 * cur_path          - buffer of cur_path_capacity bytes holding the path
 *                     assembled so far
 */
struct btrfs_iter {
        int fd, cur_fd;

        struct btrfs_stack *search_stack;
        size_t stack_len, stack_capacity;

        char *cur_path;
        size_t cur_path_capacity;
};
9938
9939 static struct btrfs_stack *top_stack_entry(struct btrfs_iter *iter)
9940 {
9941         return &iter->search_stack[iter->stack_len - 1];
9942 }
9943
9944 static int pop_stack(struct btrfs_iter *iter)
9945 {
9946         struct btrfs_stack *top, *parent;
9947         int fd, parent_fd;
9948         size_t i;
9949
9950         if (iter->stack_len == 1) {
9951                 iter->stack_len--;
9952                 return 0;
9953         }
9954
9955         top = top_stack_entry(iter);
9956         iter->stack_len--;
9957         parent = top_stack_entry(iter);
9958
9959         fd = iter->cur_fd;
9960         for (i = parent->path_len; i < top->path_len; i++) {
9961                 if (i == 0 || iter->cur_path[i] == '/') {
9962                         parent_fd = openat(fd, "..", O_RDONLY);
9963                         if (fd != iter->cur_fd)
9964                                 close(fd);
9965                         if (parent_fd == -1)
9966                                 return -1;
9967                         fd = parent_fd;
9968                 }
9969         }
9970         if (iter->cur_fd != iter->fd)
9971                 close(iter->cur_fd);
9972         iter->cur_fd = fd;
9973
9974         return 0;
9975 }
9976
9977 static int append_stack(struct btrfs_iter *iter, uint64_t tree_id, size_t path_len)
9978 {
9979         struct btrfs_stack *entry;
9980
9981         if (iter->stack_len >= iter->stack_capacity) {
9982                 size_t new_capacity = iter->stack_capacity * 2;
9983                 struct btrfs_stack *new_search_stack;
9984 #ifdef HAVE_REALLOCARRAY
9985                 new_search_stack = reallocarray(iter->search_stack, new_capacity,
9986                                                 sizeof(*iter->search_stack));
9987 #else
9988                 new_search_stack = realloc(iter->search_stack, new_capacity * sizeof(*iter->search_stack));
9989 #endif
9990                 if (!new_search_stack)
9991                         return -ENOMEM;
9992
9993                 iter->stack_capacity = new_capacity;
9994                 iter->search_stack = new_search_stack;
9995         }
9996
9997         entry = &iter->search_stack[iter->stack_len];
9998
9999         memset(entry, 0, sizeof(*entry));
10000         entry->path_len = path_len;
10001         entry->tree_id = tree_id;
10002
10003         if (iter->stack_len) {
10004                 struct btrfs_stack *top;
10005                 char *path;
10006                 int fd;
10007
10008                 top = top_stack_entry(iter);
10009                 path = &iter->cur_path[top->path_len];
10010                 if (*path == '/')
10011                         path++;
10012                 fd = openat(iter->cur_fd, path, O_RDONLY);
10013                 if (fd == -1)
10014                         return -errno;
10015
10016                 close(iter->cur_fd);
10017                 iter->cur_fd = fd;
10018         }
10019
10020         iter->stack_len++;
10021
10022         return 0;
10023 }
10024
10025 static int btrfs_iterator_start(int fd, uint64_t top, struct btrfs_iter **ret)
10026 {
10027         struct btrfs_iter *iter;
10028         int err;
10029
10030         iter = malloc(sizeof(*iter));
10031         if (!iter)
10032                 return -ENOMEM;
10033
10034         iter->fd = fd;
10035         iter->cur_fd = fd;
10036
10037         iter->stack_len = 0;
10038         iter->stack_capacity = 4;
10039         iter->search_stack = malloc(sizeof(*iter->search_stack) *
10040                                     iter->stack_capacity);
10041         if (!iter->search_stack) {
10042                 err = -ENOMEM;
10043                 goto out_iter;
10044         }
10045
10046         iter->cur_path_capacity = 256;
10047         iter->cur_path = malloc(iter->cur_path_capacity);
10048         if (!iter->cur_path) {
10049                 err = -ENOMEM;
10050                 goto out_search_stack;
10051         }
10052
10053         err = append_stack(iter, top, 0);
10054         if (err)
10055                 goto out_cur_path;
10056
10057         *ret = iter;
10058
10059         return 0;
10060
10061 out_cur_path:
10062         free(iter->cur_path);
10063 out_search_stack:
10064         free(iter->search_stack);
10065 out_iter:
10066         free(iter);
10067         return err;
10068 }
10069
10070 static void btrfs_iterator_end(struct btrfs_iter *iter)
10071 {
10072         if (iter) {
10073                 free(iter->cur_path);
10074                 free(iter->search_stack);
10075                 if (iter->cur_fd != iter->fd)
10076                         close(iter->cur_fd);
10077                 close(iter->fd);
10078                 free(iter);
10079         }
10080 }
10081
10082 static int __append_path(struct btrfs_iter *iter, const char *name,
10083                          size_t name_len, const char *dir, size_t dir_len,
10084                          size_t *path_len_ret)
10085 {
10086         struct btrfs_stack *top = top_stack_entry(iter);
10087         size_t path_len;
10088         char *p;
10089
10090         path_len = top->path_len;
10091         /*
10092          * We need a joining slash if we have a current path and a subdirectory.
10093          */
10094         if (top->path_len && dir_len)
10095                 path_len++;
10096         path_len += dir_len;
10097         /*
10098          * We need another joining slash if we have a current path and a name,
10099          * but not if we have a subdirectory, because the lookup ioctl includes
10100          * a trailing slash.
10101          */
10102         if (top->path_len && !dir_len && name_len)
10103                 path_len++;
10104         path_len += name_len;
10105
10106         /* We need one extra character for the NUL terminator. */
10107         if (path_len + 1 > iter->cur_path_capacity) {
10108                 char *tmp = realloc(iter->cur_path, path_len + 1);
10109
10110                 if (!tmp)
10111                         return -ENOMEM;
10112                 iter->cur_path = tmp;
10113                 iter->cur_path_capacity = path_len + 1;
10114         }
10115
10116         p = iter->cur_path + top->path_len;
10117         if (top->path_len && dir_len)
10118                 *p++ = '/';
10119         memcpy(p, dir, dir_len);
10120         p += dir_len;
10121         if (top->path_len && !dir_len && name_len)
10122                 *p++ = '/';
10123         memcpy(p, name, name_len);
10124         p += name_len;
10125         *p = '\0';
10126
10127         *path_len_ret = path_len;
10128
10129         return 0;
10130 }
10131
10132 static int get_subvolume_path(struct btrfs_iter *iter, uint64_t treeid,
10133                               uint64_t dirid, size_t *path_len_ret)
10134 {
10135         struct btrfs_ioctl_ino_lookup_user_args args = {
10136                 .treeid = treeid,
10137                 .dirid = dirid,
10138         };
10139         int ret;
10140
10141         ret = ioctl(iter->cur_fd, BTRFS_IOC_INO_LOOKUP_USER, &args);
10142         if (ret == -1)
10143                 return -1;
10144
10145         return __append_path(iter, args.name, strlen(args.name), args.path,
10146                              strlen(args.path), path_len_ret);
10147 }
10148
10149 static int btrfs_iterator_next(struct btrfs_iter *iter, char **path_ret,
10150                                uint64_t *id_ret)
10151 {
10152         struct btrfs_stack *top;
10153         uint64_t treeid, dirid;
10154         size_t path_len;
10155         int ret, err;
10156
10157         for (;;) {
10158                 for (;;) {
10159                         if (iter->stack_len == 0)
10160                                 return 1;
10161
10162                         top = top_stack_entry(iter);
10163                         if (top->items_pos < top->rootref_args.num_items) {
10164                                 break;
10165                         } else {
10166                                 ret = ioctl(iter->cur_fd,
10167                                             BTRFS_IOC_GET_SUBVOL_ROOTREF,
10168                                             &top->rootref_args);
10169                                 if (ret == -1 && errno != EOVERFLOW)
10170                                         return -1;
10171                                 top->items_pos = 0;
10172
10173                                 if (top->rootref_args.num_items == 0) {
10174                                         err = pop_stack(iter);
10175                                         if (err)
10176                                                 return err;
10177                                 }
10178                         }
10179                 }
10180
10181                 treeid = top->rootref_args.rootref[top->items_pos].treeid;
10182                 dirid = top->rootref_args.rootref[top->items_pos].dirid;
10183                 top->items_pos++;
10184                 err = get_subvolume_path(iter, treeid, dirid, &path_len);
10185                 if (err) {
10186                         /* Skip the subvolume if we can't access it. */
10187                         if (errno == EACCES)
10188                                 continue;
10189                         return err;
10190                 }
10191
10192                 err = append_stack(iter, treeid, path_len);
10193                 if (err) {
10194                         /*
10195                          * Skip the subvolume if it does not exist (which can
10196                          * happen if there is another filesystem mounted over a
10197                          * parent directory) or we don't have permission to
10198                          * access it.
10199                          */
10200                         if (errno == ENOENT || errno == EACCES)
10201                                 continue;
10202                         return err;
10203                 }
10204
10205                 top = top_stack_entry(iter);
10206                 goto out;
10207         }
10208
10209 out:
10210         if (path_ret) {
10211                 *path_ret = malloc(top->path_len + 1);
10212                 if (!*path_ret)
10213                         return -ENOMEM;
10214                 memcpy(*path_ret, iter->cur_path, top->path_len);
10215                 (*path_ret)[top->path_len] = '\0';
10216         }
10217         if (id_ret)
10218                 *id_ret = top->tree_id;
10219         return 0;
10220 }
10221
/* Names of the subvolumes and snapshots used by the btrfs tests. */
#define BTRFS_SUBVOLUME1                "subvol1"
#define BTRFS_SUBVOLUME1_SNAPSHOT1      "subvol1_snapshot1"
#define BTRFS_SUBVOLUME1_SNAPSHOT1_RO   "subvol1_snapshot1_ro"
#define BTRFS_SUBVOLUME1_RENAME         "subvol1_rename"
#define BTRFS_SUBVOLUME2                "subvol2"
10227
10228 static int btrfs_subvolumes_fsids_mapped(void)
10229 {
10230         int fret = -1;
10231         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10232         struct mount_attr attr = {
10233                 .attr_set = MOUNT_ATTR_IDMAP,
10234         };
10235         pid_t pid;
10236
10237         if (!caps_supported())
10238                 return 0;
10239
10240         /* Changing mount properties on a detached mount. */
10241         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10242         if (attr.userns_fd < 0) {
10243                 log_stderr("failure: get_userns_fd");
10244                 goto out;
10245         }
10246
10247         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10248                                      AT_EMPTY_PATH |
10249                                      AT_NO_AUTOMOUNT |
10250                                      AT_SYMLINK_NOFOLLOW |
10251                                      OPEN_TREE_CLOEXEC |
10252                                      OPEN_TREE_CLONE);
10253         if (open_tree_fd < 0) {
10254                 log_stderr("failure: sys_open_tree");
10255                 goto out;
10256         }
10257
10258         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10259                 log_stderr("failure: sys_mount_setattr");
10260                 goto out;
10261         }
10262
10263         /*
10264          * The open_tree() syscall returns an O_PATH file descriptor which we
10265          * can't use with ioctl(). So let's reopen it as a proper file
10266          * descriptor.
10267          */
10268         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10269         if (tree_fd < 0) {
10270                 log_stderr("failure: openat");
10271                 goto out;
10272         }
10273
10274         pid = fork();
10275         if (pid < 0) {
10276                 log_stderr("failure: fork");
10277                 goto out;
10278         }
10279         if (pid == 0) {
10280                 if (!switch_fsids(10000, 10000))
10281                         die("failure: switch fsids");
10282
10283                 if (!caps_up())
10284                         die("failure: raise caps");
10285
10286                 /*
10287                  * The caller's fsids now have mappings in the idmapped mount so
10288                  * any file creation must succeed.
10289                  */
10290
10291                 /* create subvolume */
10292                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10293                         die("failure: btrfs_create_subvolume");
10294
10295                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10296                         die("failure: check ownership");
10297
10298                 /* remove subvolume */
10299                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10300                         die("failure: btrfs_delete_subvolume");
10301
10302                 /* create subvolume */
10303                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10304                         die("failure: btrfs_create_subvolume");
10305
10306                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10307                         die("failure: check ownership");
10308
10309                 if (!caps_down())
10310                         die("failure: lower caps");
10311
10312                 /*
10313                  * The filesystem is not mounted with user_subvol_rm_allowed so
10314                  * subvolume deletion must fail.
10315                  */
10316                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10317                         die("failure: btrfs_delete_subvolume");
10318                 if (errno != EPERM)
10319                         die("failure: errno");
10320
10321                 exit(EXIT_SUCCESS);
10322         }
10323         if (wait_for_pid(pid))
10324                 goto out;
10325
10326         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10327                 die("failure: check ownership");
10328
10329         /* remove subvolume */
10330         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10331                 log_stderr("failure: btrfs_delete_subvolume");
10332                 goto out;
10333         }
10334
10335         fret = 0;
10336         log_debug("Ran test");
10337 out:
10338         safe_close(attr.userns_fd);
10339         safe_close(open_tree_fd);
10340         safe_close(tree_fd);
10341
10342         return fret;
10343 }
10344
10345 static int btrfs_subvolumes_fsids_mapped_userns(void)
10346 {
10347         int fret = -1;
10348         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10349         struct mount_attr attr = {
10350                 .attr_set = MOUNT_ATTR_IDMAP,
10351         };
10352         pid_t pid;
10353
10354         if (!caps_supported())
10355                 return 0;
10356
10357         /* Changing mount properties on a detached mount. */
10358         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10359         if (attr.userns_fd < 0) {
10360                 log_stderr("failure: get_userns_fd");
10361                 goto out;
10362         }
10363
10364         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10365                                      AT_EMPTY_PATH |
10366                                      AT_NO_AUTOMOUNT |
10367                                      AT_SYMLINK_NOFOLLOW |
10368                                      OPEN_TREE_CLOEXEC |
10369                                      OPEN_TREE_CLONE);
10370         if (open_tree_fd < 0) {
10371                 log_stderr("failure: sys_open_tree");
10372                 goto out;
10373         }
10374
10375         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10376                 log_stderr("failure: sys_mount_setattr");
10377                 goto out;
10378         }
10379
10380         /*
10381          * The open_tree() syscall returns an O_PATH file descriptor which we
10382          * can't use with ioctl(). So let's reopen it as a proper file
10383          * descriptor.
10384          */
10385         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10386         if (tree_fd < 0) {
10387                 log_stderr("failure: openat");
10388                 goto out;
10389         }
10390
10391         pid = fork();
10392         if (pid < 0) {
10393                 log_stderr("failure: fork");
10394                 goto out;
10395         }
10396         if (pid == 0) {
10397                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10398                         die("failure: switch_userns");
10399
10400                 /* The caller's fsids now have mappings in the idmapped mount so
10401                  * any file creation must fail.
10402                  */
10403
10404                 /* create subvolume */
10405                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10406                         die("failure: btrfs_create_subvolume");
10407
10408                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10409                         die("failure: check ownership");
10410
10411                 /* remove subvolume */
10412                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10413                         die("failure: btrfs_delete_subvolume");
10414
10415                 exit(EXIT_SUCCESS);
10416         }
10417         if (wait_for_pid(pid))
10418                 goto out;
10419
10420         /* remove subvolume */
10421         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10422                 log_stderr("failure: btrfs_delete_subvolume");
10423                 goto out;
10424         }
10425
10426         fret = 0;
10427         log_debug("Ran test");
10428 out:
10429         safe_close(attr.userns_fd);
10430         safe_close(open_tree_fd);
10431         safe_close(tree_fd);
10432
10433         return fret;
10434 }
10435
10436 static int btrfs_subvolumes_fsids_unmapped(void)
10437 {
10438         int fret = -1;
10439         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10440         struct mount_attr attr = {
10441                 .attr_set = MOUNT_ATTR_IDMAP,
10442         };
10443
10444         /* create directory for rename test */
10445         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10446                 log_stderr("failure: btrfs_create_subvolume");
10447                 goto out;
10448         }
10449
10450         /* change ownership of all files to uid 0 */
10451         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10452                 log_stderr("failure: fchownat");
10453                 goto out;
10454         }
10455
10456         /* Changing mount properties on a detached mount. */
10457         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10458         if (attr.userns_fd < 0) {
10459                 log_stderr("failure: get_userns_fd");
10460                 goto out;
10461         }
10462
10463         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10464                                      AT_EMPTY_PATH |
10465                                      AT_NO_AUTOMOUNT |
10466                                      AT_SYMLINK_NOFOLLOW |
10467                                      OPEN_TREE_CLOEXEC |
10468                                      OPEN_TREE_CLONE);
10469         if (open_tree_fd < 0) {
10470                 log_stderr("failure: sys_open_tree");
10471                 goto out;
10472         }
10473
10474         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10475                 log_stderr("failure: sys_mount_setattr");
10476                 goto out;
10477         }
10478
10479         if (!switch_fsids(0, 0)) {
10480                 log_stderr("failure: switch_fsids");
10481                 goto out;
10482         }
10483
10484         /*
10485          * The caller's fsids don't have a mappings in the idmapped mount so
10486          * any file creation must fail.
10487          */
10488
10489         /*
10490          * The open_tree() syscall returns an O_PATH file descriptor which we
10491          * can't use with ioctl(). So let's reopen it as a proper file
10492          * descriptor.
10493          */
10494         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10495         if (tree_fd < 0) {
10496                 log_stderr("failure: openat");
10497                 goto out;
10498         }
10499
10500         /* create subvolume */
10501         if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2)) {
10502                 log_stderr("failure: btrfs_create_subvolume");
10503                 goto out;
10504         }
10505         if (errno != EOVERFLOW) {
10506                 log_stderr("failure: errno");
10507                 goto out;
10508         }
10509
10510         /* try to rename a subvolume */
10511         if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10512                        BTRFS_SUBVOLUME1_RENAME)) {
10513                 log_stderr("failure: renameat");
10514                 goto out;
10515         }
10516         if (errno != EOVERFLOW) {
10517                 log_stderr("failure: errno");
10518                 goto out;
10519         }
10520
10521         /* The caller is privileged over the inode so file deletion must work. */
10522
10523         /* remove subvolume */
10524         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10525                 log_stderr("failure: btrfs_delete_subvolume");
10526                 goto out;
10527         }
10528
10529         fret = 0;
10530         log_debug("Ran test");
10531 out:
10532         safe_close(attr.userns_fd);
10533         safe_close(open_tree_fd);
10534         safe_close(tree_fd);
10535
10536         return fret;
10537 }
10538
10539 static int btrfs_subvolumes_fsids_unmapped_userns(void)
10540 {
10541         int fret = -1;
10542         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
10543         struct mount_attr attr = {
10544                 .attr_set = MOUNT_ATTR_IDMAP,
10545         };
10546         pid_t pid;
10547
10548         /* create directory for rename test */
10549         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10550                 log_stderr("failure: btrfs_create_subvolume");
10551                 goto out;
10552         }
10553
10554         /* change ownership of all files to uid 0 */
10555         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10556                 log_stderr("failure: fchownat");
10557                 goto out;
10558         }
10559
10560         /* Changing mount properties on a detached mount. */
10561         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10562         if (attr.userns_fd < 0) {
10563                 log_stderr("failure: get_userns_fd");
10564                 goto out;
10565         }
10566
10567         /* Changing mount properties on a detached mount. */
10568         userns_fd = get_userns_fd(0, 30000, 10000);
10569         if (userns_fd < 0) {
10570                 log_stderr("failure: get_userns_fd");
10571                 goto out;
10572         }
10573
10574         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10575                                      AT_EMPTY_PATH |
10576                                      AT_NO_AUTOMOUNT |
10577                                      AT_SYMLINK_NOFOLLOW |
10578                                      OPEN_TREE_CLOEXEC |
10579                                      OPEN_TREE_CLONE);
10580         if (open_tree_fd < 0) {
10581                 log_stderr("failure: sys_open_tree");
10582                 goto out;
10583         }
10584
10585         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10586                 log_stderr("failure: sys_mount_setattr");
10587                 goto out;
10588         }
10589
10590         /*
10591          * The open_tree() syscall returns an O_PATH file descriptor which we
10592          * can't use with ioctl(). So let's reopen it as a proper file
10593          * descriptor.
10594          */
10595         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10596         if (tree_fd < 0) {
10597                 log_stderr("failure: openat");
10598                 goto out;
10599         }
10600
10601         pid = fork();
10602         if (pid < 0) {
10603                 log_stderr("failure: fork");
10604                 goto out;
10605         }
10606         if (pid == 0) {
10607                 if (!switch_userns(userns_fd, 0, 0, false))
10608                         die("failure: switch_userns");
10609
10610                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
10611                                       t_overflowuid, t_overflowgid))
10612                         die("failure: expected_uid_gid");
10613
10614                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
10615                                       t_overflowuid, t_overflowgid))
10616                         die("failure: expected_uid_gid");
10617
10618                 /*
10619                  * The caller's fsids don't have a mappings in the idmapped mount so
10620                  * any file creation must fail.
10621                  */
10622
10623                 /* create subvolume */
10624                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
10625                         die("failure: btrfs_create_subvolume");
10626                 if (errno != EOVERFLOW)
10627                         die("failure: errno");
10628
10629                 /* try to rename a subvolume */
10630                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10631                                         BTRFS_SUBVOLUME1_RENAME))
10632                         die("failure: renameat");
10633                 if (errno != EOVERFLOW)
10634                         die("failure: errno");
10635
10636                 /*
10637                  * The caller is not privileged over the inode so subvolume
10638                  * deletion must fail.
10639                  */
10640
10641                 /* remove subvolume */
10642                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10643                         die("failure: btrfs_delete_subvolume");
10644
10645                 exit(EXIT_SUCCESS);
10646         }
10647         if (wait_for_pid(pid))
10648                 goto out;
10649
10650         /* remove subvolume */
10651         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10652                 log_stderr("failure: btrfs_delete_subvolume");
10653                 goto out;
10654         }
10655
10656         fret = 0;
10657         log_debug("Ran test");
10658 out:
10659         safe_close(attr.userns_fd);
10660         safe_close(open_tree_fd);
10661         safe_close(tree_fd);
10662         safe_close(userns_fd);
10663
10664         return fret;
10665 }
10666
10667 static int btrfs_snapshots_fsids_mapped(void)
10668 {
10669         int fret = -1;
10670         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10671         struct mount_attr attr = {
10672                 .attr_set = MOUNT_ATTR_IDMAP,
10673         };
10674         pid_t pid;
10675
10676         if (!caps_supported())
10677                 return 0;
10678
10679         /* Changing mount properties on a detached mount. */
10680         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10681         if (attr.userns_fd < 0) {
10682                 log_stderr("failure: get_userns_fd");
10683                 goto out;
10684         }
10685
10686         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10687                                      AT_EMPTY_PATH |
10688                                      AT_NO_AUTOMOUNT |
10689                                      AT_SYMLINK_NOFOLLOW |
10690                                      OPEN_TREE_CLOEXEC |
10691                                      OPEN_TREE_CLONE);
10692         if (open_tree_fd < 0) {
10693                 log_stderr("failure: sys_open_tree");
10694                 goto out;
10695         }
10696
10697         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10698                 log_stderr("failure: sys_mount_setattr");
10699                 goto out;
10700         }
10701
10702         /*
10703          * The open_tree() syscall returns an O_PATH file descriptor which we
10704          * can't use with ioctl(). So let's reopen it as a proper file
10705          * descriptor.
10706          */
10707         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10708         if (tree_fd < 0) {
10709                 log_stderr("failure: openat");
10710                 goto out;
10711         }
10712
10713         pid = fork();
10714         if (pid < 0) {
10715                 log_stderr("failure: fork");
10716                 goto out;
10717         }
10718         if (pid == 0) {
10719                 int subvolume_fd = -EBADF;
10720
10721                 if (!switch_fsids(10000, 10000))
10722                         die("failure: switch fsids");
10723
10724                 if (!caps_up())
10725                         die("failure: raise caps");
10726
10727                 /* The caller's fsids now have mappings in the idmapped mount so
10728                  * any file creation must fail.
10729                  */
10730
10731                 /* create subvolume */
10732                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10733                         die("failure: btrfs_create_subvolume");
10734
10735                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10736                         die("failure: expected_uid_gid");
10737
10738                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10739                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10740                 if (subvolume_fd < 0)
10741                         die("failure: openat");
10742
10743                 /* create read-write snapshot */
10744                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10745                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10746                         die("failure: btrfs_create_snapshot");
10747
10748                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10749                         die("failure: expected_uid_gid");
10750
10751                 /* create read-only snapshot */
10752                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10753                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10754                                           BTRFS_SUBVOL_RDONLY))
10755                         die("failure: btrfs_create_snapshot");
10756
10757                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10758                         die("failure: expected_uid_gid");
10759
10760                 safe_close(subvolume_fd);
10761
10762                 /* remove subvolume */
10763                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10764                         die("failure: btrfs_delete_subvolume");
10765
10766                 /* remove read-write snapshot */
10767                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
10768                         die("failure: btrfs_delete_subvolume");
10769
10770                 /* remove read-only snapshot */
10771                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
10772                         die("failure: btrfs_delete_subvolume");
10773
10774                 /* create directory */
10775                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10776                         die("failure: btrfs_create_subvolume");
10777
10778                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10779                         die("failure: expected_uid_gid");
10780
10781                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10782                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10783                 if (subvolume_fd < 0)
10784                         die("failure: openat");
10785
10786                 /* create read-write snapshot */
10787                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10788                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10789                         die("failure: btrfs_create_snapshot");
10790
10791                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10792                         die("failure: expected_uid_gid");
10793
10794                 /* create read-only snapshot */
10795                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10796                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10797                                           BTRFS_SUBVOL_RDONLY))
10798                         die("failure: btrfs_create_snapshot");
10799
10800                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10801                         die("failure: expected_uid_gid");
10802
10803                 safe_close(subvolume_fd);
10804
10805                 exit(EXIT_SUCCESS);
10806         }
10807         if (wait_for_pid(pid))
10808                 goto out;
10809
10810         /* remove directory */
10811         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10812                 log_stderr("failure: btrfs_delete_subvolume");
10813                 goto out;
10814         }
10815
10816         /* remove read-write snapshot */
10817         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10818                 log_stderr("failure: btrfs_delete_subvolume");
10819                 goto out;
10820         }
10821
10822         /* remove read-only snapshot */
10823         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10824                 log_stderr("failure: btrfs_delete_subvolume");
10825                 goto out;
10826         }
10827
10828         fret = 0;
10829         log_debug("Ran test");
10830 out:
10831         safe_close(attr.userns_fd);
10832         safe_close(open_tree_fd);
10833         safe_close(tree_fd);
10834
10835         return fret;
10836 }
10837
10838 static int btrfs_snapshots_fsids_mapped_userns(void)
10839 {
10840         int fret = -1;
10841         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10842         struct mount_attr attr = {
10843                 .attr_set = MOUNT_ATTR_IDMAP,
10844         };
10845         pid_t pid;
10846
10847         if (!caps_supported())
10848                 return 0;
10849
10850         /* Changing mount properties on a detached mount. */
10851         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10852         if (attr.userns_fd < 0) {
10853                 log_stderr("failure: get_userns_fd");
10854                 goto out;
10855         }
10856
10857         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10858                                      AT_EMPTY_PATH |
10859                                      AT_NO_AUTOMOUNT |
10860                                      AT_SYMLINK_NOFOLLOW |
10861                                      OPEN_TREE_CLOEXEC |
10862                                      OPEN_TREE_CLONE);
10863         if (open_tree_fd < 0) {
10864                 log_stderr("failure: sys_open_tree");
10865                 goto out;
10866         }
10867
10868         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10869                 log_stderr("failure: sys_mount_setattr");
10870                 goto out;
10871         }
10872
10873         /*
10874          * The open_tree() syscall returns an O_PATH file descriptor which we
10875          * can't use with ioctl(). So let's reopen it as a proper file
10876          * descriptor.
10877          */
10878         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10879         if (tree_fd < 0) {
10880                 log_stderr("failure: openat");
10881                 goto out;
10882         }
10883
10884         pid = fork();
10885         if (pid < 0) {
10886                 log_stderr("failure: fork");
10887                 goto out;
10888         }
10889         if (pid == 0) {
10890                 int subvolume_fd = -EBADF;
10891
10892                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10893                         die("failure: switch_userns");
10894
10895                 /* create subvolume */
10896                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10897                         die("failure: btrfs_create_subvolume");
10898
10899                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10900                         die("failure: expected_uid_gid");
10901
10902                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10903                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10904                 if (subvolume_fd < 0)
10905                         die("failure: openat");
10906
10907                 /* create read-write snapshot */
10908                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10909                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10910                         die("failure: btrfs_create_snapshot");
10911
10912                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
10913                         die("failure: expected_uid_gid");
10914
10915                 /* create read-only snapshot */
10916                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10917                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10918                                           BTRFS_SUBVOL_RDONLY))
10919                         die("failure: btrfs_create_snapshot");
10920
10921                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
10922                         die("failure: expected_uid_gid");
10923
10924                 safe_close(subvolume_fd);
10925
10926                 exit(EXIT_SUCCESS);
10927         }
10928         if (wait_for_pid(pid))
10929                 goto out;
10930
10931         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10932                 die("failure: expected_uid_gid");
10933
10934         /* remove directory */
10935         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10936                 log_stderr("failure: btrfs_delete_subvolume");
10937                 goto out;
10938         }
10939
10940         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10941                 die("failure: expected_uid_gid");
10942
10943         /* remove read-write snapshot */
10944         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10945                 log_stderr("failure: btrfs_delete_subvolume");
10946                 goto out;
10947         }
10948
10949         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10950                 die("failure: expected_uid_gid");
10951
10952         /* remove read-only snapshot */
10953         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10954                 log_stderr("failure: btrfs_delete_subvolume");
10955                 goto out;
10956         }
10957
10958         fret = 0;
10959         log_debug("Ran test");
10960 out:
10961         safe_close(attr.userns_fd);
10962         safe_close(open_tree_fd);
10963         safe_close(tree_fd);
10964
10965         return fret;
10966 }
10967
10968 static int btrfs_snapshots_fsids_unmapped(void)
10969 {
10970         int fret = -1;
10971         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10972         struct mount_attr attr = {
10973                 .attr_set = MOUNT_ATTR_IDMAP,
10974         };
10975         pid_t pid;
10976
10977         if (!caps_supported())
10978                 return 0;
10979
10980         /* create directory for rename test */
10981         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10982                 log_stderr("failure: btrfs_create_subvolume");
10983                 goto out;
10984         }
10985
10986         /* change ownership of all files to uid 0 */
10987         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10988                 log_stderr("failure: fchownat");
10989                 goto out;
10990         }
10991
10992         /* Changing mount properties on a detached mount. */
10993         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10994         if (attr.userns_fd < 0) {
10995                 log_stderr("failure: get_userns_fd");
10996                 goto out;
10997         }
10998
10999         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11000                                      AT_EMPTY_PATH |
11001                                      AT_NO_AUTOMOUNT |
11002                                      AT_SYMLINK_NOFOLLOW |
11003                                      OPEN_TREE_CLOEXEC |
11004                                      OPEN_TREE_CLONE);
11005         if (open_tree_fd < 0) {
11006                 log_stderr("failure: sys_open_tree");
11007                 goto out;
11008         }
11009
11010         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
11011                               sizeof(attr))) {
11012                 log_stderr("failure: sys_mount_setattr");
11013                 goto out;
11014         }
11015
11016         pid = fork();
11017         if (pid < 0) {
11018                 log_stderr("failure: fork");
11019                 goto out;
11020         }
11021         if (pid == 0) {
11022                 int subvolume_fd = -EBADF;
11023
11024                 if (!switch_fsids(0, 0)) {
11025                         log_stderr("failure: switch_fsids");
11026                         goto out;
11027                 }
11028
11029                 /*
11030                  * The caller's fsids don't have a mappings in the idmapped
11031                  * mount so any file creation must fail.
11032                  */
11033
11034                 /*
11035                  * The open_tree() syscall returns an O_PATH file descriptor
11036                  * which we can't use with ioctl(). So let's reopen it as a
11037                  * proper file descriptor.
11038                  */
11039                 tree_fd = openat(open_tree_fd, ".",
11040                                  O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11041                 if (tree_fd < 0)
11042                         die("failure: openat");
11043
11044                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11045                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11046                 if (subvolume_fd < 0)
11047                         die("failure: openat");
11048
11049                 /* create directory */
11050                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
11051                         die("failure: btrfs_create_subvolume");
11052                 if (errno != EOVERFLOW)
11053                         die("failure: errno");
11054
11055                 /* create read-write snapshot */
11056                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11057                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11058                         die("failure: btrfs_create_snapshot");
11059                 if (errno != EOVERFLOW)
11060                         die("failure: errno");
11061
11062                 /* create read-only snapshot */
11063                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11064                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11065                                            BTRFS_SUBVOL_RDONLY))
11066                         die("failure: btrfs_create_snapshot");
11067                 if (errno != EOVERFLOW)
11068                         die("failure: errno");
11069
11070                 /* try to rename a directory */
11071                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11072                                BTRFS_SUBVOLUME1_RENAME))
11073                         die("failure: renameat");
11074                 if (errno != EOVERFLOW)
11075                         die("failure: errno");
11076
11077                 if (!caps_down())
11078                         die("failure: caps_down");
11079
11080                 /* create read-write snapshot */
11081                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11082                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11083                         die("failure: btrfs_create_snapshot");
11084                 if (errno != EPERM)
11085                         die("failure: errno");
11086
11087                 /* create read-only snapshot */
11088                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11089                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11090                                            BTRFS_SUBVOL_RDONLY))
11091                         die("failure: btrfs_create_snapshot");
11092                 if (errno != EPERM)
11093                         die("failure: errno");
11094
11095                 /*
11096                  * The caller is not privileged over the inode so subvolume
11097                  * deletion must fail.
11098                  */
11099
11100                 /* remove directory */
11101                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11102                         die("failure: btrfs_delete_subvolume");
11103                 if (errno != EPERM)
11104                         die("failure: errno");
11105
11106                 if (!caps_up())
11107                         die("failure: caps_down");
11108
11109                 /*
11110                  * The caller is privileged over the inode so subvolume
11111                  * deletion must work.
11112                  */
11113
11114                 /* remove directory */
11115                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11116                         die("failure: btrfs_delete_subvolume");
11117
11118                 exit(EXIT_SUCCESS);
11119         }
11120         if (wait_for_pid(pid))
11121                 goto out;
11122
11123         fret = 0;
11124         log_debug("Ran test");
11125 out:
11126         safe_close(attr.userns_fd);
11127         safe_close(open_tree_fd);
11128         safe_close(tree_fd);
11129
11130         return fret;
11131 }
11132
11133 static int btrfs_snapshots_fsids_unmapped_userns(void)
11134 {
11135         int fret = -1;
11136         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
11137             userns_fd = -EBADF;
11138         struct mount_attr attr = {
11139                 .attr_set = MOUNT_ATTR_IDMAP,
11140         };
11141         pid_t pid;
11142
11143         if (!caps_supported())
11144                 return 0;
11145
11146         /* create directory for rename test */
11147         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
11148                 log_stderr("failure: btrfs_create_subvolume");
11149                 goto out;
11150         }
11151
11152         /* change ownership of all files to uid 0 */
11153         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
11154                 log_stderr("failure: fchownat");
11155                 goto out;
11156         }
11157
11158         /* Changing mount properties on a detached mount. */
11159         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11160         if (attr.userns_fd < 0) {
11161                 log_stderr("failure: get_userns_fd");
11162                 goto out;
11163         }
11164
11165         /* Changing mount properties on a detached mount. */
11166         userns_fd = get_userns_fd(0, 30000, 10000);
11167         if (userns_fd < 0) {
11168                 log_stderr("failure: get_userns_fd");
11169                 goto out;
11170         }
11171
11172         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11173                                      AT_EMPTY_PATH |
11174                                      AT_NO_AUTOMOUNT |
11175                                      AT_SYMLINK_NOFOLLOW |
11176                                      OPEN_TREE_CLOEXEC |
11177                                      OPEN_TREE_CLONE);
11178         if (open_tree_fd < 0) {
11179                 log_stderr("failure: sys_open_tree");
11180                 goto out;
11181         }
11182
11183         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
11184                               sizeof(attr))) {
11185                 log_stderr("failure: sys_mount_setattr");
11186                 goto out;
11187         }
11188
11189         /*
11190          * The open_tree() syscall returns an O_PATH file descriptor
11191          * which we can't use with ioctl(). So let's reopen it as a
11192          * proper file descriptor.
11193          */
11194         tree_fd = openat(open_tree_fd, ".",
11195                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11196         if (tree_fd < 0) {
11197                 log_stderr("failure: openat");
11198                 goto out;
11199         }
11200
11201         subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11202                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11203         if (subvolume_fd < 0) {
11204                 log_stderr("failure: openat");
11205                 goto out;
11206         }
11207
11208         pid = fork();
11209         if (pid < 0) {
11210                 log_stderr("failure: fork");
11211                 goto out;
11212         }
11213         if (pid == 0) {
11214                 if (!switch_userns(userns_fd, 0, 0, false))
11215                         die("failure: switch_userns");
11216
11217                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
11218                                       t_overflowuid, t_overflowgid))
11219                         die("failure: expected_uid_gid");
11220
11221                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
11222                                       t_overflowuid, t_overflowgid))
11223                         die("failure: expected_uid_gid");
11224
11225                 /*
11226                  * The caller's fsids don't have a mappings in the idmapped
11227                  * mount so any file creation must fail.
11228                  */
11229
11230                 /* create directory */
11231                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
11232                         die("failure: btrfs_create_subvolume");
11233                 if (errno != EOVERFLOW)
11234                         die("failure: errno");
11235
11236                 /* create read-write snapshot */
11237                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11238                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11239                         die("failure: btrfs_create_snapshot");
11240                 if (errno != EPERM)
11241                         die("failure: errno");
11242
11243                 /* create read-only snapshot */
11244                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11245                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11246                                            BTRFS_SUBVOL_RDONLY))
11247                         die("failure: btrfs_create_snapshot");
11248                 if (errno != EPERM)
11249                         die("failure: errno");
11250
11251                 /* try to rename a directory */
11252                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11253                                BTRFS_SUBVOLUME1_RENAME))
11254                         die("failure: renameat");
11255                 if (errno != EOVERFLOW)
11256                         die("failure: errno");
11257
11258                 /*
11259                  * The caller is not privileged over the inode so subvolume
11260                  * deletion must fail.
11261                  */
11262
11263                 /* remove directory */
11264                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11265                         die("failure: btrfs_delete_subvolume");
11266                 if (errno != EPERM)
11267                         die("failure: errno");
11268
11269                 exit(EXIT_SUCCESS);
11270         }
11271         if (wait_for_pid(pid))
11272                 goto out;
11273
11274         /* remove directory */
11275         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11276                 die("failure: btrfs_delete_subvolume");
11277
11278         fret = 0;
11279         log_debug("Ran test");
11280 out:
11281         safe_close(attr.userns_fd);
11282         safe_close(open_tree_fd);
11283         safe_close(subvolume_fd);
11284         safe_close(tree_fd);
11285
11286         return fret;
11287 }
11288
11289 static int btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed(void)
11290 {
11291         int fret = -1;
11292         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11293         struct mount_attr attr = {
11294                 .attr_set = MOUNT_ATTR_IDMAP,
11295         };
11296         pid_t pid;
11297
11298         if (!caps_supported())
11299                 return 0;
11300
11301         /* Changing mount properties on a detached mount. */
11302         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11303         if (attr.userns_fd < 0) {
11304                 log_stderr("failure: get_userns_fd");
11305                 goto out;
11306         }
11307
11308         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11309                                      AT_EMPTY_PATH |
11310                                      AT_NO_AUTOMOUNT |
11311                                      AT_SYMLINK_NOFOLLOW |
11312                                      OPEN_TREE_CLOEXEC |
11313                                      OPEN_TREE_CLONE);
11314         if (open_tree_fd < 0) {
11315                 log_stderr("failure: sys_open_tree");
11316                 goto out;
11317         }
11318
11319         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11320                 log_stderr("failure: sys_mount_setattr");
11321                 goto out;
11322         }
11323
11324         /*
11325          * The open_tree() syscall returns an O_PATH file descriptor which we
11326          * can't use with ioctl(). So let's reopen it as a proper file
11327          * descriptor.
11328          */
11329         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11330         if (tree_fd < 0) {
11331                 log_stderr("failure: openat");
11332                 goto out;
11333         }
11334
11335         pid = fork();
11336         if (pid < 0) {
11337                 log_stderr("failure: fork");
11338                 goto out;
11339         }
11340         if (pid == 0) {
11341                 if (!switch_fsids(10000, 10000))
11342                         die("failure: switch fsids");
11343
11344                 if (!caps_down())
11345                         die("failure: raise caps");
11346
11347                 /*
11348                  * The caller's fsids now have mappings in the idmapped mount so
11349                  * any file creation must succedd.
11350                  */
11351
11352                 /* create subvolume */
11353                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11354                         die("failure: btrfs_create_subvolume");
11355
11356                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11357                         die("failure: check ownership");
11358
11359                 /*
11360                  * The scratch device is mounted with user_subvol_rm_allowed so
11361                  * subvolume deletion must succeed.
11362                  */
11363                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11364                         die("failure: btrfs_delete_subvolume");
11365
11366                 exit(EXIT_SUCCESS);
11367         }
11368         if (wait_for_pid(pid))
11369                 goto out;
11370
11371         fret = 0;
11372         log_debug("Ran test");
11373 out:
11374         safe_close(attr.userns_fd);
11375         safe_close(open_tree_fd);
11376         safe_close(tree_fd);
11377
11378         return fret;
11379 }
11380
11381 static int btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed(void)
11382 {
11383         int fret = -1;
11384         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11385         struct mount_attr attr = {
11386                 .attr_set = MOUNT_ATTR_IDMAP,
11387         };
11388         pid_t pid;
11389
11390         if (!caps_supported())
11391                 return 0;
11392
11393         /* Changing mount properties on a detached mount. */
11394         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11395         if (attr.userns_fd < 0) {
11396                 log_stderr("failure: get_userns_fd");
11397                 goto out;
11398         }
11399
11400         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11401                                      AT_EMPTY_PATH |
11402                                      AT_NO_AUTOMOUNT |
11403                                      AT_SYMLINK_NOFOLLOW |
11404                                      OPEN_TREE_CLOEXEC |
11405                                      OPEN_TREE_CLONE);
11406         if (open_tree_fd < 0) {
11407                 log_stderr("failure: sys_open_tree");
11408                 goto out;
11409         }
11410
11411         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11412                 log_stderr("failure: sys_mount_setattr");
11413                 goto out;
11414         }
11415
11416         /*
11417          * The open_tree() syscall returns an O_PATH file descriptor which we
11418          * can't use with ioctl(). So let's reopen it as a proper file
11419          * descriptor.
11420          */
11421         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11422         if (tree_fd < 0) {
11423                 log_stderr("failure: openat");
11424                 goto out;
11425         }
11426
11427         pid = fork();
11428         if (pid < 0) {
11429                 log_stderr("failure: fork");
11430                 goto out;
11431         }
11432         if (pid == 0) {
11433                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11434                         die("failure: switch_userns");
11435
11436                 /* The caller's fsids now have mappings in the idmapped mount so
11437                  * any file creation must fail.
11438                  */
11439
11440                 /* create subvolume */
11441                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11442                         die("failure: btrfs_create_subvolume");
11443
11444                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11445                         die("failure: check ownership");
11446
11447                 /*
11448                  * The scratch device is mounted with user_subvol_rm_allowed so
11449                  * subvolume deletion must succeed.
11450                  */
11451                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11452                         die("failure: btrfs_delete_subvolume");
11453
11454                 exit(EXIT_SUCCESS);
11455         }
11456         if (wait_for_pid(pid))
11457                 goto out;
11458
11459         fret = 0;
11460         log_debug("Ran test");
11461 out:
11462         safe_close(attr.userns_fd);
11463         safe_close(open_tree_fd);
11464         safe_close(tree_fd);
11465
11466         return fret;
11467 }
11468
11469 static int btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed(void)
11470 {
11471         int fret = -1;
11472         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11473         struct mount_attr attr = {
11474                 .attr_set = MOUNT_ATTR_IDMAP,
11475         };
11476         pid_t pid;
11477
11478         if (!caps_supported())
11479                 return 0;
11480
11481         /* Changing mount properties on a detached mount. */
11482         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11483         if (attr.userns_fd < 0) {
11484                 log_stderr("failure: get_userns_fd");
11485                 goto out;
11486         }
11487
11488         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11489                                      AT_EMPTY_PATH |
11490                                      AT_NO_AUTOMOUNT |
11491                                      AT_SYMLINK_NOFOLLOW |
11492                                      OPEN_TREE_CLOEXEC |
11493                                      OPEN_TREE_CLONE);
11494         if (open_tree_fd < 0) {
11495                 log_stderr("failure: sys_open_tree");
11496                 goto out;
11497         }
11498
11499         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11500                 log_stderr("failure: sys_mount_setattr");
11501                 goto out;
11502         }
11503
11504         /*
11505          * The open_tree() syscall returns an O_PATH file descriptor which we
11506          * can't use with ioctl(). So let's reopen it as a proper file
11507          * descriptor.
11508          */
11509         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11510         if (tree_fd < 0) {
11511                 log_stderr("failure: openat");
11512                 goto out;
11513         }
11514
11515         pid = fork();
11516         if (pid < 0) {
11517                 log_stderr("failure: fork");
11518                 goto out;
11519         }
11520         if (pid == 0) {
11521                 int subvolume_fd = -EBADF;
11522
11523                 if (!switch_fsids(10000, 10000))
11524                         die("failure: switch fsids");
11525
11526                 if (!caps_down())
11527                         die("failure: raise caps");
11528
11529                 /*
11530                  * The caller's fsids now have mappings in the idmapped mount so
11531                  * any file creation must succeed.
11532                  */
11533
11534                 /* create subvolume */
11535                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11536                         die("failure: btrfs_create_subvolume");
11537
11538                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11539                         die("failure: expected_uid_gid");
11540
11541                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11542                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11543                 if (subvolume_fd < 0)
11544                         die("failure: openat");
11545
11546                 /* create read-write snapshot */
11547                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11548                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11549                         die("failure: btrfs_create_snapshot");
11550
11551                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
11552                         die("failure: expected_uid_gid");
11553
11554                 /* create read-only snapshot */
11555                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11556                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11557                                           BTRFS_SUBVOL_RDONLY))
11558                         die("failure: btrfs_create_snapshot");
11559
11560                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
11561                         die("failure: expected_uid_gid");
11562
11563                 safe_close(subvolume_fd);
11564
11565                 /* remove subvolume */
11566                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11567                         die("failure: btrfs_delete_subvolume");
11568
11569                 /* remove read-write snapshot */
11570                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11571                         die("failure: btrfs_delete_subvolume");
11572
11573                 /* remove read-only snapshot */
11574                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11575                         die("failure: btrfs_delete_subvolume");
11576
11577                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11578                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11579                 if (subvolume_fd < 0)
11580                         die("failure: openat");
11581
11582                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11583                         die("failure: btrfs_set_subvolume_ro");
11584
11585                 safe_close(subvolume_fd);
11586
11587                 /* remove read-only snapshot */
11588                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11589                         die("failure: btrfs_delete_subvolume");
11590
11591                 exit(EXIT_SUCCESS);
11592         }
11593         if (wait_for_pid(pid))
11594                 goto out;
11595
11596         fret = 0;
11597         log_debug("Ran test");
11598 out:
11599         safe_close(attr.userns_fd);
11600         safe_close(open_tree_fd);
11601         safe_close(tree_fd);
11602
11603         return fret;
11604 }
11605
11606 static int btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed(void)
11607 {
11608         int fret = -1;
11609         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11610         struct mount_attr attr = {
11611                 .attr_set = MOUNT_ATTR_IDMAP,
11612         };
11613         pid_t pid;
11614
11615         if (!caps_supported())
11616                 return 0;
11617
11618         /* Changing mount properties on a detached mount. */
11619         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11620         if (attr.userns_fd < 0) {
11621                 log_stderr("failure: get_userns_fd");
11622                 goto out;
11623         }
11624
11625         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11626                                      AT_EMPTY_PATH |
11627                                      AT_NO_AUTOMOUNT |
11628                                      AT_SYMLINK_NOFOLLOW |
11629                                      OPEN_TREE_CLOEXEC |
11630                                      OPEN_TREE_CLONE);
11631         if (open_tree_fd < 0) {
11632                 log_stderr("failure: sys_open_tree");
11633                 goto out;
11634         }
11635
11636         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11637                 log_stderr("failure: sys_mount_setattr");
11638                 goto out;
11639         }
11640
11641         /*
11642          * The open_tree() syscall returns an O_PATH file descriptor which we
11643          * can't use with ioctl(). So let's reopen it as a proper file
11644          * descriptor.
11645          */
11646         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11647         if (tree_fd < 0) {
11648                 log_stderr("failure: openat");
11649                 goto out;
11650         }
11651
11652         pid = fork();
11653         if (pid < 0) {
11654                 log_stderr("failure: fork");
11655                 goto out;
11656         }
11657         if (pid == 0) {
11658                 int subvolume_fd = -EBADF;
11659
11660                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11661                         die("failure: switch_userns");
11662
11663                 /* create subvolume */
11664                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11665                         die("failure: btrfs_create_subvolume");
11666
11667                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11668                         die("failure: expected_uid_gid");
11669
11670                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11671                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11672                 if (subvolume_fd < 0)
11673                         die("failure: openat");
11674
11675                 /* create read-write snapshot */
11676                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11677                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11678                         die("failure: btrfs_create_snapshot");
11679
11680                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11681                         die("failure: expected_uid_gid");
11682
11683                 /* create read-only snapshot */
11684                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11685                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11686                                           BTRFS_SUBVOL_RDONLY))
11687                         die("failure: btrfs_create_snapshot");
11688
11689                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11690                         die("failure: expected_uid_gid");
11691
11692                 /* remove directory */
11693                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11694                         die("failure: btrfs_delete_subvolume");
11695
11696                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11697                         die("failure: expected_uid_gid");
11698
11699                 /* remove read-write snapshot */
11700                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11701                         die("failure: btrfs_delete_subvolume");
11702
11703                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11704                         die("failure: expected_uid_gid");
11705
11706                 /* remove read-only snapshot */
11707                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11708                         die("failure: btrfs_delete_subvolume");
11709
11710                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11711                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11712                 if (subvolume_fd < 0)
11713                         die("failure: openat");
11714
11715                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11716                         die("failure: btrfs_set_subvolume_ro");
11717
11718                 safe_close(subvolume_fd);
11719
11720                 /* remove read-only snapshot */
11721                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11722                         die("failure: btrfs_delete_subvolume");
11723
11724                 exit(EXIT_SUCCESS);
11725         }
11726         if (wait_for_pid(pid))
11727                 goto out;
11728
11729         fret = 0;
11730         log_debug("Ran test");
11731 out:
11732         safe_close(attr.userns_fd);
11733         safe_close(open_tree_fd);
11734         safe_close(tree_fd);
11735
11736         return fret;
11737 }
11738
11739 static int btrfs_delete_by_spec_id(void)
11740 {
11741         int fret = -1;
11742         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
11743         uint64_t subvolume_id1 = -EINVAL, subvolume_id2 = -EINVAL;
11744         struct mount_attr attr = {
11745                 .attr_set = MOUNT_ATTR_IDMAP,
11746         };
11747         pid_t pid;
11748
11749         /* Changing mount properties on a detached mount. */
11750         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11751         if (attr.userns_fd < 0) {
11752                 log_stderr("failure: get_userns_fd");
11753                 goto out;
11754         }
11755
11756         /* create subvolume */
11757         if (btrfs_create_subvolume(t_mnt_scratch_fd, "A")) {
11758                 log_stderr("failure: btrfs_create_subvolume");
11759                 goto out;
11760         }
11761
11762         /* create subvolume */
11763         if (btrfs_create_subvolume(t_mnt_scratch_fd, "B")) {
11764                 log_stderr("failure: btrfs_create_subvolume");
11765                 goto out;
11766         }
11767
11768         subvolume_fd = openat(t_mnt_scratch_fd, "B", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11769         if (subvolume_fd < 0) {
11770                 log_stderr("failure: openat");
11771                 goto out;
11772         }
11773
11774         /* create subvolume */
11775         if (btrfs_create_subvolume(subvolume_fd, "C")) {
11776                 log_stderr("failure: btrfs_create_subvolume");
11777                 goto out;
11778         }
11779
11780         safe_close(subvolume_fd);
11781
11782         subvolume_fd = openat(t_mnt_scratch_fd, "A", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11783         if (subvolume_fd < 0) {
11784                 log_stderr("failure: openat");
11785                 goto out;
11786         }
11787
11788         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id1)) {
11789                 log_stderr("failure: btrfs_get_subvolume_id");
11790                 goto out;
11791         }
11792
11793         subvolume_fd = openat(t_mnt_scratch_fd, "B/C", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11794         if (subvolume_fd < 0) {
11795                 log_stderr("failure: openat");
11796                 goto out;
11797         }
11798
11799         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id2)) {
11800                 log_stderr("failure: btrfs_get_subvolume_id");
11801                 goto out;
11802         }
11803
11804         if (sys_mount(t_device_scratch, t_mountpoint, "btrfs", 0, "subvol=B/C")) {
11805                 log_stderr("failure: mount");
11806                 goto out;
11807         }
11808
11809         open_tree_fd = sys_open_tree(-EBADF, t_mountpoint,
11810                                      AT_NO_AUTOMOUNT |
11811                                      AT_SYMLINK_NOFOLLOW |
11812                                      OPEN_TREE_CLOEXEC |
11813                                      OPEN_TREE_CLONE);
11814         if (open_tree_fd < 0) {
11815                 log_stderr("failure: sys_open_tree");
11816                 goto out;
11817         }
11818
11819         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11820                 log_stderr("failure: sys_mount_setattr");
11821                 goto out;
11822         }
11823
11824         /*
11825          * The open_tree() syscall returns an O_PATH file descriptor which we
11826          * can't use with ioctl(). So let's reopen it as a proper file
11827          * descriptor.
11828          */
11829         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11830         if (tree_fd < 0) {
11831                 log_stderr("failure: openat");
11832                 goto out;
11833         }
11834
11835         pid = fork();
11836         if (pid < 0) {
11837                 log_stderr("failure: fork");
11838                 goto out;
11839         }
11840         if (pid == 0) {
11841                 /*
11842                  * The subvolume isn't exposed in the idmapped mount so
11843                  * delation via spec id must fail.
11844                  */
11845                 if (!btrfs_delete_subvolume_id(tree_fd, subvolume_id1))
11846                         die("failure: btrfs_delete_subvolume_id");
11847                 if (errno != EOPNOTSUPP)
11848                         die("failure: errno");
11849
11850                 if (btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id1))
11851                         die("failure: btrfs_delete_subvolume_id");
11852
11853                 exit(EXIT_SUCCESS);
11854         }
11855         if (wait_for_pid(pid))
11856                 goto out;
11857
11858         fret = 0;
11859         log_debug("Ran test");
11860 out:
11861         safe_close(attr.userns_fd);
11862         safe_close(open_tree_fd);
11863         safe_close(tree_fd);
11864         sys_umount2(t_mountpoint, MNT_DETACH);
11865         btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id2);
11866         btrfs_delete_subvolume(t_mnt_scratch_fd, "B");
11867
11868         return fret;
11869 }
11870
11871 static int btrfs_subvolumes_setflags_fsids_mapped(void)
11872 {
11873         int fret = -1;
11874         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11875         struct mount_attr attr = {
11876                 .attr_set = MOUNT_ATTR_IDMAP,
11877         };
11878         pid_t pid;
11879
11880         if (!caps_supported())
11881                 return 0;
11882
11883         /* Changing mount properties on a detached mount. */
11884         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11885         if (attr.userns_fd < 0) {
11886                 log_stderr("failure: get_userns_fd");
11887                 goto out;
11888         }
11889
11890         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11891                                      AT_EMPTY_PATH |
11892                                      AT_NO_AUTOMOUNT |
11893                                      AT_SYMLINK_NOFOLLOW |
11894                                      OPEN_TREE_CLOEXEC |
11895                                      OPEN_TREE_CLONE);
11896         if (open_tree_fd < 0) {
11897                 log_stderr("failure: sys_open_tree");
11898                 goto out;
11899         }
11900
11901         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11902                 log_stderr("failure: sys_mount_setattr");
11903                 goto out;
11904         }
11905
11906         /*
11907          * The open_tree() syscall returns an O_PATH file descriptor which we
11908          * can't use with ioctl(). So let's reopen it as a proper file
11909          * descriptor.
11910          */
11911         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11912         if (tree_fd < 0) {
11913                 log_stderr("failure: openat");
11914                 goto out;
11915         }
11916
11917         pid = fork();
11918         if (pid < 0) {
11919                 log_stderr("failure: fork");
11920                 goto out;
11921         }
11922         if (pid == 0) {
11923                 int subvolume_fd = -EBADF;
11924                 bool read_only = false;
11925
11926                 if (!switch_fsids(10000, 10000))
11927                         die("failure: switch fsids");
11928
11929                 if (!caps_down())
11930                         die("failure: raise caps");
11931
11932                 /* The caller's fsids now have mappings in the idmapped mount so
11933                  * any file creation must fail.
11934                  */
11935
11936                 /* create subvolume */
11937                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11938                         die("failure: btrfs_create_subvolume");
11939
11940                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11941                         die("failure: expected_uid_gid");
11942
11943                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11944                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11945                 if (subvolume_fd < 0)
11946                         die("failure: openat");
11947
11948                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11949                         die("failure: btrfs_get_subvolume_ro");
11950
11951                 if (read_only)
11952                         die("failure: read_only");
11953
11954                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
11955                         die("failure: btrfs_set_subvolume_ro");
11956
11957                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11958                         die("failure: btrfs_get_subvolume_ro");
11959
11960                 if (!read_only)
11961                         die("failure: not read_only");
11962
11963                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11964                         die("failure: btrfs_set_subvolume_ro");
11965
11966                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11967                         die("failure: btrfs_get_subvolume_ro");
11968
11969                 if (read_only)
11970                         die("failure: read_only");
11971
11972                 safe_close(subvolume_fd);
11973
11974                 exit(EXIT_SUCCESS);
11975         }
11976         if (wait_for_pid(pid))
11977                 goto out;
11978
11979         /* remove directory */
11980         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
11981                 log_stderr("failure: btrfs_delete_subvolume");
11982                 goto out;
11983         }
11984
11985         fret = 0;
11986         log_debug("Ran test");
11987 out:
11988         safe_close(attr.userns_fd);
11989         safe_close(open_tree_fd);
11990         safe_close(tree_fd);
11991
11992         return fret;
11993 }
11994
11995 static int btrfs_subvolumes_setflags_fsids_mapped_userns(void)
11996 {
11997         int fret = -1;
11998         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11999         struct mount_attr attr = {
12000                 .attr_set = MOUNT_ATTR_IDMAP,
12001         };
12002         pid_t pid;
12003
12004         if (!caps_supported())
12005                 return 0;
12006
12007         /* Changing mount properties on a detached mount. */
12008         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12009         if (attr.userns_fd < 0) {
12010                 log_stderr("failure: get_userns_fd");
12011                 goto out;
12012         }
12013
12014         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12015                                      AT_EMPTY_PATH |
12016                                      AT_NO_AUTOMOUNT |
12017                                      AT_SYMLINK_NOFOLLOW |
12018                                      OPEN_TREE_CLOEXEC |
12019                                      OPEN_TREE_CLONE);
12020         if (open_tree_fd < 0) {
12021                 log_stderr("failure: sys_open_tree");
12022                 goto out;
12023         }
12024
12025         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12026                 log_stderr("failure: sys_mount_setattr");
12027                 goto out;
12028         }
12029
12030         /*
12031          * The open_tree() syscall returns an O_PATH file descriptor which we
12032          * can't use with ioctl(). So let's reopen it as a proper file
12033          * descriptor.
12034          */
12035         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12036         if (tree_fd < 0) {
12037                 log_stderr("failure: openat");
12038                 goto out;
12039         }
12040
12041         pid = fork();
12042         if (pid < 0) {
12043                 log_stderr("failure: fork");
12044                 goto out;
12045         }
12046         if (pid == 0) {
12047                 int subvolume_fd = -EBADF;
12048                 bool read_only = false;
12049
12050                 if (!switch_userns(attr.userns_fd, 0, 0, false))
12051                         die("failure: switch_userns");
12052
12053                 /* The caller's fsids now have mappings in the idmapped mount so
12054                  * any file creation must fail.
12055                  */
12056
12057                 /* create subvolume */
12058                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12059                         die("failure: btrfs_create_subvolume");
12060
12061                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12062                         die("failure: expected_uid_gid");
12063
12064                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12065                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12066                 if (subvolume_fd < 0)
12067                         die("failure: openat");
12068
12069                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12070                         die("failure: btrfs_get_subvolume_ro");
12071
12072                 if (read_only)
12073                         die("failure: read_only");
12074
12075                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
12076                         die("failure: btrfs_set_subvolume_ro");
12077
12078                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12079                         die("failure: btrfs_get_subvolume_ro");
12080
12081                 if (!read_only)
12082                         die("failure: not read_only");
12083
12084                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
12085                         die("failure: btrfs_set_subvolume_ro");
12086
12087                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12088                         die("failure: btrfs_get_subvolume_ro");
12089
12090                 if (read_only)
12091                         die("failure: read_only");
12092
12093                 safe_close(subvolume_fd);
12094
12095                 exit(EXIT_SUCCESS);
12096         }
12097         if (wait_for_pid(pid))
12098                 goto out;
12099
12100         /* remove directory */
12101         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12102                 log_stderr("failure: btrfs_delete_subvolume");
12103                 goto out;
12104         }
12105
12106         fret = 0;
12107         log_debug("Ran test");
12108 out:
12109         safe_close(attr.userns_fd);
12110         safe_close(open_tree_fd);
12111         safe_close(tree_fd);
12112
12113         return fret;
12114 }
12115
12116 static int btrfs_subvolumes_setflags_fsids_unmapped(void)
12117 {
12118         int fret = -1;
12119         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12120         struct mount_attr attr = {
12121                 .attr_set = MOUNT_ATTR_IDMAP,
12122         };
12123         pid_t pid;
12124
12125         if (!caps_supported())
12126                 return 0;
12127
12128         /* Changing mount properties on a detached mount. */
12129         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12130         if (attr.userns_fd < 0) {
12131                 log_stderr("failure: get_userns_fd");
12132                 goto out;
12133         }
12134
12135         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12136                                      AT_EMPTY_PATH |
12137                                      AT_NO_AUTOMOUNT |
12138                                      AT_SYMLINK_NOFOLLOW |
12139                                      OPEN_TREE_CLOEXEC |
12140                                      OPEN_TREE_CLONE);
12141         if (open_tree_fd < 0) {
12142                 log_stderr("failure: sys_open_tree");
12143                 goto out;
12144         }
12145
12146         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12147                 log_stderr("failure: sys_mount_setattr");
12148                 goto out;
12149         }
12150
12151         /*
12152          * The open_tree() syscall returns an O_PATH file descriptor which we
12153          * can't use with ioctl(). So let's reopen it as a proper file
12154          * descriptor.
12155          */
12156         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12157         if (tree_fd < 0) {
12158                 log_stderr("failure: openat");
12159                 goto out;
12160         }
12161
12162         /* create subvolume */
12163         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12164                 log_stderr("failure: btrfs_create_subvolume");
12165                 goto out;
12166         }
12167
12168         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12169                 log_stderr("failure: expected_uid_gid");
12170                 goto out;
12171         }
12172
12173         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12174                 log_stderr("failure: expected_uid_gid");
12175                 goto out;
12176         }
12177
12178         pid = fork();
12179         if (pid < 0) {
12180                 log_stderr("failure: fork");
12181                 goto out;
12182         }
12183         if (pid == 0) {
12184                 int subvolume_fd = -EBADF;
12185                 bool read_only = false;
12186
12187                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12188                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12189                 if (subvolume_fd < 0)
12190                         die("failure: openat");
12191
12192                 if (!switch_fsids(0, 0))
12193                         die("failure: switch fsids");
12194
12195                 if (!caps_down())
12196                         die("failure: raise caps");
12197
12198                 /*
12199                  * The caller's fsids don't have mappings in the idmapped mount
12200                  * so any file creation must fail.
12201                  */
12202
12203                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12204                         die("failure: btrfs_get_subvolume_ro");
12205
12206                 if (read_only)
12207                         die("failure: read_only");
12208
12209                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12210                         die("failure: btrfs_set_subvolume_ro");
12211                 if (errno != EPERM)
12212                         die("failure: errno");
12213
12214                 safe_close(subvolume_fd);
12215
12216                 exit(EXIT_SUCCESS);
12217         }
12218         if (wait_for_pid(pid))
12219                 goto out;
12220
12221         /* remove directory */
12222         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12223                 log_stderr("failure: btrfs_delete_subvolume");
12224                 goto out;
12225         }
12226
12227         fret = 0;
12228         log_debug("Ran test");
12229 out:
12230         safe_close(attr.userns_fd);
12231         safe_close(open_tree_fd);
12232         safe_close(tree_fd);
12233
12234         return fret;
12235 }
12236
12237 static int btrfs_subvolumes_setflags_fsids_unmapped_userns(void)
12238 {
12239         int fret = -1;
12240         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
12241         struct mount_attr attr = {
12242                 .attr_set = MOUNT_ATTR_IDMAP,
12243         };
12244         pid_t pid;
12245
12246         if (!caps_supported())
12247                 return 0;
12248
12249         /* Changing mount properties on a detached mount. */
12250         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12251         if (attr.userns_fd < 0) {
12252                 log_stderr("failure: get_userns_fd");
12253                 goto out;
12254         }
12255
12256         /* Changing mount properties on a detached mount. */
12257         userns_fd = get_userns_fd(0, 30000, 10000);
12258         if (userns_fd < 0) {
12259                 log_stderr("failure: get_userns_fd");
12260                 goto out;
12261         }
12262
12263         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12264                                      AT_EMPTY_PATH |
12265                                      AT_NO_AUTOMOUNT |
12266                                      AT_SYMLINK_NOFOLLOW |
12267                                      OPEN_TREE_CLOEXEC |
12268                                      OPEN_TREE_CLONE);
12269         if (open_tree_fd < 0) {
12270                 log_stderr("failure: sys_open_tree");
12271                 goto out;
12272         }
12273
12274         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12275                 log_stderr("failure: sys_mount_setattr");
12276                 goto out;
12277         }
12278
12279         /*
12280          * The open_tree() syscall returns an O_PATH file descriptor which we
12281          * can't use with ioctl(). So let's reopen it as a proper file
12282          * descriptor.
12283          */
12284         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12285         if (tree_fd < 0) {
12286                 log_stderr("failure: openat");
12287                 goto out;
12288         }
12289
12290         /* create subvolume */
12291         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12292                 log_stderr("failure: btrfs_create_subvolume");
12293                 goto out;
12294         }
12295
12296         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12297                 log_stderr("failure: expected_uid_gid");
12298                 goto out;
12299         }
12300
12301         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12302                 log_stderr("failure: expected_uid_gid");
12303                 goto out;
12304         }
12305
12306         pid = fork();
12307         if (pid < 0) {
12308                 log_stderr("failure: fork");
12309                 goto out;
12310         }
12311         if (pid == 0) {
12312                 int subvolume_fd = -EBADF;
12313                 bool read_only = false;
12314
12315                 /*
12316                  * The caller's fsids don't have mappings in the idmapped mount
12317                  * so any file creation must fail.
12318                  */
12319
12320                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12321                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12322                 if (subvolume_fd < 0)
12323                         die("failure: openat");
12324
12325                 if (!switch_userns(userns_fd, 0, 0, false))
12326                         die("failure: switch_userns");
12327
12328                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12329                                       t_overflowuid, t_overflowgid))
12330                         die("failure: expected_uid_gid");
12331
12332                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12333                                       t_overflowuid, t_overflowgid))
12334                         die("failure: expected_uid_gid");
12335
12336                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12337                         die("failure: btrfs_get_subvolume_ro");
12338
12339                 if (read_only)
12340                         die("failure: read_only");
12341
12342                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12343                         die("failure: btrfs_set_subvolume_ro");
12344                 if (errno != EPERM)
12345                         die("failure: errno");
12346
12347                 safe_close(subvolume_fd);
12348
12349                 exit(EXIT_SUCCESS);
12350         }
12351         if (wait_for_pid(pid))
12352                 goto out;
12353
12354         /* remove directory */
12355         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12356                 log_stderr("failure: btrfs_delete_subvolume");
12357                 goto out;
12358         }
12359
12360         fret = 0;
12361         log_debug("Ran test");
12362 out:
12363         safe_close(attr.userns_fd);
12364         safe_close(open_tree_fd);
12365         safe_close(tree_fd);
12366         safe_close(userns_fd);
12367
12368         return fret;
12369 }
12370
12371 static int btrfs_snapshots_setflags_fsids_mapped(void)
12372 {
12373         int fret = -1;
12374         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12375         struct mount_attr attr = {
12376                 .attr_set = MOUNT_ATTR_IDMAP,
12377         };
12378         pid_t pid;
12379
12380         if (!caps_supported())
12381                 return 0;
12382
12383         /* Changing mount properties on a detached mount. */
12384         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12385         if (attr.userns_fd < 0) {
12386                 log_stderr("failure: get_userns_fd");
12387                 goto out;
12388         }
12389
12390         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12391                                      AT_EMPTY_PATH |
12392                                      AT_NO_AUTOMOUNT |
12393                                      AT_SYMLINK_NOFOLLOW |
12394                                      OPEN_TREE_CLOEXEC |
12395                                      OPEN_TREE_CLONE);
12396         if (open_tree_fd < 0) {
12397                 log_stderr("failure: sys_open_tree");
12398                 goto out;
12399         }
12400
12401         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12402                 log_stderr("failure: sys_mount_setattr");
12403                 goto out;
12404         }
12405
12406         /*
12407          * The open_tree() syscall returns an O_PATH file descriptor which we
12408          * can't use with ioctl(). So let's reopen it as a proper file
12409          * descriptor.
12410          */
12411         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12412         if (tree_fd < 0) {
12413                 log_stderr("failure: openat");
12414                 goto out;
12415         }
12416
12417         pid = fork();
12418         if (pid < 0) {
12419                 log_stderr("failure: fork");
12420                 goto out;
12421         }
12422         if (pid == 0) {
12423                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12424                 bool read_only = false;
12425
12426                 if (!switch_fsids(10000, 10000))
12427                         die("failure: switch fsids");
12428
12429                 if (!caps_down())
12430                         die("failure: raise caps");
12431
12432                 /*
12433                  * The caller's fsids now have mappings in the idmapped mount
12434                  * so any file creation must succeed.
12435                  */
12436
12437                 /* create subvolume */
12438                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12439                         die("failure: btrfs_create_subvolume");
12440
12441                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
12442                         die("failure: expected_uid_gid");
12443
12444                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12445                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12446                 if (subvolume_fd < 0)
12447                         die("failure: openat");
12448
12449                 /* create read-write snapshot */
12450                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12451                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12452                         die("failure: btrfs_create_snapshot");
12453
12454                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
12455                         die("failure: expected_uid_gid");
12456
12457                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12458                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12459                 if (snapshot_fd < 0)
12460                         die("failure: openat");
12461
12462                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12463                         die("failure: btrfs_get_subvolume_ro");
12464
12465                 if (read_only)
12466                         die("failure: read_only");
12467
12468                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12469                         die("failure: btrfs_set_subvolume_ro");
12470
12471                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12472                         die("failure: btrfs_get_subvolume_ro");
12473
12474                 if (!read_only)
12475                         die("failure: not read_only");
12476
12477                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12478                         die("failure: btrfs_set_subvolume_ro");
12479
12480                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12481                         die("failure: btrfs_get_subvolume_ro");
12482
12483                 if (read_only)
12484                         die("failure: read_only");
12485
12486                 safe_close(snapshot_fd);
12487                 safe_close(subvolume_fd);
12488
12489                 exit(EXIT_SUCCESS);
12490         }
12491         if (wait_for_pid(pid))
12492                 goto out;
12493
12494         /* remove directory */
12495         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12496                 log_stderr("failure: btrfs_delete_subvolume");
12497                 goto out;
12498         }
12499
12500         /* remove directory */
12501         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12502                 log_stderr("failure: btrfs_delete_subvolume");
12503                 goto out;
12504         }
12505
12506         fret = 0;
12507         log_debug("Ran test");
12508 out:
12509         safe_close(attr.userns_fd);
12510         safe_close(open_tree_fd);
12511         safe_close(tree_fd);
12512
12513         return fret;
12514 }
12515
12516 static int btrfs_snapshots_setflags_fsids_mapped_userns(void)
12517 {
12518         int fret = -1;
12519         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12520         struct mount_attr attr = {
12521                 .attr_set = MOUNT_ATTR_IDMAP,
12522         };
12523         pid_t pid;
12524
12525         if (!caps_supported())
12526                 return 0;
12527
12528         /* Changing mount properties on a detached mount. */
12529         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12530         if (attr.userns_fd < 0) {
12531                 log_stderr("failure: get_userns_fd");
12532                 goto out;
12533         }
12534
12535         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12536                                      AT_EMPTY_PATH |
12537                                      AT_NO_AUTOMOUNT |
12538                                      AT_SYMLINK_NOFOLLOW |
12539                                      OPEN_TREE_CLOEXEC |
12540                                      OPEN_TREE_CLONE);
12541         if (open_tree_fd < 0) {
12542                 log_stderr("failure: sys_open_tree");
12543                 goto out;
12544         }
12545
12546         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12547                 log_stderr("failure: sys_mount_setattr");
12548                 goto out;
12549         }
12550
12551         /*
12552          * The open_tree() syscall returns an O_PATH file descriptor which we
12553          * can't use with ioctl(). So let's reopen it as a proper file
12554          * descriptor.
12555          */
12556         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12557         if (tree_fd < 0) {
12558                 log_stderr("failure: openat");
12559                 goto out;
12560         }
12561
12562         pid = fork();
12563         if (pid < 0) {
12564                 log_stderr("failure: fork");
12565                 goto out;
12566         }
12567         if (pid == 0) {
12568                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12569                 bool read_only = false;
12570
12571                 if (!switch_userns(attr.userns_fd, 0, 0, false))
12572                         die("failure: switch_userns");
12573
12574                 /*
12575                  * The caller's fsids now have mappings in the idmapped mount so
12576                  * any file creation must succeed.
12577                  */
12578
12579                 /* create subvolume */
12580                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12581                         die("failure: btrfs_create_subvolume");
12582
12583                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12584                         die("failure: expected_uid_gid");
12585
12586                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12587                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12588                 if (subvolume_fd < 0)
12589                         die("failure: openat");
12590
12591                 /* create read-write snapshot */
12592                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12593                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12594                         die("failure: btrfs_create_snapshot");
12595
12596                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
12597                         die("failure: expected_uid_gid");
12598
12599                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12600                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12601                 if (snapshot_fd < 0)
12602                         die("failure: openat");
12603
12604                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12605                         die("failure: btrfs_get_subvolume_ro");
12606
12607                 if (read_only)
12608                         die("failure: read_only");
12609
12610                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12611                         die("failure: btrfs_set_subvolume_ro");
12612
12613                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12614                         die("failure: btrfs_get_subvolume_ro");
12615
12616                 if (!read_only)
12617                         die("failure: not read_only");
12618
12619                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12620                         die("failure: btrfs_set_subvolume_ro");
12621
12622                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12623                         die("failure: btrfs_get_subvolume_ro");
12624
12625                 if (read_only)
12626                         die("failure: read_only");
12627
12628                 safe_close(snapshot_fd);
12629                 safe_close(subvolume_fd);
12630
12631                 exit(EXIT_SUCCESS);
12632         }
12633         if (wait_for_pid(pid))
12634                 goto out;
12635
12636         /* remove directory */
12637         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12638                 log_stderr("failure: btrfs_delete_subvolume");
12639                 goto out;
12640         }
12641
12642         /* remove directory */
12643         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12644                 log_stderr("failure: btrfs_delete_subvolume");
12645                 goto out;
12646         }
12647
12648         fret = 0;
12649         log_debug("Ran test");
12650 out:
12651         safe_close(attr.userns_fd);
12652         safe_close(open_tree_fd);
12653         safe_close(tree_fd);
12654
12655         return fret;
12656 }
12657
12658 static int btrfs_snapshots_setflags_fsids_unmapped(void)
12659 {
12660         int fret = -1;
12661         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
12662         struct mount_attr attr = {
12663                 .attr_set = MOUNT_ATTR_IDMAP,
12664         };
12665         pid_t pid;
12666
12667         if (!caps_supported())
12668                 return 0;
12669
12670         /* Changing mount properties on a detached mount. */
12671         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12672         if (attr.userns_fd < 0) {
12673                 log_stderr("failure: get_userns_fd");
12674                 goto out;
12675         }
12676
12677         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12678                                      AT_EMPTY_PATH |
12679                                      AT_NO_AUTOMOUNT |
12680                                      AT_SYMLINK_NOFOLLOW |
12681                                      OPEN_TREE_CLOEXEC |
12682                                      OPEN_TREE_CLONE);
12683         if (open_tree_fd < 0) {
12684                 log_stderr("failure: sys_open_tree");
12685                 goto out;
12686         }
12687
12688         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12689                 log_stderr("failure: sys_mount_setattr");
12690                 goto out;
12691         }
12692
12693         /*
12694          * The open_tree() syscall returns an O_PATH file descriptor which we
12695          * can't use with ioctl(). So let's reopen it as a proper file
12696          * descriptor.
12697          */
12698         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12699         if (tree_fd < 0) {
12700                 log_stderr("failure: openat");
12701                 goto out;
12702         }
12703
12704         /* create subvolume */
12705         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12706                 log_stderr("failure: btrfs_create_subvolume");
12707                 goto out;
12708         }
12709
12710         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12711                 log_stderr("failure: expected_uid_gid");
12712                 goto out;
12713         }
12714
12715         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12716                 log_stderr("failure: expected_uid_gid");
12717                 goto out;
12718         }
12719
12720         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12721                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12722         if (subvolume_fd < 0) {
12723                 log_stderr("failure: openat");
12724                 goto out;
12725         }
12726
12727         /* create read-write snapshot */
12728         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12729                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12730                 log_stderr("failure: btrfs_create_snapshot");
12731                 goto out;
12732         }
12733
12734         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12735                 log_stderr("failure: expected_uid_gid");
12736                 goto out;
12737         }
12738
12739         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12740                 log_stderr("failure: expected_uid_gid");
12741                 goto out;
12742         }
12743
12744         pid = fork();
12745         if (pid < 0) {
12746                 log_stderr("failure: fork");
12747                 goto out;
12748         }
12749         if (pid == 0) {
12750                 int snapshot_fd = -EBADF;
12751                 bool read_only = false;
12752
12753                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12754                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12755                 if (snapshot_fd < 0)
12756                         die("failure: openat");
12757
12758                 if (!switch_fsids(0, 0))
12759                         die("failure: switch fsids");
12760
12761                 if (!caps_down())
12762                         die("failure: raise caps");
12763
12764                 /*
12765                  * The caller's fsids don't have mappings in the idmapped mount
12766                  * so any file creation must fail.
12767                  */
12768
12769                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12770                         die("failure: btrfs_get_subvolume_ro");
12771
12772                 if (read_only)
12773                         die("failure: read_only");
12774
12775                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12776                         die("failure: btrfs_set_subvolume_ro");
12777                 if (errno != EPERM)
12778                         die("failure: errno");
12779
12780                 safe_close(snapshot_fd);
12781
12782                 exit(EXIT_SUCCESS);
12783         }
12784         if (wait_for_pid(pid))
12785                 goto out;
12786
12787         /* remove directory */
12788         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12789                 log_stderr("failure: btrfs_delete_subvolume");
12790                 goto out;
12791         }
12792
12793         /* remove directory */
12794         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12795                 log_stderr("failure: btrfs_delete_subvolume");
12796                 goto out;
12797         }
12798
12799         fret = 0;
12800         log_debug("Ran test");
12801 out:
12802         safe_close(attr.userns_fd);
12803         safe_close(open_tree_fd);
12804         safe_close(subvolume_fd);
12805         safe_close(tree_fd);
12806
12807         return fret;
12808 }
12809
12810 static int btrfs_snapshots_setflags_fsids_unmapped_userns(void)
12811 {
12812         int fret = -1;
12813         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
12814             userns_fd = -EBADF;
12815         struct mount_attr attr = {
12816                 .attr_set = MOUNT_ATTR_IDMAP,
12817         };
12818         pid_t pid;
12819
12820         if (!caps_supported())
12821                 return 0;
12822
12823         /* Changing mount properties on a detached mount. */
12824         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12825         if (attr.userns_fd < 0) {
12826                 log_stderr("failure: get_userns_fd");
12827                 goto out;
12828         }
12829
12830         /* Changing mount properties on a detached mount. */
12831         userns_fd = get_userns_fd(0, 30000, 10000);
12832         if (userns_fd < 0) {
12833                 log_stderr("failure: get_userns_fd");
12834                 goto out;
12835         }
12836
12837         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12838                                      AT_EMPTY_PATH |
12839                                      AT_NO_AUTOMOUNT |
12840                                      AT_SYMLINK_NOFOLLOW |
12841                                      OPEN_TREE_CLOEXEC |
12842                                      OPEN_TREE_CLONE);
12843         if (open_tree_fd < 0) {
12844                 log_stderr("failure: sys_open_tree");
12845                 goto out;
12846         }
12847
12848         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12849                 log_stderr("failure: sys_mount_setattr");
12850                 goto out;
12851         }
12852
12853         /*
12854          * The open_tree() syscall returns an O_PATH file descriptor which we
12855          * can't use with ioctl(). So let's reopen it as a proper file
12856          * descriptor.
12857          */
12858         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12859         if (tree_fd < 0) {
12860                 log_stderr("failure: openat");
12861                 goto out;
12862         }
12863
12864         /* create subvolume */
12865         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12866                 log_stderr("failure: btrfs_create_subvolume");
12867                 goto out;
12868         }
12869
12870         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12871                 log_stderr("failure: expected_uid_gid");
12872                 goto out;
12873         }
12874
12875         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12876                 log_stderr("failure: expected_uid_gid");
12877                 goto out;
12878         }
12879
12880         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12881                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12882         if (subvolume_fd < 0) {
12883                 log_stderr("failure: openat");
12884                 goto out;
12885         }
12886
12887         /* create read-write snapshot */
12888         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12889                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12890                 log_stderr("failure: btrfs_create_snapshot");
12891                 goto out;
12892         }
12893
12894         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12895                 log_stderr("failure: expected_uid_gid");
12896                 goto out;
12897         }
12898
12899         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12900                 log_stderr("failure: expected_uid_gid");
12901                 goto out;
12902         }
12903
12904         pid = fork();
12905         if (pid < 0) {
12906                 log_stderr("failure: fork");
12907                 goto out;
12908         }
12909         if (pid == 0) {
12910                 int snapshot_fd = -EBADF;
12911                 bool read_only = false;
12912
12913                 /*
12914                  * The caller's fsids don't have mappings in the idmapped mount
12915                  * so any file creation must fail.
12916                  */
12917
12918                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12919                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12920                 if (snapshot_fd < 0)
12921                         die("failure: openat");
12922
12923
12924                 if (!switch_userns(userns_fd, 0, 0, false))
12925                         die("failure: switch_userns");
12926
12927                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12928                                       t_overflowuid, t_overflowgid))
12929                         die("failure: expected_uid_gid");
12930
12931                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12932                                       t_overflowuid, t_overflowgid))
12933                         die("failure: expected_uid_gid");
12934
12935                 /*
12936                  * The caller's fsids don't have mappings in the idmapped mount
12937                  * so any file creation must fail.
12938                  */
12939
12940                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12941                         die("failure: btrfs_get_subvolume_ro");
12942
12943                 if (read_only)
12944                         die("failure: read_only");
12945
12946                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12947                         die("failure: btrfs_set_subvolume_ro");
12948                 if (errno != EPERM)
12949                         die("failure: errno");
12950
12951                 safe_close(snapshot_fd);
12952
12953                 exit(EXIT_SUCCESS);
12954         }
12955         if (wait_for_pid(pid))
12956                 goto out;
12957
12958         /* remove directory */
12959         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12960                 log_stderr("failure: btrfs_delete_subvolume");
12961                 goto out;
12962         }
12963
12964         /* remove directory */
12965         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12966                 log_stderr("failure: btrfs_delete_subvolume");
12967                 goto out;
12968         }
12969
12970         fret = 0;
12971         log_debug("Ran test");
12972 out:
12973         safe_close(attr.userns_fd);
12974         safe_close(open_tree_fd);
12975         safe_close(subvolume_fd);
12976         safe_close(tree_fd);
12977         safe_close(userns_fd);
12978
12979         return fret;
12980 }
12981
/* Names of the btrfs subvolumes created by btrfs_subvolume_lookup_user(). */
#define BTRFS_SUBVOLUME_SUBVOL1 "subvol1"
#define BTRFS_SUBVOLUME_SUBVOL2 "subvol2"
#define BTRFS_SUBVOLUME_SUBVOL3 "subvol3"
#define BTRFS_SUBVOLUME_SUBVOL4 "subvol4"

/*
 * Index of each subvolume above in the per-test subvolume_fds[] and
 * subvolume_ids[] arrays; the arrays are sized as the last index plus one.
 */
#define BTRFS_SUBVOLUME_SUBVOL1_ID 0
#define BTRFS_SUBVOLUME_SUBVOL2_ID 1
#define BTRFS_SUBVOLUME_SUBVOL3_ID 2
#define BTRFS_SUBVOLUME_SUBVOL4_ID 3

/* Plain directories: dir1 is created inside subvol1, dir2 inside dir1. */
#define BTRFS_SUBVOLUME_DIR1 "dir1"
#define BTRFS_SUBVOLUME_DIR2 "dir2"

/*
 * Directory created below t_mnt_fd that serves as the mountpoint for subvol1
 * of the scratch device.
 */
#define BTRFS_SUBVOLUME_MNT "mnt_subvolume1"

/*
 * Paths of the nested entries, spelled relative to the directory containing
 * subvol1 (e.g. used with t_mnt_scratch_fd as the base descriptor).
 */
#define BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3 "subvol1/subvol3"
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 "subvol1/dir1/dir2"
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4 "subvol1/dir1/dir2/subvol4"
13000
13001 /*
13002  * We create the following mount layout to test lookup:
13003  *
13004  * |-/mnt/test                    /dev/loop0                   btrfs       rw,relatime,space_cache,subvolid=5,subvol=/
13005  * | |-/mnt/test/mnt1             /dev/loop1[/subvol1]         btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=268,subvol=/subvol1
13006  * '-/mnt/scratch                 /dev/loop1                   btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=5,subvol=/
13007  */
13008 static int btrfs_subvolume_lookup_user(void)
13009 {
13010         int fret = -1, i;
13011         int dir1_fd = -EBADF, dir2_fd = -EBADF, mnt_fd = -EBADF,
13012             open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
13013         int subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
13014         uint64_t subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
13015         uint64_t subvolid = -EINVAL;
13016         struct mount_attr attr = {
13017                 .attr_set = MOUNT_ATTR_IDMAP,
13018         };
13019         pid_t pid;
13020         struct btrfs_iter *iter;
13021
13022         if (!caps_supported())
13023                 return 0;
13024
13025         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
13026                 subvolume_fds[i] = -EBADF;
13027
13028         for (i = 0; i < ARRAY_SIZE(subvolume_ids); i++)
13029                 subvolume_ids[i] = -EINVAL;
13030
13031         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL1)) {
13032                 log_stderr("failure: btrfs_create_subvolume");
13033                 goto out;
13034         }
13035
13036         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL2)) {
13037                 log_stderr("failure: btrfs_create_subvolume");
13038                 goto out;
13039         }
13040
13041         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] = openat(t_mnt_scratch_fd,
13042                                                            BTRFS_SUBVOLUME_SUBVOL1,
13043                                                            O_CLOEXEC | O_DIRECTORY);
13044         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] < 0) {
13045                 log_stderr("failure: openat");
13046                 goto out;
13047         }
13048
13049         /* create subvolume */
13050         if (btrfs_create_subvolume(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_SUBVOL3)) {
13051                 log_stderr("failure: btrfs_create_subvolume");
13052                 goto out;
13053         }
13054
13055         if (mkdirat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1, 0777)) {
13056                 log_stderr("failure: mkdirat");
13057                 goto out;
13058         }
13059
13060         dir1_fd = openat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1,
13061                          O_CLOEXEC | O_DIRECTORY);
13062         if (dir1_fd < 0) {
13063                 log_stderr("failure: openat");
13064                 goto out;
13065         }
13066
13067         if (mkdirat(dir1_fd, BTRFS_SUBVOLUME_DIR2, 0777)) {
13068                 log_stderr("failure: mkdirat");
13069                 goto out;
13070         }
13071
13072         dir2_fd = openat(dir1_fd, BTRFS_SUBVOLUME_DIR2, O_CLOEXEC | O_DIRECTORY);
13073         if (dir2_fd < 0) {
13074                 log_stderr("failure: openat");
13075                 goto out;
13076         }
13077
13078         if (btrfs_create_subvolume(dir2_fd, BTRFS_SUBVOLUME_SUBVOL4)) {
13079                 log_stderr("failure: btrfs_create_subvolume");
13080                 goto out;
13081         }
13082
13083         if (mkdirat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, 0777)) {
13084                 log_stderr("failure: mkdirat");
13085                 goto out;
13086         }
13087
13088         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13089         if (sys_mount(t_device_scratch, t_buf, "btrfs", 0,
13090                       "subvol=" BTRFS_SUBVOLUME_SUBVOL1)) {
13091                 log_stderr("failure: mount");
13092                 goto out;
13093         }
13094
13095         mnt_fd = openat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, O_CLOEXEC | O_DIRECTORY);
13096         if (mnt_fd < 0) {
13097                 log_stderr("failure: openat");
13098                 goto out;
13099         }
13100
13101         if (chown_r(t_mnt_scratch_fd, ".", 1000, 1000)) {
13102                 log_stderr("failure: chown_r");
13103                 goto out;
13104         }
13105
13106         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] = openat(t_mnt_scratch_fd,
13107                                                            BTRFS_SUBVOLUME_SUBVOL2,
13108                                                            O_CLOEXEC | O_DIRECTORY);
13109         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] < 0) {
13110                 log_stderr("failure: openat");
13111                 goto out;
13112         }
13113
13114         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID],
13115                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL1_ID])) {
13116                 log_stderr("failure: btrfs_get_subvolume_id");
13117                 goto out;
13118         }
13119
13120         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID],
13121                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL2_ID])) {
13122                 log_stderr("failure: btrfs_get_subvolume_id");
13123                 goto out;
13124         }
13125
13126         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] = openat(t_mnt_scratch_fd,
13127                                                            BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3,
13128                                                            O_CLOEXEC | O_DIRECTORY);
13129         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] < 0) {
13130                 log_stderr("failure: openat");
13131                 goto out;
13132         }
13133
13134         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID],
13135                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])) {
13136                 log_stderr("failure: btrfs_get_subvolume_id");
13137                 goto out;
13138         }
13139
13140         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] = openat(t_mnt_scratch_fd,
13141                                                            BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4,
13142                                                            O_CLOEXEC | O_DIRECTORY);
13143         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] < 0) {
13144                 log_stderr("failure: openat");
13145                 goto out;
13146         }
13147
13148         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID],
13149                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])) {
13150                 log_stderr("failure: btrfs_get_subvolume_id");
13151                 goto out;
13152         }
13153
13154
13155         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13156                 log_stderr("failure: fchmod");
13157                 goto out;
13158         }
13159
13160         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13161                 log_stderr("failure: fchmod");
13162                 goto out;
13163         }
13164
13165         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
13166         if (attr.userns_fd < 0) {
13167                 log_stderr("failure: get_userns_fd");
13168                 goto out;
13169         }
13170
13171         open_tree_fd = sys_open_tree(mnt_fd, "",
13172                                      AT_EMPTY_PATH |
13173                                      AT_NO_AUTOMOUNT |
13174                                      AT_SYMLINK_NOFOLLOW |
13175                                      OPEN_TREE_CLOEXEC |
13176                                      OPEN_TREE_CLONE);
13177         if (open_tree_fd < 0) {
13178                 log_stderr("failure: sys_open_tree");
13179                 goto out;
13180         }
13181
13182         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13183                 log_stderr("failure: sys_mount_setattr");
13184                 goto out;
13185         }
13186
13187         /*
13188          * The open_tree() syscall returns an O_PATH file descriptor which we
13189          * can't use with ioctl(). So let's reopen it as a proper file
13190          * descriptor.
13191          */
13192         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
13193         if (tree_fd < 0) {
13194                 log_stderr("failure: openat");
13195                 goto out;
13196         }
13197
13198         pid = fork();
13199         if (pid < 0) {
13200                 log_stderr("failure: fork");
13201                 goto out;
13202         }
13203         if (pid == 0) {
13204                 bool subvolume3_found = false, subvolume4_found = false;
13205
13206                 if (!switch_fsids(11000, 11000))
13207                         die("failure: switch fsids");
13208
13209                 if (!caps_down())
13210                         die("failure: lower caps");
13211
13212                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13213                         die("failure: btrfs_iterator_start");
13214
13215                 for (;;) {
13216                         char *subvol_path = NULL;
13217                         int ret;
13218
13219                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13220                         if (ret == 1)
13221                                 break;
13222                         else if (ret)
13223                                 die("failure: btrfs_iterator_next");
13224
13225                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13226                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13227                                 die("failure: subvolume id %llu->%s",
13228                                     (long long unsigned)subvolid, subvol_path);
13229
13230                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13231                                 subvolume3_found = true;
13232
13233                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13234                                 subvolume4_found = true;
13235
13236                         free(subvol_path);
13237                 }
13238                 btrfs_iterator_end(iter);
13239
13240                 if (!subvolume3_found || !subvolume4_found)
13241                         die("failure: subvolume id");
13242
13243                 exit(EXIT_SUCCESS);
13244         }
13245         if (wait_for_pid(pid))
13246                 goto out;
13247
13248         pid = fork();
13249         if (pid < 0) {
13250                 log_stderr("failure: fork");
13251                 goto out;
13252         }
13253         if (pid == 0) {
13254                 bool subvolume3_found = false, subvolume4_found = false;
13255
13256                 if (!switch_userns(attr.userns_fd, 0, 0, false))
13257                         die("failure: switch_userns");
13258
13259                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13260                         die("failure: btrfs_iterator_start");
13261
13262                 for (;;) {
13263                         char *subvol_path = NULL;
13264                         int ret;
13265
13266                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13267                         if (ret == 1)
13268                                 break;
13269                         else if (ret)
13270                                 die("failure: btrfs_iterator_next");
13271
13272                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13273                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13274                                 die("failure: subvolume id %llu->%s",
13275                                     (long long unsigned)subvolid, subvol_path);
13276
13277                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13278                                 subvolume3_found = true;
13279
13280                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13281                                 subvolume4_found = true;
13282
13283                         free(subvol_path);
13284                 }
13285                 btrfs_iterator_end(iter);
13286
13287                 if (!subvolume3_found || !subvolume4_found)
13288                         die("failure: subvolume id");
13289
13290                 exit(EXIT_SUCCESS);
13291         }
13292         if (wait_for_pid(pid))
13293                 goto out;
13294
13295         pid = fork();
13296         if (pid < 0) {
13297                 log_stderr("failure: fork");
13298                 goto out;
13299         }
13300         if (pid == 0) {
13301                 bool subvolume_found = false;
13302
13303                 if (!switch_fsids(0, 0))
13304                         die("failure: switch fsids");
13305
13306                 if (!caps_down())
13307                         die("failure: lower caps");
13308
13309                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13310                         die("failure: btrfs_iterator_start");
13311
13312                 for (;;) {
13313                         char *subvol_path = NULL;
13314                         int ret;
13315
13316                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13317                         if (ret == 1)
13318                                 break;
13319                         else if (ret)
13320                                 die("failure: btrfs_iterator_next");
13321
13322                         free(subvol_path);
13323
13324                         subvolume_found = true;
13325                         break;
13326                 }
13327                 btrfs_iterator_end(iter);
13328
13329                 if (subvolume_found)
13330                         die("failure: subvolume id");
13331
13332                 exit(EXIT_SUCCESS);
13333         }
13334         if (wait_for_pid(pid))
13335                 goto out;
13336
13337         userns_fd = get_userns_fd(0, 30000, 10000);
13338         if (userns_fd < 0) {
13339                 log_stderr("failure: get_userns_fd");
13340                 goto out;
13341         }
13342
13343         pid = fork();
13344         if (pid < 0) {
13345                 log_stderr("failure: fork");
13346                 goto out;
13347         }
13348         if (pid == 0) {
13349                 bool subvolume_found = false;
13350
13351                 if (!switch_userns(userns_fd, 0, 0, true))
13352                         die("failure: switch_userns");
13353
13354                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13355                         die("failure: btrfs_iterator_start");
13356
13357                 for (;;) {
13358                         char *subvol_path = NULL;
13359                         int ret;
13360
13361                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13362                         if (ret == 1)
13363                                 break;
13364                         else if (ret)
13365                                 die("failure: btrfs_iterator_next");
13366
13367                         free(subvol_path);
13368
13369                         subvolume_found = true;
13370                         break;
13371                 }
13372                 btrfs_iterator_end(iter);
13373
13374                 if (subvolume_found)
13375                         die("failure: subvolume id");
13376
13377                 exit(EXIT_SUCCESS);
13378         }
13379         if (wait_for_pid(pid))
13380                 goto out;
13381
13382         fret = 0;
13383         log_debug("Ran test");
13384 out:
13385         safe_close(dir1_fd);
13386         safe_close(dir2_fd);
13387         safe_close(open_tree_fd);
13388         safe_close(tree_fd);
13389         safe_close(userns_fd);
13390         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
13391                 safe_close(subvolume_fds[i]);
13392         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13393         sys_umount2(t_buf, MNT_DETACH);
13394         unlinkat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, AT_REMOVEDIR);
13395
13396         return fret;
13397 }
13398
/*
 * User names whose uid and gid setattr_fix_968219708108() resolves through
 * lookup_ids() in order to build its circular idmapping.
 */
#define USER1 "fsgqa"
#define USER2 "fsgqa2"

/**
 * lookup_ids - lookup uid and gid for a username
 * @name: [in]  name of the user
 * @uid:  [out] pointer to the user-ID
 * @gid:  [out] pointer to the group-ID
 *
 * Lookup the uid and gid of a user with getpwnam_r(). The scratch buffer
 * starts at the size suggested by sysconf(_SC_GETPW_R_SIZE_MAX), or at 1024
 * bytes when no usable suggestion is available, and is doubled for as long as
 * getpwnam_r() reports ERANGE, so an oversized passwd entry is not mistaken
 * for a missing user. @uid and @gid are only written on success.
 *
 * Return: On success, true is returned.
 *         On error, false is returned.
 */
static bool lookup_ids(const char *name, uid_t *uid, gid_t *gid)
{
        /* Hard cap on the scratch buffer so the ERANGE retry loop always terminates. */
        const size_t bufsize_max = 1024 * 1024;
        struct passwd *pwentp = NULL;
        struct passwd pwent;
        char *buf = NULL;
        size_t bufsize;
        long size_hint;
        int ret;

        /* sysconf() returns -1 when the limit is indeterminate. */
        size_hint = sysconf(_SC_GETPW_R_SIZE_MAX);
        bufsize = size_hint > 0 ? (size_t)size_hint : 1024;

        for (;;) {
                char *tmp;

                /* Keep the old pointer until realloc() succeeded to avoid a leak. */
                tmp = realloc(buf, bufsize);
                if (!tmp) {
                        free(buf);
                        return false;
                }
                buf = tmp;

                ret = getpwnam_r(name, &pwent, buf, bufsize, &pwentp);
                if (ret != ERANGE || bufsize >= bufsize_max)
                        break;

                /* The entry didn't fit; retry with twice the space. */
                bufsize *= 2;
        }

        /* A zero return with a NULL result pointer means "no such user". */
        if (ret || !pwentp) {
                free(buf);
                return false;
        }

        *uid = pwent.pw_uid;
        *gid = pwent.pw_gid;

        free(buf);
        return true;
}
13440
13441 /**
13442  * setattr_fix_968219708108 - test for commit 968219708108 ("fs: handle circular mappings correctly")
13443  *
13444  * Test that ->setattr() works correctly for idmapped mounts with circular
13445  * idmappings such as:
13446  *
13447  * b:1000:1001:1
13448  * b:1001:1000:1
13449  *
13450  * Assume a directory /source with two files:
13451  *
13452  * /source/file1 | 1000:1000
13453  * /source/file2 | 1001:1001
13454  *
13455  * and we create an idmapped mount of /source at /target with an idmapped of:
13456  *
13457  * mnt_userns:        1000:1001:1
13458  *                    1001:1000:1
13459  *
13460  * In the idmapped mount file1 will be owned by uid 1001 and file2 by uid 1000:
13461  *
13462  * /target/file1 | 1001:1001
13463  * /target/file2 | 1000:1000
13464  *
13465  * Because in essence the idmapped mount switches ownership for {g,u}id 1000
13466  * and {g,u}id 1001.
13467  *
13468  * 1. A user with fs{g,u}id 1000 must be allowed to setattr /target/file2 from
13469  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13470  * 2. A user with fs{g,u}id 1001 must be allowed to setattr /target/file1 from
13471  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1001.
13472  * 3. A user with fs{g,u}id 1000 must fail to setattr /target/file1 from
13473  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1000.
13474  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13475  *    {g,u}id of the file.
13476  * 4. A user with fs{g,u}id 1001 must fail to setattr /target/file2 from
13477  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13478  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13479  *    {g,u}id of the file.
13480  * 5. Both, a user with fs{g,u}id 1000 and a user with fs{g,u}id 1001, must
13481  *    fail to setattr /target/file1 owned by {g,u}id 1001 in the idmapped mount
13482  *    and /target/file2 owned by {g,u}id 1000 in the idmapped mount to any
13483  *    {g,u}id apart from {g,u}id 1000 or 1001 with EINVAL.
13484  *    Only {g,u}id 1000 and 1001 have a mapping in the idmapped mount. Other
13485  *    {g,u}id are unmapped.
13486  */
13487 static int setattr_fix_968219708108(void)
13488 {
13489         int fret = -1;
13490         int open_tree_fd = -EBADF;
13491         struct mount_attr attr = {
13492                 .attr_set       = MOUNT_ATTR_IDMAP,
13493                 .userns_fd      = -EBADF,
13494         };
13495         int ret;
13496         uid_t user1_uid, user2_uid;
13497         gid_t user1_gid, user2_gid;
13498         pid_t pid;
13499         struct list idmap;
13500         struct list *it_cur, *it_next;
13501
13502         if (!caps_supported())
13503                 return 0;
13504
13505         list_init(&idmap);
13506
13507         if (!lookup_ids(USER1, &user1_uid, &user1_gid)) {
13508                 log_stderr("failure: lookup_user");
13509                 goto out;
13510         }
13511
13512         if (!lookup_ids(USER2, &user2_uid, &user2_gid)) {
13513                 log_stderr("failure: lookup_user");
13514                 goto out;
13515         }
13516
13517         log_debug("Found " USER1 " with uid(%d) and gid(%d) and " USER2 " with uid(%d) and gid(%d)",
13518                   user1_uid, user1_gid, user2_uid, user2_gid);
13519
13520         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
13521                 log_stderr("failure: mkdirat");
13522                 goto out;
13523         }
13524
13525         if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | 0644, 0)) {
13526                 log_stderr("failure: mknodat");
13527                 goto out;
13528         }
13529
13530         if (chown_r(t_mnt_fd, T_DIR1, user1_uid, user1_gid)) {
13531                 log_stderr("failure: chown_r");
13532                 goto out;
13533         }
13534
13535         if (mknodat(t_dir1_fd, DIR1 "/" FILE2, S_IFREG | 0644, 0)) {
13536                 log_stderr("failure: mknodat");
13537                 goto out;
13538         }
13539
13540         if (fchownat(t_dir1_fd, DIR1 "/" FILE2, user2_uid, user2_gid, AT_SYMLINK_NOFOLLOW)) {
13541                 log_stderr("failure: fchownat");
13542                 goto out;
13543         }
13544
13545         print_r(t_mnt_fd, T_DIR1);
13546
13547         /* u:1000:1001:1 */
13548         ret = add_map_entry(&idmap, user1_uid, user2_uid, 1, ID_TYPE_UID);
13549         if (ret) {
13550                 log_stderr("failure: add_map_entry");
13551                 goto out;
13552         }
13553
13554         /* u:1001:1000:1 */
13555         ret = add_map_entry(&idmap, user2_uid, user1_uid, 1, ID_TYPE_UID);
13556         if (ret) {
13557                 log_stderr("failure: add_map_entry");
13558                 goto out;
13559         }
13560
13561         /* g:1000:1001:1 */
13562         ret = add_map_entry(&idmap, user1_gid, user2_gid, 1, ID_TYPE_GID);
13563         if (ret) {
13564                 log_stderr("failure: add_map_entry");
13565                 goto out;
13566         }
13567
13568         /* g:1001:1000:1 */
13569         ret = add_map_entry(&idmap, user2_gid, user1_gid, 1, ID_TYPE_GID);
13570         if (ret) {
13571                 log_stderr("failure: add_map_entry");
13572                 goto out;
13573         }
13574
13575         attr.userns_fd = get_userns_fd_from_idmap(&idmap);
13576         if (attr.userns_fd < 0) {
13577                 log_stderr("failure: get_userns_fd");
13578                 goto out;
13579         }
13580
13581         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
13582                                      AT_NO_AUTOMOUNT |
13583                                      AT_SYMLINK_NOFOLLOW |
13584                                      OPEN_TREE_CLOEXEC |
13585                                      OPEN_TREE_CLONE |
13586                                      AT_RECURSIVE);
13587         if (open_tree_fd < 0) {
13588                 log_stderr("failure: sys_open_tree");
13589                 goto out;
13590         }
13591
13592         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13593                 log_stderr("failure: sys_mount_setattr");
13594                 goto out;
13595         }
13596
13597         print_r(open_tree_fd, "");
13598
13599         pid = fork();
13600         if (pid < 0) {
13601                 log_stderr("failure: fork");
13602                 goto out;
13603         }
13604         if (pid == 0) {
13605                 /* switch to {g,u}id 1001 */
13606                 if (!switch_resids(user2_uid, user2_gid))
13607                         die("failure: switch_resids");
13608
13609                 /* drop all capabilities */
13610                 if (!caps_down())
13611                         die("failure: caps_down");
13612
13613                 /*
13614                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13615                  * needs to fail with EINVAL.
13616                  */
13617                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13618                         die("failure: change ownership");
13619                 if (errno != EINVAL)
13620                         die("failure: errno");
13621
13622                 /*
13623                  * A user with fs{g,u}id 1001 must be allowed to change
13624                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13625                  * idmapped mount to {g,u}id 1001.
13626                  */
13627                 if (fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13628                              AT_SYMLINK_NOFOLLOW))
13629                         die("failure: change ownership");
13630
13631                 /* Verify that the ownership is still {g,u}id 1001. */
13632                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13633                                       user2_uid, user2_gid))
13634                         die("failure: check ownership");
13635
13636                 /*
13637                  * A user with fs{g,u}id 1001 must not be allowed to change
13638                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13639                  * idmapped mount to {g,u}id 1000.
13640                  */
13641                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13642                               AT_SYMLINK_NOFOLLOW))
13643                         die("failure: change ownership");
13644                 if (errno != EPERM)
13645                         die("failure: errno");
13646
13647                 /* Verify that the ownership is still {g,u}id 1001. */
13648                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13649                                       user2_uid, user2_gid))
13650                         die("failure: check ownership");
13651
13652                 /*
13653                  * A user with fs{g,u}id 1001 must not be allowed to change
13654                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13655                  * idmapped mount to {g,u}id 1000.
13656                  */
13657                 if (!fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13658                               AT_SYMLINK_NOFOLLOW))
13659                         die("failure: change ownership");
13660                 if (errno != EPERM)
13661                         die("failure: errno");
13662
13663                 /* Verify that the ownership is still {g,u}id 1000. */
13664                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13665                                       user1_uid, user1_gid))
13666                         die("failure: check ownership");
13667
13668                 /*
13669                  * A user with fs{g,u}id 1001 must not be allowed to change
13670                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13671                  * idmapped mount to {g,u}id 1001.
13672                  */
13673                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13674                               AT_SYMLINK_NOFOLLOW))
13675                         die("failure: change ownership");
13676                 if (errno != EPERM)
13677                         die("failure: errno");
13678
13679                 /* Verify that the ownership is still {g,u}id 1000. */
13680                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13681                                       user1_uid, user1_gid))
13682                         die("failure: check ownership");
13683
13684                 exit(EXIT_SUCCESS);
13685         }
13686         if (wait_for_pid(pid))
13687                 goto out;
13688
13689         pid = fork();
13690         if (pid < 0) {
13691                 log_stderr("failure: fork");
13692                 goto out;
13693         }
13694         if (pid == 0) {
13695                 /* switch to {g,u}id 1000 */
13696                 if (!switch_resids(user1_uid, user1_gid))
13697                         die("failure: switch_resids");
13698
13699                 /* drop all capabilities */
13700                 if (!caps_down())
13701                         die("failure: caps_down");
13702
13703                 /*
13704                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13705                  * needs to fail with EINVAL.
13706                  */
13707                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13708                         die("failure: change ownership");
13709                 if (errno != EINVAL)
13710                         die("failure: errno");
13711
13712                 /*
13713                  * A user with fs{g,u}id 1000 must be allowed to change
13714                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13715                  * idmapped mount to {g,u}id 1000.
13716                  */
13717                 if (fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13718                              AT_SYMLINK_NOFOLLOW))
13719                         die("failure: change ownership");
13720
13721                 /* Verify that the ownership is still {g,u}id 1000. */
13722                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13723                                       user1_uid, user1_gid))
13724                         die("failure: check ownership");
13725
13726                 /*
13727                  * A user with fs{g,u}id 1000 must not be allowed to change
13728                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13729                  * idmapped mount to {g,u}id 1001.
13730                  */
13731                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13732                               AT_SYMLINK_NOFOLLOW))
13733                         die("failure: change ownership");
13734                 if (errno != EPERM)
13735                         die("failure: errno");
13736
13737                 /* Verify that the ownership is still {g,u}id 1000. */
13738                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13739                                       user1_uid, user1_gid))
13740                         die("failure: check ownership");
13741
13742                 /*
13743                  * A user with fs{g,u}id 1000 must not be allowed to change
13744                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13745                  * idmapped mount to {g,u}id 1000.
13746                  */
13747                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13748                              AT_SYMLINK_NOFOLLOW))
13749                         die("failure: change ownership");
13750                 if (errno != EPERM)
13751                         die("failure: errno");
13752
13753                 /* Verify that the ownership is still {g,u}id 1001. */
13754                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13755                                       user2_uid, user2_gid))
13756                         die("failure: check ownership");
13757
13758                 /*
13759                  * A user with fs{g,u}id 1000 must not be allowed to change
13760                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13761                  * idmapped mount to {g,u}id 1001.
13762                  */
13763                 if (!fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13764                               AT_SYMLINK_NOFOLLOW))
13765                         die("failure: change ownership");
13766                 if (errno != EPERM)
13767                         die("failure: errno");
13768
13769                 /* Verify that the ownership is still {g,u}id 1001. */
13770                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13771                                       user2_uid, user2_gid))
13772                         die("failure: check ownership");
13773
13774                 exit(EXIT_SUCCESS);
13775         }
13776         if (wait_for_pid(pid))
13777                 goto out;
13778
13779         fret = 0;
13780         log_debug("Ran test");
13781 out:
13782         safe_close(attr.userns_fd);
13783         safe_close(open_tree_fd);
13784
13785         list_for_each_safe(it_cur, &idmap, it_next) {
13786                 list_del(it_cur);
13787                 free(it_cur->elem);
13788                 free(it_cur);
13789         }
13790
13791         return fret;
13792 }
13793
/* Print the help text to stderr and terminate the process with EXIT_SUCCESS. */
static void usage(void)
{
        static const char help_text[] =
                "Description:\n"
                "    Run idmapped mount tests\n\n"
                "Arguments:\n"
                "--device                            Device used in the tests\n"
                "--fstype                            Filesystem type used in the tests\n"
                "--help                              Print help\n"
                "--mountpoint                        Mountpoint of device\n"
                "--supported                         Test whether idmapped mounts are supported on this filesystem\n"
                "--scratch-mountpoint                Mountpoint of scratch device used in the tests\n"
                "--scratch-device                    Scratch device used in the tests\n"
                "--test-core                         Run core idmapped mount testsuite\n"
                "--test-fscaps-regression            Run fscap regression tests\n"
                "--test-nested-userns                Run nested userns idmapped mount testsuite\n"
                "--test-btrfs                        Run btrfs specific idmapped mount testsuite\n"
                "--test-setattr-fix-968219708108     Run setattr regression tests\n";

        fputs(help_text, stderr);

        _exit(EXIT_SUCCESS);
}
13815
/*
 * Long command line options. Each entry pairs a "--name" with the
 * single-character code stored in .val; the all-zero entry ends the table.
 */
static const struct option longopts[] = {
        { .name = "device",                        .has_arg = required_argument, .flag = NULL, .val = 'd' },
        { .name = "fstype",                        .has_arg = required_argument, .flag = NULL, .val = 'f' },
        { .name = "mountpoint",                    .has_arg = required_argument, .flag = NULL, .val = 'm' },
        { .name = "scratch-mountpoint",            .has_arg = required_argument, .flag = NULL, .val = 'a' },
        { .name = "scratch-device",                .has_arg = required_argument, .flag = NULL, .val = 'e' },
        { .name = "supported",                     .has_arg = no_argument,       .flag = NULL, .val = 's' },
        { .name = "help",                          .has_arg = no_argument,       .flag = NULL, .val = 'h' },
        { .name = "test-core",                     .has_arg = no_argument,       .flag = NULL, .val = 'c' },
        { .name = "test-fscaps-regression",        .has_arg = no_argument,       .flag = NULL, .val = 'g' },
        { .name = "test-nested-userns",            .has_arg = no_argument,       .flag = NULL, .val = 'n' },
        { .name = "test-btrfs",                    .has_arg = no_argument,       .flag = NULL, .val = 'b' },
        { .name = "test-setattr-fix-968219708108", .has_arg = no_argument,       .flag = NULL, .val = 'i' },
        { .name = NULL,                            .has_arg = 0,                 .flag = NULL, .val = 0   },
};
13831
13832 struct t_idmapped_mounts {
13833         int (*test)(void);
13834         const char *description;
13835 } basic_suite[] = {
13836         { acls,                                                         "posix acls on regular mounts",                                                                 },
13837         { create_in_userns,                                             "create operations in user namespace",                                                          },
13838         { device_node_in_userns,                                        "device node in user namespace",                                                                },
13839         { expected_uid_gid_idmapped_mounts,                             "expected ownership on idmapped mounts",                                                        },
13840         { fscaps,                                                       "fscaps on regular mounts",                                                                     },
13841         { fscaps_idmapped_mounts,                                       "fscaps on idmapped mounts",                                                                    },
13842         { fscaps_idmapped_mounts_in_userns,                             "fscaps on idmapped mounts in user namespace",                                                  },
13843         { fscaps_idmapped_mounts_in_userns_separate_userns,             "fscaps on idmapped mounts in user namespace with different id mappings",                       },
13844         { fsids_mapped,                                                 "mapped fsids",                                                                                 },
13845         { fsids_unmapped,                                               "unmapped fsids",                                                                               },
13846         { hardlink_crossing_mounts,                                     "cross mount hardlink",                                                                         },
13847         { hardlink_crossing_idmapped_mounts,                            "cross idmapped mount hardlink",                                                                },
13848         { hardlink_from_idmapped_mount,                                 "hardlinks from idmapped mounts",                                                               },
13849         { hardlink_from_idmapped_mount_in_userns,                       "hardlinks from idmapped mounts in user namespace",                                             },
13850 #ifdef HAVE_LIBURING_H
13851         { io_uring,                                                     "io_uring",                                                                                     },
13852         { io_uring_userns,                                              "io_uring in user namespace",                                                                   },
13853         { io_uring_idmapped,                                            "io_uring from idmapped mounts",                                                                },
13854         { io_uring_idmapped_userns,                                     "io_uring from idmapped mounts in user namespace",                                              },
13855         { io_uring_idmapped_unmapped,                                   "io_uring from idmapped mounts with unmapped ids",                                              },
13856         { io_uring_idmapped_unmapped_userns,                            "io_uring from idmapped mounts with unmapped ids in user namespace",                            },
13857 #endif
13858         { protected_symlinks,                                           "following protected symlinks on regular mounts",                                               },
13859         { protected_symlinks_idmapped_mounts,                           "following protected symlinks on idmapped mounts",                                              },
13860         { protected_symlinks_idmapped_mounts_in_userns,                 "following protected symlinks on idmapped mounts in user namespace",                            },
13861         { rename_crossing_mounts,                                       "cross mount rename",                                                                           },
13862         { rename_crossing_idmapped_mounts,                              "cross idmapped mount rename",                                                                  },
13863         { rename_from_idmapped_mount,                                   "rename from idmapped mounts",                                                                  },
13864         { rename_from_idmapped_mount_in_userns,                         "rename from idmapped mounts in user namespace",                                                },
13865         { setattr_truncate,                                             "setattr truncate",                                                                             },
13866         { setattr_truncate_idmapped,                                    "setattr truncate on idmapped mounts",                                                          },
13867         { setattr_truncate_idmapped_in_userns,                          "setattr truncate on idmapped mounts in user namespace",                                        },
13868         { setgid_create,                                                "create operations in directories with setgid bit set",                                         },
13869         { setgid_create_idmapped,                                       "create operations in directories with setgid bit set on idmapped mounts",                      },
13870         { setgid_create_idmapped_in_userns,                             "create operations in directories with setgid bit set on idmapped mounts in user namespace",    },
13871         { setid_binaries,                                               "setid binaries on regular mounts",                                                             },
13872         { setid_binaries_idmapped_mounts,                               "setid binaries on idmapped mounts",                                                            },
13873         { setid_binaries_idmapped_mounts_in_userns,                     "setid binaries on idmapped mounts in user namespace",                                          },
13874         { setid_binaries_idmapped_mounts_in_userns_separate_userns,     "setid binaries on idmapped mounts in user namespace with different id mappings",               },
13875         { sticky_bit_unlink,                                            "sticky bit unlink operations on regular mounts",                                               },
13876         { sticky_bit_unlink_idmapped_mounts,                            "sticky bit unlink operations on idmapped mounts",                                              },
13877         { sticky_bit_unlink_idmapped_mounts_in_userns,                  "sticky bit unlink operations on idmapped mounts in user namespace",                            },
13878         { sticky_bit_rename,                                            "sticky bit rename operations on regular mounts",                                               },
13879         { sticky_bit_rename_idmapped_mounts,                            "sticky bit rename operations on idmapped mounts",                                              },
13880         { sticky_bit_rename_idmapped_mounts_in_userns,                  "sticky bit rename operations on idmapped mounts in user namespace",                            },
13881         { symlink_regular_mounts,                                       "symlink from regular mounts",                                                                  },
13882         { symlink_idmapped_mounts,                                      "symlink from idmapped mounts",                                                                 },
13883         { symlink_idmapped_mounts_in_userns,                            "symlink from idmapped mounts in user namespace",                                               },
13884         { threaded_idmapped_mount_interactions,                         "threaded operations on idmapped mounts",                                                       },
13885 };
13886
13887 struct t_idmapped_mounts fscaps_in_ancestor_userns[] = {
13888         { fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns,    "fscaps on idmapped mounts in user namespace writing fscap valid in ancestor userns",           },
13889 };
13890
13891 struct t_idmapped_mounts t_nested_userns[] = {
13892         { nested_userns,                                                "test that nested user namespaces behave correctly when attached to idmapped mounts",           },
13893 };
13894
13895 struct t_idmapped_mounts t_btrfs[] = {
13896         { btrfs_subvolumes_fsids_mapped,                                "test subvolumes with mapped fsids",                                                            },
13897         { btrfs_subvolumes_fsids_mapped_userns,                         "test subvolumes with mapped fsids inside user namespace",                                      },
13898         { btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed,         "test subvolume deletion with user_subvol_rm_allowed mount option",                             },
13899         { btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed,  "test subvolume deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13900         { btrfs_subvolumes_fsids_unmapped,                              "test subvolumes with unmapped fsids",                                                          },
13901         { btrfs_subvolumes_fsids_unmapped_userns,                       "test subvolumes with unmapped fsids inside user namespace",                                    },
13902         { btrfs_snapshots_fsids_mapped,                                 "test snapshots with mapped fsids",                                                             },
13903         { btrfs_snapshots_fsids_mapped_userns,                          "test snapshots with mapped fsids inside user namespace",                                       },
13904         { btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed,          "test snapshots deletion with user_subvol_rm_allowed mount option",                             },
13905         { btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed,   "test snapshots deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13906         { btrfs_snapshots_fsids_unmapped,                               "test snapshots with unmapped fsids",                                                           },
13907         { btrfs_snapshots_fsids_unmapped_userns,                        "test snapshots with unmapped fsids inside user namespace",                                     },
13908         { btrfs_delete_by_spec_id,                                      "test subvolume deletion by spec id",                                                           },
13909         { btrfs_subvolumes_setflags_fsids_mapped,                       "test subvolume flags with mapped fsids",                                                       },
13910         { btrfs_subvolumes_setflags_fsids_mapped_userns,                "test subvolume flags with mapped fsids inside user namespace",                                 },
13911         { btrfs_subvolumes_setflags_fsids_unmapped,                     "test subvolume flags with unmapped fsids",                                                     },
13912         { btrfs_subvolumes_setflags_fsids_unmapped_userns,              "test subvolume flags with unmapped fsids inside user namespace",                               },
13913         { btrfs_snapshots_setflags_fsids_mapped,                        "test snapshots flags with mapped fsids",                                                       },
13914         { btrfs_snapshots_setflags_fsids_mapped_userns,                 "test snapshots flags with mapped fsids inside user namespace",                                 },
13915         { btrfs_snapshots_setflags_fsids_unmapped,                      "test snapshots flags with unmapped fsids",                                                     },
13916         { btrfs_snapshots_setflags_fsids_unmapped_userns,               "test snapshots flags with unmapped fsids inside user namespace",                               },
13917         { btrfs_subvolume_lookup_user,                                  "test unprivileged subvolume lookup",                                                           },
13918 };
13919
13920 /* Test for commit 968219708108 ("fs: handle circular mappings correctly"). */
13921 struct t_idmapped_mounts t_setattr_fix_968219708108[] = {
13922         { setattr_fix_968219708108,                                     "test that setattr works correctly",                                                            },
13923 };
13924
13925 static bool run_test(struct t_idmapped_mounts suite[], size_t suite_size)
13926 {
13927         int i;
13928
13929         for (i = 0; i < suite_size; i++) {
13930                 struct t_idmapped_mounts *t = &suite[i];
13931                 int ret;
13932                 pid_t pid;
13933
13934                 test_setup();
13935
13936                 pid = fork();
13937                 if (pid < 0)
13938                         return false;
13939
13940                 if (pid == 0) {
13941                         ret = t->test();
13942                         if (ret)
13943                                 die("failure: %s", t->description);
13944
13945                         exit(EXIT_SUCCESS);
13946                 }
13947
13948                 ret = wait_for_pid(pid);
13949                 test_cleanup();
13950
13951                 if (ret)
13952                         return false;
13953         }
13954
13955         return true;
13956 }
13957
13958 int main(int argc, char *argv[])
13959 {
13960         int fret, ret;
13961         int index = 0;
13962         bool supported = false, test_btrfs = false, test_core = false,
13963              test_fscaps_regression = false, test_nested_userns = false,
13964              test_setattr_fix_968219708108 = false;
13965
13966         while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
13967                 switch (ret) {
13968                 case 'd':
13969                         t_device = optarg;
13970                         break;
13971                 case 'f':
13972                         t_fstype = optarg;
13973                         break;
13974                 case 'm':
13975                         t_mountpoint = optarg;
13976                         break;
13977                 case 's':
13978                         supported = true;
13979                         break;
13980                 case 'c':
13981                         test_core = true;
13982                         break;
13983                 case 'g':
13984                         test_fscaps_regression = true;
13985                         break;
13986                 case 'n':
13987                         test_nested_userns = true;
13988                         break;
13989                 case 'b':
13990                         test_btrfs = true;
13991                         break;
13992                 case 'a':
13993                         t_mountpoint_scratch = optarg;
13994                         break;
13995                 case 'e':
13996                         t_device_scratch = optarg;
13997                         break;
13998                 case 'i':
13999                         test_setattr_fix_968219708108 = true;
14000                         break;
14001                 case 'h':
14002                         /* fallthrough */
14003                 default:
14004                         usage();
14005                 }
14006         }
14007
14008         if (!t_device)
14009                 die_errno(EINVAL, "test device missing");
14010
14011         if (!t_fstype)
14012                 die_errno(EINVAL, "test filesystem type missing");
14013
14014         if (!t_mountpoint)
14015                 die_errno(EINVAL, "mountpoint of test device missing");
14016
14017         /* create separate mount namespace */
14018         if (unshare(CLONE_NEWNS))
14019                 die("failure: create new mount namespace");
14020
14021         /* turn off mount propagation */
14022         if (sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
14023                 die("failure: turn mount propagation off");
14024
14025         t_mnt_fd = openat(-EBADF, t_mountpoint, O_CLOEXEC | O_DIRECTORY);
14026         if (t_mnt_fd < 0)
14027                 die("failed to open %s", t_mountpoint);
14028
14029         t_mnt_scratch_fd = openat(-EBADF, t_mountpoint_scratch, O_CLOEXEC | O_DIRECTORY);
14030         if (t_mnt_fd < 0)
14031                 die("failed to open %s", t_mountpoint_scratch);
14032
14033         /*
14034          * Caller just wants to know whether the filesystem we're on supports
14035          * idmapped mounts.
14036          */
14037         if (supported) {
14038                 int open_tree_fd = -EBADF;
14039                 struct mount_attr attr = {
14040                         .attr_set       = MOUNT_ATTR_IDMAP,
14041                         .userns_fd      = -EBADF,
14042                 };
14043
14044                 /* Changing mount properties on a detached mount. */
14045                 attr.userns_fd  = get_userns_fd(0, 1000, 1);
14046                 if (attr.userns_fd < 0)
14047                         exit(EXIT_FAILURE);
14048
14049                 open_tree_fd = sys_open_tree(t_mnt_fd, "",
14050                                              AT_EMPTY_PATH |
14051                                              AT_NO_AUTOMOUNT |
14052                                              AT_SYMLINK_NOFOLLOW |
14053                                              OPEN_TREE_CLOEXEC |
14054                                              OPEN_TREE_CLONE);
14055                 if (open_tree_fd < 0)
14056                         ret = -1;
14057                 else
14058                         ret = sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr));
14059
14060                 close(open_tree_fd);
14061                 close(attr.userns_fd);
14062
14063                 if (ret)
14064                         exit(EXIT_FAILURE);
14065
14066                 exit(EXIT_SUCCESS);
14067         }
14068
14069         stash_overflowuid();
14070         stash_overflowgid();
14071
14072         fret = EXIT_FAILURE;
14073
14074         if (test_core && !run_test(basic_suite, ARRAY_SIZE(basic_suite)))
14075                 goto out;
14076
14077         if (test_fscaps_regression &&
14078             !run_test(fscaps_in_ancestor_userns,
14079                       ARRAY_SIZE(fscaps_in_ancestor_userns)))
14080                 goto out;
14081
14082         if (test_nested_userns &&
14083             !run_test(t_nested_userns, ARRAY_SIZE(t_nested_userns)))
14084                 goto out;
14085
14086         if (test_btrfs && !run_test(t_btrfs, ARRAY_SIZE(t_btrfs)))
14087                 goto out;
14088
14089         if (test_setattr_fix_968219708108 &&
14090             !run_test(t_setattr_fix_968219708108,
14091                       ARRAY_SIZE(t_setattr_fix_968219708108)))
14092                 goto out;
14093
14094         fret = EXIT_SUCCESS;
14095
14096 out:
14097         exit(fret);
14098 }