d3b27da6c0851426af23f68321fff05d71013159
[xfstests-dev.git] / src / idmapped-mounts / idmapped-mounts.c
1 // SPDX-License-Identifier: GPL-2.0
2 #ifndef _GNU_SOURCE
3 #define _GNU_SOURCE
4 #endif
5
6 #include "../global.h"
7
8 #include <dirent.h>
9 #include <errno.h>
10 #include <fcntl.h>
11 #include <getopt.h>
12 #include <grp.h>
13 #include <limits.h>
14 #include <linux/limits.h>
15 #include <linux/types.h>
16 #include <pthread.h>
17 #include <pwd.h>
18 #include <sched.h>
19 #include <stdbool.h>
20 #include <sys/fsuid.h>
21 #include <sys/stat.h>
22 #include <sys/types.h>
23 #include <sys/xattr.h>
24 #include <unistd.h>
25
26 #ifdef HAVE_LINUX_BTRFS_H
27 # ifndef HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID
28 #  define btrfs_ioctl_vol_args_v2 override_btrfs_ioctl_vol_args_v2
29 # endif
30 #include <linux/btrfs.h>
31 # undef btrfs_ioctl_vol_args_v2
32 #endif
33
34 #ifdef HAVE_LINUX_BTRFS_TREE_H
35 #include <linux/btrfs_tree.h>
36 #endif
37
38 #ifdef HAVE_SYS_CAPABILITY_H
39 #include <sys/capability.h>
40 #endif
41
42 #ifdef HAVE_LIBURING_H
43 #include <liburing.h>
44 #endif
45
46 #include "missing.h"
47 #include "utils.h"
48
/*
 * Names of the directory entries the tests operate on. T_DIR1 is the
 * per-test working directory that test_setup() creates beneath the test
 * mountpoint and test_cleanup() removes again; the remaining names are
 * used for entries inside that directory.
 */
#define T_DIR1 "idmapped_mounts_1"
#define FILE1 "file1"
#define FILE1_RENAME "file1_rename"
#define FILE2 "file2"
#define FILE2_RENAME "file2_rename"
#define DIR1 "dir1"
#define DIR2 "dir2"
#define DIR3 "dir3"
#define DIR1_RENAME "dir1_rename"
#define HARDLINK1 "hardlink1"
#define SYMLINK1 "symlink1"
#define SYMLINK_USER1 "symlink_user1"
#define SYMLINK_USER2 "symlink_user2"
#define SYMLINK_USER3 "symlink_user3"
#define CHRDEV1 "chrdev1"
64
/*
 * log_stderr - print a diagnostic to stderr
 *
 * The message is prefixed with the file, line and function of the call site
 * followed by the description of the current errno (the %m conversion), so
 * errno must still hold the value of interest when the macro is expanded.
 */
#define log_stderr(format, ...)                                                         \
        fprintf(stderr, "%s: %d: %s - %m - " format "\n", __FILE__, __LINE__, __func__, \
                ##__VA_ARGS__)

/*
 * log_debug - trace message that is only emitted when DEBUG_TRACE is defined
 *
 * Without DEBUG_TRACE the macro expands to a void expression rather than to
 * nothing, so that "if (cond) log_debug(...);" keeps a non-empty body. The
 * arguments are not evaluated in that configuration.
 */
#ifdef DEBUG_TRACE
#define log_debug(format, ...)                                           \
        fprintf(stderr, "%s: %d: %s - " format "\n", __FILE__, __LINE__, \
                __func__, ##__VA_ARGS__)
#else
#define log_debug(format, ...) ((void)0)
#endif

/*
 * log_error_errno - set errno to @__errno__, log the message and evaluate to
 * @__ret__
 *
 * @__ret__ is evaluated exactly once and before errno is overwritten, which
 * allows "return log_error_errno(-1, EINVAL, ...)". __typeof__ is used instead
 * of typeof so the macro also compiles in strict ISO modes (-std=c11).
 */
#define log_error_errno(__ret__, __errno__, format, ...)          \
        ({                                                        \
                __typeof__(__ret__) __internal_ret__ = (__ret__); \
                errno = (__errno__);                              \
                log_stderr(format, ##__VA_ARGS__);                \
                __internal_ret__;                                 \
        })

/* log_errno - log the message with the current errno and evaluate to @__ret__ */
#define log_errno(__ret__, format, ...) log_error_errno(__ret__, errno, format, ##__VA_ARGS__)

/* die_errno - set errno to @__errno__, log the message and exit with EXIT_FAILURE */
#define die_errno(__errno__, format, ...)          \
        ({                                         \
                errno = (__errno__);               \
                log_stderr(format, ##__VA_ARGS__); \
                exit(EXIT_FAILURE);                \
        })

/* die - log the message with the current errno and exit with EXIT_FAILURE */
#define die(format, ...) die_errno(errno, format, ##__VA_ARGS__)

/* ARRAY_SIZE - number of elements of a true array (not valid for pointers) */
#define ARRAY_SIZE(A) (sizeof(A) / sizeof((A)[0]))
97
/*
 * Overflow ids. 65534 is the built-in default; stash_overflowuid() and
 * stash_overflowgid() replace it with the value read from
 * /proc/sys/fs/overflowuid and /proc/sys/fs/overflowgid respectively.
 */
uid_t t_overflowuid = 65534;
gid_t t_overflowgid = 65534;

/* filesystem type of the test device (compared against e.g. "xfs") */
const char *t_fstype;

/* path of the test device */
const char *t_device;

/* path of the test scratch device */
const char *t_device_scratch;

/* mountpoint of the test device */
const char *t_mountpoint;

/* mountpoint of the test scratch device */
const char *t_mountpoint_scratch;

/* fd for @t_mountpoint */
int t_mnt_fd;

/* fd for @t_mountpoint_scratch */
int t_mnt_scratch_fd;

/* fd for @T_DIR1; opened by test_setup() and closed by test_cleanup() */
int t_dir1_fd;

/* temporary buffer */
char t_buf[PATH_MAX];

/* whether the underlying filesystem supports idmapped mounts */
bool t_fs_allow_idmap;
130
131 static void stash_overflowuid(void)
132 {
133         int fd;
134         ssize_t ret;
135         char buf[256];
136
137         fd = open("/proc/sys/fs/overflowuid", O_RDONLY | O_CLOEXEC);
138         if (fd < 0)
139                 return;
140
141         ret = read(fd, buf, sizeof(buf));
142         close(fd);
143         if (ret < 0)
144                 return;
145
146         t_overflowuid = atoi(buf);
147 }
148
149 static void stash_overflowgid(void)
150 {
151         int fd;
152         ssize_t ret;
153         char buf[256];
154
155         fd = open("/proc/sys/fs/overflowgid", O_RDONLY | O_CLOEXEC);
156         if (fd < 0)
157                 return;
158
159         ret = read(fd, buf, sizeof(buf));
160         close(fd);
161         if (ret < 0)
162                 return;
163
164         t_overflowgid = atoi(buf);
165 }
166
167 static bool is_xfs(void)
168 {
169         static int enabled = -1;
170
171         if (enabled == -1)
172                 enabled = !strcmp(t_fstype, "xfs");
173
174         return enabled;
175 }
176
177 static bool protected_symlinks_enabled(void)
178 {
179         static int enabled = -1;
180
181         if (enabled == -1) {
182                 int fd;
183                 ssize_t ret;
184                 char buf[256];
185
186                 enabled = 0;
187
188                 fd = open("/proc/sys/fs/protected_symlinks", O_RDONLY | O_CLOEXEC);
189                 if (fd < 0)
190                         return false;
191
192                 ret = read(fd, buf, sizeof(buf));
193                 close(fd);
194                 if (ret < 0)
195                         return false;
196
197                 if (atoi(buf) >= 1)
198                         enabled = 1;
199         }
200
201         return enabled == 1;
202 }
203
204 static bool xfs_irix_sgid_inherit_enabled(void)
205 {
206         static int enabled = -1;
207
208         if (enabled == -1) {
209                 int fd;
210                 ssize_t ret;
211                 char buf[256];
212
213                 enabled = 0;
214
215                 if (is_xfs()) {
216                         fd = open("/proc/sys/fs/xfs/irix_sgid_inherit", O_RDONLY | O_CLOEXEC);
217                         if (fd < 0)
218                                 return false;
219
220                         ret = read(fd, buf, sizeof(buf));
221                         close(fd);
222                         if (ret < 0)
223                                 return false;
224
225                         if (atoi(buf) >= 1)
226                                 enabled = 1;
227                 }
228         }
229
230         return enabled == 1;
231 }
232
/*
 * caps_supported - report whether capability support was compiled in
 *
 * True exactly when HAVE_SYS_CAPABILITY_H was defined at build time.
 */
static inline bool caps_supported(void)
{
#ifdef HAVE_SYS_CAPABILITY_H
        return true;
#else
        return false;
#endif
}
243
/*
 * caps_down - lower all effective caps
 *
 * Returns non-zero when the effective set was cleared and applied to the
 * process, zero otherwise (including builds without HAVE_SYS_CAPABILITY_H).
 */
static int caps_down(void)
{
        bool lowered = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t proc_caps = cap_get_proc();

        /* Each step only runs when the previous one succeeded. */
        if (proc_caps &&
            !cap_clear_flag(proc_caps, CAP_EFFECTIVE) &&
            !cap_set_proc(proc_caps))
                lowered = true;

        cap_free(proc_caps);
#endif
        return lowered;
}
271
/*
 * caps_up - raise all permitted caps
 *
 * Copies the state of every capability's permitted flag into the effective
 * set and applies the result to the process. Returns non-zero on success,
 * zero otherwise (including builds without HAVE_SYS_CAPABILITY_H).
 */
static int caps_up(void)
{
        bool raised = false;
#ifdef HAVE_SYS_CAPABILITY_H
        cap_t proc_caps;
        cap_value_t which;

        proc_caps = cap_get_proc();
        if (!proc_caps)
                goto done;

        for (which = 0; which <= CAP_LAST_CAP; which++) {
                cap_flag_value_t permitted;

                if (cap_get_flag(proc_caps, which, CAP_PERMITTED, &permitted)) {
                        /* EINVAL merely ends the scan; anything else is an error. */
                        if (errno != EINVAL)
                                goto done;
                        break;
                }

                if (cap_set_flag(proc_caps, CAP_EFFECTIVE, 1, &which, permitted))
                        goto done;
        }

        if (cap_set_proc(proc_caps) == 0)
                raised = true;
done:
        cap_free(proc_caps);
#endif
        return raised;
}
311
/*
 * __expected_uid_gid - check whether file is owned by the provided uid and gid
 *
 * Stats @path relative to @dfd with @flags. Returns true only when both the
 * uid and the gid match; when @log is set every mismatch is reported on
 * stderr. errno is reset to 0 once the stat has succeeded.
 */
static bool __expected_uid_gid(int dfd, const char *path, int flags,
                               uid_t expected_uid, gid_t expected_gid, bool log)
{
        struct stat sb;
        bool uid_ok, gid_ok;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return log_errno(false, "failure: fstatat");

        uid_ok = (sb.st_uid == expected_uid);
        gid_ok = (sb.st_gid == expected_gid);

        if (log) {
                if (!uid_ok)
                        log_stderr("failure: uid(%d) != expected_uid(%d)", sb.st_uid, expected_uid);

                if (!gid_ok)
                        log_stderr("failure: gid(%d) != expected_gid(%d)", sb.st_gid, expected_gid);
        }

        errno = 0; /* Don't report misleading errno. */
        return uid_ok && gid_ok;
}
332
/*
 * expected_uid_gid - ownership check that always logs mismatches
 *
 * Thin wrapper around __expected_uid_gid() with logging switched on.
 */
static bool expected_uid_gid(int dfd, const char *path, int flags,
                             uid_t expected_uid, gid_t expected_gid)
{
        const bool log_mismatch = true;

        return __expected_uid_gid(dfd, path, flags, expected_uid,
                                  expected_gid, log_mismatch);
}
339
/*
 * expected_file_size - check whether a file has the expected size
 *
 * Stats @path relative to @dfd with @flags and compares st_size against
 * @expected_size. Stat failures and size mismatches are logged and yield
 * false.
 */
static bool expected_file_size(int dfd, const char *path,
                               int flags, off_t expected_size)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return log_errno(false, "failure: fstatat");

        if (sb.st_size == expected_size)
                return true;

        return log_errno(false, "failure: st_size(%zu) != expected_size(%zu)",
                         (size_t)sb.st_size, (size_t)expected_size);
}
356
/*
 * is_setid - check whether file is S_ISUID or S_ISGID
 *
 * Returns true when at least one of the two bits is set in the mode of @path
 * (resolved relative to @dfd with @flags), false when neither is set or the
 * stat fails. errno is reset to 0 after a successful stat.
 */
static bool is_setid(int dfd, const char *path, int flags)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (sb.st_mode & (S_ISUID | S_ISGID)) != 0;
}
370
/*
 * is_setgid - check whether file or directory is S_ISGID
 *
 * Returns false when the stat fails. errno is reset to 0 after a successful
 * stat.
 */
static bool is_setgid(int dfd, const char *path, int flags)
{
        struct stat sb;

        if (fstatat(dfd, path, &sb, flags) < 0)
                return false;

        errno = 0; /* Don't report misleading errno. */
        return (sb.st_mode & S_ISGID) != 0;
}
384
385 /* is_sticky - check whether file is S_ISVTX */
386 static bool is_sticky(int dfd, const char *path, int flags)
387 {
388         int ret;
389         struct stat st;
390
391         ret = fstatat(dfd, path, &st, flags);
392         if (ret < 0)
393                 return false;
394
395         errno = 0; /* Don't report misleading errno. */
396         return (st.st_mode & S_ISVTX) > 0;
397 }
398
/*
 * switch_fsids - switch the caller's filesystem gid and uid
 *
 * setfsgid() and setfsuid() return the previous id on success and on failure
 * alike, so the return value of the switching call says nothing about whether
 * it worked (and is non-zero whenever the previous id was not 0). The only
 * reliable check is to query the current id afterwards by passing -1, which
 * never matches a valid id and therefore changes nothing.
 *
 * The gid is switched before the uid. Returns true when both ids read back as
 * requested, false (after logging) otherwise.
 */
static inline bool switch_fsids(uid_t fsuid, gid_t fsgid)
{
        setfsgid(fsgid);
        if ((gid_t)setfsgid(-1) != fsgid)
                return log_errno(false, "failure: setfsgid");

        setfsuid(fsuid);
        if ((uid_t)setfsuid(-1) != fsuid)
                return log_errno(false, "failure: setfsuid");

        return true;
}
415
/*
 * switch_resids - switch real, effective and saved gid and uid
 *
 * The group ids are switched before the user ids. Afterwards the filesystem
 * ids are read back (setfsgid(-1)/setfsuid(-1) only query) and must equal
 * @gid and @uid. Returns true on success, false (after logging) otherwise.
 */
static inline bool switch_resids(uid_t uid, gid_t gid)
{
        if (setresgid(gid, gid, gid))
                return log_errno(false, "failure: setresgid");

        if (setresuid(uid, uid, uid))
                return log_errno(false, "failure: setresuid");

        if (setfsgid(-1) != gid)
                return log_errno(false, "failure: setfsgid(-1)");

        if (setfsuid(-1) != uid)
                return log_errno(false, "failure: setfsuid(-1)");

        return true;
}
432
433 static inline bool switch_userns(int fd, uid_t uid, gid_t gid, bool drop_caps)
434 {
435         if (setns(fd, CLONE_NEWUSER))
436                 return log_errno(false, "failure: setns");
437
438         if (!switch_ids(uid, gid))
439                 return log_errno(false, "failure: switch_ids");
440
441         if (drop_caps && !caps_down())
442                 return log_errno(false, "failure: caps_down");
443
444         return true;
445 }
446
447 /* rm_r - recursively remove all files */
448 static int rm_r(int fd, const char *path)
449 {
450         int dfd, ret;
451         DIR *dir;
452         struct dirent *direntp;
453
454         if (!path || strcmp(path, "") == 0)
455                 return -1;
456
457         dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
458         if (dfd < 0)
459                 return -1;
460
461         dir = fdopendir(dfd);
462         if (!dir) {
463                 close(dfd);
464                 return -1;
465         }
466
467         while ((direntp = readdir(dir))) {
468                 struct stat st;
469
470                 if (!strcmp(direntp->d_name, ".") ||
471                     !strcmp(direntp->d_name, ".."))
472                         continue;
473
474                 ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
475                 if (ret < 0 && errno != ENOENT)
476                         break;
477
478                 if (S_ISDIR(st.st_mode))
479                         ret = rm_r(dfd, direntp->d_name);
480                 else
481                         ret = unlinkat(dfd, direntp->d_name, 0);
482                 if (ret < 0 && errno != ENOENT)
483                         break;
484         }
485
486         ret = unlinkat(fd, path, AT_REMOVEDIR);
487         closedir(dir);
488         return ret;
489 }
490
491 /* chown_r - recursively change ownership of all files */
492 static int chown_r(int fd, const char *path, uid_t uid, gid_t gid)
493 {
494         int dfd, ret;
495         DIR *dir;
496         struct dirent *direntp;
497
498         dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY);
499         if (dfd < 0)
500                 return -1;
501
502         dir = fdopendir(dfd);
503         if (!dir) {
504                 close(dfd);
505                 return -1;
506         }
507
508         while ((direntp = readdir(dir))) {
509                 struct stat st;
510
511                 if (!strcmp(direntp->d_name, ".") ||
512                     !strcmp(direntp->d_name, ".."))
513                         continue;
514
515                 ret = fstatat(dfd, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
516                 if (ret < 0 && errno != ENOENT)
517                         break;
518
519                 if (S_ISDIR(st.st_mode))
520                         ret = chown_r(dfd, direntp->d_name, uid, gid);
521                 else
522                         ret = fchownat(dfd, direntp->d_name, uid, gid, AT_SYMLINK_NOFOLLOW);
523                 if (ret < 0 && errno != ENOENT)
524                         break;
525         }
526
527         ret = fchownat(fd, path, uid, gid, AT_SYMLINK_NOFOLLOW);
528         closedir(dir);
529         return ret;
530 }
531
/*
 * There'll be scenarios where you'll want to see the attributes associated with
 * a directory tree during debugging or just to make sure things look correct.
 * Build with DEBUG_TRACE defined and place a call to the print_r() helper
 * where you need it; without DEBUG_TRACE print_r() does nothing.
 */
537 #ifdef DEBUG_TRACE
/*
 * fd_cloexec - set or clear the close-on-exec flag of a file descriptor
 *
 * The flag is only written when it actually has to change. Returns 0 on
 * success and a negative errno value when fcntl() fails.
 */
static int fd_cloexec(int fd, bool cloexec)
{
        int cur, want;

        cur = fcntl(fd, F_GETFD, 0);
        if (cur < 0)
                return -errno;

        want = cloexec ? (cur | FD_CLOEXEC) : (cur & ~FD_CLOEXEC);

        if (want != cur && fcntl(fd, F_SETFD, want) < 0)
                return -errno;

        return 0;
}
559
/*
 * dup_cloexec - duplicate a file descriptor with close-on-exec set
 *
 * Returns the new descriptor on success and a negative errno value on
 * failure. The error code from fd_cloexec() is captured before the duplicate
 * is closed because close() may overwrite errno.
 */
static inline int dup_cloexec(int fd)
{
        int fd_dup, ret;

        fd_dup = dup(fd);
        if (fd_dup < 0)
                return -errno;

        ret = fd_cloexec(fd_dup, true);
        if (ret < 0) {
                close(fd_dup);
                return ret;
        }

        return fd_dup;
}
575
576 __attribute__((unused)) static int print_r(int fd, const char *path)
577 {
578         int ret = 0;
579         int dfd, dfd_dup;
580         DIR *dir;
581         struct dirent *direntp;
582         struct stat st;
583
584         if (!path || *path == '\0') {
585                 char buf[sizeof("/proc/self/fd/") + 30];
586
587                 ret = snprintf(buf, sizeof(buf), "/proc/self/fd/%d", fd);
588                 if (ret < 0 || (size_t)ret >= sizeof(buf))
589                         return -1;
590
591                 /*
592                  * O_PATH file descriptors can't be used so we need to re-open
593                  * just in case.
594                  */
595                 dfd = openat(-EBADF, buf, O_CLOEXEC | O_DIRECTORY, 0);
596         } else {
597                 dfd = openat(fd, path, O_CLOEXEC | O_DIRECTORY, 0);
598         }
599         if (dfd < 0)
600                 return -1;
601
602         /*
603          * When fdopendir() below succeeds it assumes ownership of the fd so we
604          * to make sure we always have an fd that fdopendir() can own which is
605          * why we dup() in the case where the caller wants us to operate on the
606          * fd directly.
607          */
608         dfd_dup = dup_cloexec(dfd);
609         if (dfd_dup < 0) {
610                 close(dfd);
611                 return -1;
612         }
613
614         dir = fdopendir(dfd);
615         if (!dir) {
616                 close(dfd);
617                 close(dfd_dup);
618                 return -1;
619         }
620         /* Transfer ownership to fdopendir(). */
621         dfd = -EBADF;
622
623         while ((direntp = readdir(dir))) {
624                 if (!strcmp(direntp->d_name, ".") ||
625                     !strcmp(direntp->d_name, ".."))
626                         continue;
627
628                 ret = fstatat(dfd_dup, direntp->d_name, &st, AT_SYMLINK_NOFOLLOW);
629                 if (ret < 0 && errno != ENOENT)
630                         break;
631
632                 ret = 0;
633                 if (S_ISDIR(st.st_mode))
634                         ret = print_r(dfd_dup, direntp->d_name);
635                 else
636                         fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %d/%s\n",
637                                 (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
638                                 dfd_dup, direntp->d_name);
639                 if (ret < 0 && errno != ENOENT)
640                         break;
641         }
642
643         if (!path || *path == '\0')
644                 ret = fstatat(fd, "", &st,
645                               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW |
646                               AT_EMPTY_PATH);
647         else
648                 ret = fstatat(fd, path, &st,
649                               AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW);
650         if (!ret)
651                 fprintf(stderr, "mode(%o):uid(%d):gid(%d) -> %s\n",
652                         (st.st_mode & ~S_IFMT), st.st_uid, st.st_gid,
653                         (path && *path) ? path : "(null)");
654
655         close(dfd_dup);
656         closedir(dir);
657
658         return ret;
659 }
660 #else
/*
 * print_r - stub used when DEBUG_TRACE is not defined
 *
 * Ignores both arguments and always reports success.
 */
__attribute__((unused)) static int print_r(int fd, const char *path)
{
        (void)fd;
        (void)path;

        return 0;
}
665 #endif
666
667 /* fd_to_fd - transfer data from one fd to another */
668 static int fd_to_fd(int from, int to)
669 {
670         for (;;) {
671                 uint8_t buf[PATH_MAX];
672                 uint8_t *p = buf;
673                 ssize_t bytes_to_write;
674                 ssize_t bytes_read;
675
676                 bytes_read = read_nointr(from, buf, sizeof buf);
677                 if (bytes_read < 0)
678                         return -1;
679                 if (bytes_read == 0)
680                         break;
681
682                 bytes_to_write = (size_t)bytes_read;
683                 do {
684                         ssize_t bytes_written;
685
686                         bytes_written = write_nointr(to, p, bytes_to_write);
687                         if (bytes_written < 0)
688                                 return -1;
689
690                         bytes_to_write -= bytes_written;
691                         p += bytes_written;
692                 } while (bytes_to_write > 0);
693         }
694
695         return 0;
696 }
697
/*
 * sys_execveat - raw execveat() system call wrapper
 *
 * When the build environment provides no __NR_execveat the call fails with
 * -1 and errno set to ENOSYS; otherwise the arguments are passed straight to
 * the system call.
 */
static int sys_execveat(int fd, const char *path, char **argv, char **envp,
                        int flags)
{
#ifndef __NR_execveat
        errno = ENOSYS;
        return -1;
#else
        return syscall(__NR_execveat, fd, path, argv, envp, flags);
#endif
}
708
/*
 * Fallback definitions for the capability constants and the file-capability
 * xattr layout used below. Each one is only provided when the headers
 * included above did not already define it.
 */
#ifndef CAP_NET_RAW
#define CAP_NET_RAW 13
#endif

#ifndef VFS_CAP_FLAGS_EFFECTIVE
#define VFS_CAP_FLAGS_EFFECTIVE 0x000001
#endif

#ifndef VFS_CAP_U32_3
#define VFS_CAP_U32_3 2
#endif

#ifndef VFS_CAP_U32
#define VFS_CAP_U32 VFS_CAP_U32_3
#endif

#ifndef VFS_CAP_REVISION_1
#define VFS_CAP_REVISION_1 0x01000000
#endif

#ifndef VFS_CAP_REVISION_2
#define VFS_CAP_REVISION_2 0x02000000
#endif

/*
 * struct vfs_ns_cap_data is declared together with VFS_CAP_REVISION_3, i.e.
 * only when the system headers lack the revision 3 definitions. All fields
 * are stored little-endian (__le32).
 */
#ifndef VFS_CAP_REVISION_3
#define VFS_CAP_REVISION_3 0x03000000
struct vfs_ns_cap_data {
        __le32 magic_etc;
        struct {
                __le32 permitted;
                __le32 inheritable;
        } data[VFS_CAP_U32];
        __le32 rootid;
};
#endif
744
/*
 * Conversions between CPU byte order and little-endian for 16 and 32 bit
 * values. On big-endian hosts the bytes are swapped, on little-endian hosts
 * the value is only cast. Note that the swapping variants evaluate their
 * argument more than once.
 *
 * The byte-order macros are tested with defined() first: in a plain
 * "#if __BYTE_ORDER == __BIG_ENDIAN" two undefined macros both evaluate to 0,
 * which would silently select the byte-swapping branch instead of reaching
 * the #error below.
 */
#if defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)
#define cpu_to_le16(w16) le16_to_cpu(w16)
#define le16_to_cpu(w16) ((u_int16_t)((u_int16_t)(w16) >> 8) | (u_int16_t)((u_int16_t)(w16) << 8))
#define cpu_to_le32(w32) le32_to_cpu(w32)
#define le32_to_cpu(w32)                                                                       \
        ((u_int32_t)((u_int32_t)(w32) >> 24) | (u_int32_t)(((u_int32_t)(w32) >> 8) & 0xFF00) | \
         (u_int32_t)(((u_int32_t)(w32) << 8) & 0xFF0000) | (u_int32_t)((u_int32_t)(w32) << 24))
#elif defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)
#define cpu_to_le16(w16) ((u_int16_t)(w16))
#define le16_to_cpu(w16) ((u_int16_t)(w16))
#define cpu_to_le32(w32) ((u_int32_t)(w32))
#define le32_to_cpu(w32) ((u_int32_t)(w32))
#else
#error Expected endianness macro to be set
#endif
760
761 /* expected_dummy_vfs_caps_uid - check vfs caps are stored with the provided uid */
762 static bool expected_dummy_vfs_caps_uid(int fd, uid_t expected_uid)
763 {
764 #define __cap_raised_permitted(x, ns_cap_data)                                 \
765         ((ns_cap_data.data[(x) >> 5].permitted) & (1 << ((x)&31)))
766         struct vfs_ns_cap_data ns_xattr = {};
767         ssize_t ret;
768
769         ret = fgetxattr(fd, "security.capability", &ns_xattr, sizeof(ns_xattr));
770         if (ret < 0 || ret == 0)
771                 return false;
772
773         if (ns_xattr.magic_etc & VFS_CAP_REVISION_3) {
774
775                 if (le32_to_cpu(ns_xattr.rootid) != expected_uid) {
776                         errno = EINVAL;
777                         log_stderr("failure: rootid(%d) != expected_rootid(%d)", le32_to_cpu(ns_xattr.rootid), expected_uid);
778                 }
779
780                 return (le32_to_cpu(ns_xattr.rootid) == expected_uid) &&
781                        (__cap_raised_permitted(CAP_NET_RAW, ns_xattr) > 0);
782         } else {
783                 log_stderr("failure: fscaps version");
784         }
785
786         return false;
787 }
788
789 /* set_dummy_vfs_caps - set dummy vfs caps for the provided uid */
790 static int set_dummy_vfs_caps(int fd, int flags, int rootuid)
791 {
792 #define __raise_cap_permitted(x, ns_cap_data)                                  \
793         ns_cap_data.data[(x) >> 5].permitted |= (1 << ((x)&31))
794
795         struct vfs_ns_cap_data ns_xattr;
796
797         memset(&ns_xattr, 0, sizeof(ns_xattr));
798         __raise_cap_permitted(CAP_NET_RAW, ns_xattr);
799         ns_xattr.magic_etc |= VFS_CAP_REVISION_3 | VFS_CAP_FLAGS_EFFECTIVE;
800         ns_xattr.rootid = cpu_to_le32(rootuid);
801
802         return fsetxattr(fd, "security.capability",
803                          &ns_xattr, sizeof(ns_xattr), flags);
804 }
805
/*
 * safe_close - close a file descriptor without disturbing errno
 *
 * Does nothing for negative descriptors. Otherwise the descriptor is closed,
 * errno is restored to its previous value and @fd is set to -EBADF so that a
 * repeated safe_close() is harmless. @fd must be an lvalue and is evaluated
 * more than once.
 *
 * Wrapped in do { } while (0) so the macro behaves like a single statement,
 * e.g. in "if (cond) safe_close(fd); else ...".
 */
#define safe_close(fd)                   \
        do {                             \
                if ((fd) >= 0) {         \
                        int _e_ = errno; \
                        close(fd);       \
                        errno = _e_;     \
                        (fd) = -EBADF;   \
                }                        \
        } while (0)
813
814 static void test_setup(void)
815 {
816         if (mkdirat(t_mnt_fd, T_DIR1, 0777))
817                 die("failure: mkdirat");
818
819         t_dir1_fd = openat(t_mnt_fd, T_DIR1, O_CLOEXEC | O_DIRECTORY);
820         if (t_dir1_fd < 0)
821                 die("failure: openat");
822
823         if (fchmod(t_dir1_fd, 0777))
824                 die("failure: fchmod");
825 }
826
827 static void test_cleanup(void)
828 {
829         safe_close(t_dir1_fd);
830         if (rm_r(t_mnt_fd, T_DIR1))
831                 die("failure: rm_r");
832 }
833
834 /* Validate that basic file operations on idmapped mounts. */
835 static int fsids_unmapped(void)
836 {
837         int fret = -1;
838         int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
839         struct mount_attr attr = {
840                 .attr_set = MOUNT_ATTR_IDMAP,
841         };
842
843         /* create hardlink target */
844         hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
845         if (hardlink_target_fd < 0) {
846                 log_stderr("failure: openat");
847                 goto out;
848         }
849
850         /* create directory for rename test */
851         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
852                 log_stderr("failure: mkdirat");
853                 goto out;
854         }
855
856         /* change ownership of all files to uid 0 */
857         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
858                 log_stderr("failure: chown_r");
859                 goto out;
860         }
861
862         /* Changing mount properties on a detached mount. */
863         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
864         if (attr.userns_fd < 0) {
865                 log_stderr("failure: get_userns_fd");
866                 goto out;
867         }
868
869         open_tree_fd = sys_open_tree(t_dir1_fd, "",
870                                      AT_EMPTY_PATH |
871                                      AT_NO_AUTOMOUNT |
872                                      AT_SYMLINK_NOFOLLOW |
873                                      OPEN_TREE_CLOEXEC |
874                                      OPEN_TREE_CLONE);
875         if (open_tree_fd < 0) {
876                 log_stderr("failure: sys_open_tree");
877                 goto out;
878         }
879
880         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
881                 log_stderr("failure: sys_mount_setattr");
882                 goto out;
883         }
884
885         if (!switch_fsids(0, 0)) {
886                 log_stderr("failure: switch_fsids");
887                 goto out;
888         }
889
890         /* The caller's fsids don't have a mappings in the idmapped mount so any
891          * file creation must fail.
892          */
893
894         /* create hardlink */
895         if (!linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
896                 log_stderr("failure: linkat");
897                 goto out;
898         }
899         if (errno != EOVERFLOW) {
900                 log_stderr("failure: errno");
901                 goto out;
902         }
903
904         /* try to rename a file */
905         if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
906                 log_stderr("failure: renameat");
907                 goto out;
908         }
909         if (errno != EOVERFLOW) {
910                 log_stderr("failure: errno");
911                 goto out;
912         }
913
914         /* try to rename a directory */
915         if (!renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME)) {
916                 log_stderr("failure: renameat");
917                 goto out;
918         }
919         if (errno != EOVERFLOW) {
920                 log_stderr("failure: errno");
921                 goto out;
922         }
923
924         /* The caller is privileged over the inode so file deletion must work. */
925
926         /* remove file */
927         if (unlinkat(open_tree_fd, FILE1, 0)) {
928                 log_stderr("failure: unlinkat");
929                 goto out;
930         }
931
932         /* remove directory */
933         if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) {
934                 log_stderr("failure: unlinkat");
935                 goto out;
936         }
937
938         /* The caller's fsids don't have a mappings in the idmapped mount so
939          * any file creation must fail.
940          */
941
942         /* create regular file via open() */
943         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
944         if (file1_fd >= 0) {
945                 log_stderr("failure: create");
946                 goto out;
947         }
948         if (errno != EOVERFLOW) {
949                 log_stderr("failure: errno");
950                 goto out;
951         }
952
953         /* create regular file via mknod */
954         if (!mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0)) {
955                 log_stderr("failure: mknodat");
956                 goto out;
957         }
958         if (errno != EOVERFLOW) {
959                 log_stderr("failure: errno");
960                 goto out;
961         }
962
963         /* create character device */
964         if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
965                 log_stderr("failure: mknodat");
966                 goto out;
967         }
968         if (errno != EOVERFLOW) {
969                 log_stderr("failure: errno");
970                 goto out;
971         }
972
973         /* create symlink */
974         if (!symlinkat(FILE2, open_tree_fd, SYMLINK1)) {
975                 log_stderr("failure: symlinkat");
976                 goto out;
977         }
978         if (errno != EOVERFLOW) {
979                 log_stderr("failure: errno");
980                 goto out;
981         }
982
983         /* create directory */
984         if (!mkdirat(open_tree_fd, DIR1, 0700)) {
985                 log_stderr("failure: mkdirat");
986                 goto out;
987         }
988         if (errno != EOVERFLOW) {
989                 log_stderr("failure: errno");
990                 goto out;
991         }
992
993         fret = 0;
994         log_debug("Ran test");
995 out:
996         safe_close(attr.userns_fd);
997         safe_close(hardlink_target_fd);
998         safe_close(file1_fd);
999         safe_close(open_tree_fd);
1000
1001         return fret;
1002 }
1003
1004 static int fsids_mapped(void)
1005 {
1006         int fret = -1;
1007         int file1_fd = -EBADF, hardlink_target_fd = -EBADF, open_tree_fd = -EBADF;
1008         struct mount_attr attr = {
1009                 .attr_set = MOUNT_ATTR_IDMAP,
1010         };
1011         pid_t pid;
1012
1013         if (!caps_supported())
1014                 return 0;
1015
1016         /* create hardlink target */
1017         hardlink_target_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1018         if (hardlink_target_fd < 0) {
1019                 log_stderr("failure: openat");
1020                 goto out;
1021         }
1022
1023         /* create directory for rename test */
1024         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
1025                 log_stderr("failure: mkdirat");
1026                 goto out;
1027         }
1028
1029         /* change ownership of all files to uid 0 */
1030         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1031                 log_stderr("failure: chown_r");
1032                 goto out;
1033         }
1034
1035         /* Changing mount properties on a detached mount. */
1036         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1037         if (attr.userns_fd < 0) {
1038                 log_stderr("failure: get_userns_fd");
1039                 goto out;
1040         }
1041
1042         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1043                                      AT_EMPTY_PATH |
1044                                      AT_NO_AUTOMOUNT |
1045                                      AT_SYMLINK_NOFOLLOW |
1046                                      OPEN_TREE_CLOEXEC |
1047                                      OPEN_TREE_CLONE);
1048         if (open_tree_fd < 0) {
1049                 log_stderr("failure: sys_open_tree");
1050                 goto out;
1051         }
1052
1053         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1054                 log_stderr("failure: sys_mount_setattr");
1055                 goto out;
1056         }
1057
1058         pid = fork();
1059         if (pid < 0) {
1060                 log_stderr("failure: fork");
1061                 goto out;
1062         }
1063         if (pid == 0) {
1064                 if (!switch_fsids(10000, 10000))
1065                         die("failure: switch fsids");
1066
1067                 if (!caps_up())
1068                         die("failure: raise caps");
1069
1070                 /* The caller's fsids now have mappings in the idmapped mount so
1071                  * any file creation must fail.
1072                  */
1073
1074                 /* create hardlink */
1075                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1076                         die("failure: create hardlink");
1077
1078                 /* try to rename a file */
1079                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1080                         die("failure: rename");
1081
1082                 /* try to rename a directory */
1083                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1084                         die("failure: rename");
1085
1086                 /* remove file */
1087                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1088                         die("failure: delete");
1089
1090                 /* remove directory */
1091                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1092                         die("failure: delete");
1093
1094                 /* The caller's fsids have mappings in the idmapped mount so any
1095                  * file creation must fail.
1096                  */
1097
1098                 /* create regular file via open() */
1099                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1100                 if (file1_fd < 0)
1101                         die("failure: create");
1102
1103                 /* create regular file via mknod */
1104                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1105                         die("failure: create");
1106
1107                 /* create character device */
1108                 if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
1109                         die("failure: create");
1110
1111                 /* create symlink */
1112                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1113                         die("failure: create");
1114
1115                 /* create directory */
1116                 if (mkdirat(open_tree_fd, DIR1, 0700))
1117                         die("failure: create");
1118
1119                 exit(EXIT_SUCCESS);
1120         }
1121         if (wait_for_pid(pid))
1122                 goto out;
1123
1124         fret = 0;
1125         log_debug("Ran test");
1126 out:
1127         safe_close(attr.userns_fd);
1128         safe_close(file1_fd);
1129         safe_close(hardlink_target_fd);
1130         safe_close(open_tree_fd);
1131
1132         return fret;
1133 }
1134
1135 /* Validate that basic file operations on idmapped mounts from a user namespace. */
1136 static int create_in_userns(void)
1137 {
1138         int fret = -1;
1139         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1140         struct mount_attr attr = {
1141                 .attr_set = MOUNT_ATTR_IDMAP,
1142         };
1143         pid_t pid;
1144
1145         /* change ownership of all files to uid 0 */
1146         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1147                 log_stderr("failure: chown_r");
1148                 goto out;
1149         }
1150
1151         /* Changing mount properties on a detached mount. */
1152         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1153         if (attr.userns_fd < 0) {
1154                 log_stderr("failure: get_userns_fd");
1155                 goto out;
1156         }
1157
1158         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1159                                      AT_EMPTY_PATH |
1160                                      AT_NO_AUTOMOUNT |
1161                                      AT_SYMLINK_NOFOLLOW |
1162                                      OPEN_TREE_CLOEXEC |
1163                                      OPEN_TREE_CLONE);
1164         if (open_tree_fd < 0) {
1165                 log_stderr("failure: sys_open_tree");
1166                 goto out;
1167         }
1168
1169         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1170                 log_stderr("failure: sys_mount_setattr");
1171                 goto out;
1172         }
1173
1174         pid = fork();
1175         if (pid < 0) {
1176                 log_stderr("failure: fork");
1177                 goto out;
1178         }
1179         if (pid == 0) {
1180                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1181                         die("failure: switch_userns");
1182
1183                 /* create regular file via open() */
1184                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1185                 if (file1_fd < 0)
1186                         die("failure: open file");
1187                 safe_close(file1_fd);
1188
1189                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1190                         die("failure: check ownership");
1191
1192                 /* create regular file via mknod */
1193                 if (mknodat(open_tree_fd, FILE2, S_IFREG | 0000, 0))
1194                         die("failure: create");
1195
1196                 if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0))
1197                         die("failure: check ownership");
1198
1199                 /* create symlink */
1200                 if (symlinkat(FILE2, open_tree_fd, SYMLINK1))
1201                         die("failure: create");
1202
1203                 if (!expected_uid_gid(open_tree_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0))
1204                         die("failure: check ownership");
1205
1206                 /* create directory */
1207                 if (mkdirat(open_tree_fd, DIR1, 0700))
1208                         die("failure: create");
1209
1210                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
1211                         die("failure: check ownership");
1212
1213                 /* try to rename a file */
1214                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1215                         die("failure: create");
1216
1217                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1218                         die("failure: check ownership");
1219
1220                 /* try to rename a file */
1221                 if (renameat(open_tree_fd, DIR1, open_tree_fd, DIR1_RENAME))
1222                         die("failure: create");
1223
1224                 if (!expected_uid_gid(open_tree_fd, DIR1_RENAME, 0, 0, 0))
1225                         die("failure: check ownership");
1226
1227                 /* remove file */
1228                 if (unlinkat(open_tree_fd, FILE1_RENAME, 0))
1229                         die("failure: remove");
1230
1231                 /* remove directory */
1232                 if (unlinkat(open_tree_fd, DIR1_RENAME, AT_REMOVEDIR))
1233                         die("failure: remove");
1234
1235                 exit(EXIT_SUCCESS);
1236         }
1237
1238         if (wait_for_pid(pid))
1239                 goto out;
1240
1241         fret = 0;
1242         log_debug("Ran test");
1243 out:
1244         safe_close(attr.userns_fd);
1245         safe_close(file1_fd);
1246         safe_close(open_tree_fd);
1247
1248         return fret;
1249 }
1250
1251 static int hardlink_crossing_mounts(void)
1252 {
1253         int fret = -1;
1254         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1255
1256         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1257                 log_stderr("failure: chown_r");
1258                 goto out;
1259         }
1260
1261         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1262                                      AT_EMPTY_PATH |
1263                                      AT_NO_AUTOMOUNT |
1264                                      AT_SYMLINK_NOFOLLOW |
1265                                      OPEN_TREE_CLOEXEC |
1266                                      OPEN_TREE_CLONE);
1267         if (open_tree_fd < 0) {
1268                 log_stderr("failure: sys_open_tree");
1269                 goto out;
1270         }
1271
1272         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1273         if (file1_fd < 0) {
1274                 log_stderr("failure: openat");
1275                 goto out;
1276         }
1277
1278         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1279                 log_stderr("failure: mkdirat");
1280                 goto out;
1281         }
1282
1283         /* We're crossing a mountpoint so this must fail.
1284          *
1285          * Note that this must also fail for non-idmapped mounts but here we're
1286          * interested in making sure we're not introducing an accidental way to
1287          * violate that restriction or that suddenly this becomes possible.
1288          */
1289         if (!linkat(open_tree_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
1290                 log_stderr("failure: linkat");
1291                 goto out;
1292         }
1293         if (errno != EXDEV) {
1294                 log_stderr("failure: errno");
1295                 goto out;
1296         }
1297
1298         fret = 0;
1299         log_debug("Ran test");
1300 out:
1301         safe_close(file1_fd);
1302         safe_close(open_tree_fd);
1303
1304         return fret;
1305 }
1306
1307 static int hardlink_crossing_idmapped_mounts(void)
1308 {
1309         int fret = -1;
1310         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1311         struct mount_attr attr = {
1312                 .attr_set = MOUNT_ATTR_IDMAP,
1313         };
1314
1315         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1316                 log_stderr("failure: chown_r");
1317                 goto out;
1318         }
1319
1320         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1321         if (attr.userns_fd < 0) {
1322                 log_stderr("failure: get_userns_fd");
1323                 goto out;
1324         }
1325
1326         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1327                                      AT_EMPTY_PATH |
1328                                      AT_NO_AUTOMOUNT |
1329                                      AT_SYMLINK_NOFOLLOW |
1330                                      OPEN_TREE_CLOEXEC |
1331                                      OPEN_TREE_CLONE);
1332         if (open_tree_fd1 < 0) {
1333                 log_stderr("failure: sys_open_tree");
1334                 goto out;
1335         }
1336
1337         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1338                 log_stderr("failure: sys_mount_setattr");
1339                 goto out;
1340         }
1341
1342         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1343         if (file1_fd < 0) {
1344                 log_stderr("failure: openat");
1345                 goto out;
1346         }
1347
1348         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1349                 log_stderr("failure: expected_uid_gid");
1350                 goto out;
1351         }
1352
1353         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1354                 log_stderr("failure: expected_uid_gid");
1355                 goto out;
1356         }
1357
1358         safe_close(file1_fd);
1359
1360         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1361                 log_stderr("failure: mkdirat");
1362                 goto out;
1363         }
1364
1365         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1366                                       AT_NO_AUTOMOUNT |
1367                                       AT_SYMLINK_NOFOLLOW |
1368                                       OPEN_TREE_CLOEXEC |
1369                                       OPEN_TREE_CLONE |
1370                                       AT_RECURSIVE);
1371         if (open_tree_fd2 < 0) {
1372                 log_stderr("failure: sys_open_tree");
1373                 goto out;
1374         }
1375
1376         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1377                 log_stderr("failure: sys_mount_setattr");
1378                 goto out;
1379         }
1380
1381         /* We're crossing a mountpoint so this must fail.
1382          *
1383          * Note that this must also fail for non-idmapped mounts but here we're
1384          * interested in making sure we're not introducing an accidental way to
1385          * violate that restriction or that suddenly this becomes possible.
1386          */
1387         if (!linkat(open_tree_fd1, FILE1, open_tree_fd2, HARDLINK1, 0)) {
1388                 log_stderr("failure: linkat");
1389                 goto out;
1390         }
1391         if (errno != EXDEV) {
1392                 log_stderr("failure: errno");
1393                 goto out;
1394         }
1395
1396         fret = 0;
1397         log_debug("Ran test");
1398 out:
1399         safe_close(attr.userns_fd);
1400         safe_close(file1_fd);
1401         safe_close(open_tree_fd1);
1402         safe_close(open_tree_fd2);
1403
1404         return fret;
1405 }
1406
1407 static int hardlink_from_idmapped_mount(void)
1408 {
1409         int fret = -1;
1410         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1411         struct mount_attr attr = {
1412                 .attr_set = MOUNT_ATTR_IDMAP,
1413         };
1414
1415         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1416                 log_stderr("failure: chown_r");
1417                 goto out;
1418         }
1419
1420         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1421         if (attr.userns_fd < 0) {
1422                 log_stderr("failure: get_userns_fd");
1423                 goto out;
1424         }
1425
1426         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1427                                      AT_EMPTY_PATH |
1428                                      AT_NO_AUTOMOUNT |
1429                                      AT_SYMLINK_NOFOLLOW |
1430                                      OPEN_TREE_CLOEXEC |
1431                                      OPEN_TREE_CLONE);
1432         if (open_tree_fd < 0) {
1433                 log_stderr("failure: sys_open_tree");
1434                 goto out;
1435         }
1436
1437         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1438                 log_stderr("failure: sys_mount_setattr");
1439                 goto out;
1440         }
1441
1442         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1443         if (file1_fd < 0) {
1444                 log_stderr("failure: openat");
1445                 goto out;
1446         }
1447         safe_close(file1_fd);
1448
1449         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1450                 log_stderr("failure: expected_uid_gid");
1451                 goto out;
1452         }
1453
1454         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1455                 log_stderr("failure: expected_uid_gid");
1456                 goto out;
1457         }
1458
1459         /* We're not crossing a mountpoint so this must succeed. */
1460         if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0)) {
1461                 log_stderr("failure: linkat");
1462                 goto out;
1463         }
1464
1465
1466         fret = 0;
1467         log_debug("Ran test");
1468 out:
1469         safe_close(attr.userns_fd);
1470         safe_close(file1_fd);
1471         safe_close(open_tree_fd);
1472
1473         return fret;
1474 }
1475
1476 static int hardlink_from_idmapped_mount_in_userns(void)
1477 {
1478         int fret = -1;
1479         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1480         struct mount_attr attr = {
1481                 .attr_set = MOUNT_ATTR_IDMAP,
1482         };
1483         pid_t pid;
1484
1485         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1486                 log_stderr("failure: chown_r");
1487                 goto out;
1488         }
1489
1490         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1491         if (attr.userns_fd < 0) {
1492                 log_stderr("failure: get_userns_fd");
1493                 goto out;
1494         }
1495
1496         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1497                                      AT_EMPTY_PATH |
1498                                      AT_NO_AUTOMOUNT |
1499                                      AT_SYMLINK_NOFOLLOW |
1500                                      OPEN_TREE_CLOEXEC |
1501                                      OPEN_TREE_CLONE);
1502         if (open_tree_fd < 0) {
1503                 log_stderr("failure: sys_open_tree");
1504                 goto out;
1505         }
1506
1507         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1508                 log_stderr("failure: sys_mount_setattr");
1509                 goto out;
1510         }
1511
1512         pid = fork();
1513         if (pid < 0) {
1514                 log_stderr("failure: fork");
1515                 goto out;
1516         }
1517         if (pid == 0) {
1518                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1519                         die("failure: switch_userns");
1520
1521                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1522                 if (file1_fd < 0)
1523                         die("failure: create");
1524
1525                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1526                         die("failure: check ownership");
1527
1528                 /* We're not crossing a mountpoint so this must succeed. */
1529                 if (linkat(open_tree_fd, FILE1, open_tree_fd, HARDLINK1, 0))
1530                         die("failure: create");
1531
1532                 if (!expected_uid_gid(open_tree_fd, HARDLINK1, 0, 0, 0))
1533                         die("failure: check ownership");
1534
1535                 exit(EXIT_SUCCESS);
1536         }
1537
1538         if (wait_for_pid(pid))
1539                 goto out;
1540
1541         fret = 0;
1542         log_debug("Ran test");
1543 out:
1544         safe_close(attr.userns_fd);
1545         safe_close(file1_fd);
1546         safe_close(open_tree_fd);
1547
1548         return fret;
1549 }
1550
1551 static int rename_crossing_mounts(void)
1552 {
1553         int fret = -1;
1554         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1555
1556         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1557                 log_stderr("failure: chown_r");
1558                 goto out;
1559         }
1560
1561         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1562                                      AT_EMPTY_PATH |
1563                                      AT_NO_AUTOMOUNT |
1564                                      AT_SYMLINK_NOFOLLOW |
1565                                      OPEN_TREE_CLOEXEC |
1566                                      OPEN_TREE_CLONE);
1567         if (open_tree_fd < 0) {
1568                 log_stderr("failure: sys_open_tree");
1569                 goto out;
1570         }
1571
1572         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1573         if (file1_fd < 0) {
1574                 log_stderr("failure: openat");
1575                 goto out;
1576         }
1577
1578         if (mkdirat(open_tree_fd, DIR1, 0777)) {
1579                 log_stderr("failure: mkdirat");
1580                 goto out;
1581         }
1582
1583         /* We're crossing a mountpoint so this must fail.
1584          *
1585          * Note that this must also fail for non-idmapped mounts but here we're
1586          * interested in making sure we're not introducing an accidental way to
1587          * violate that restriction or that suddenly this becomes possible.
1588          */
1589         if (!renameat(open_tree_fd, FILE1, t_dir1_fd, FILE1_RENAME)) {
1590                 log_stderr("failure: renameat");
1591                 goto out;
1592         }
1593         if (errno != EXDEV) {
1594                 log_stderr("failure: errno");
1595                 goto out;
1596         }
1597
1598         fret = 0;
1599         log_debug("Ran test");
1600 out:
1601         safe_close(file1_fd);
1602         safe_close(open_tree_fd);
1603
1604         return fret;
1605 }
1606
1607 static int rename_crossing_idmapped_mounts(void)
1608 {
1609         int fret = -1;
1610         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
1611         struct mount_attr attr = {
1612                 .attr_set = MOUNT_ATTR_IDMAP,
1613         };
1614
1615         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1616                 log_stderr("failure: chown_r");
1617                 goto out;
1618         }
1619
1620         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1621         if (attr.userns_fd < 0) {
1622                 log_stderr("failure: get_userns_fd");
1623                 goto out;
1624         }
1625
1626         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
1627                                      AT_EMPTY_PATH |
1628                                      AT_NO_AUTOMOUNT |
1629                                      AT_SYMLINK_NOFOLLOW |
1630                                      OPEN_TREE_CLOEXEC |
1631                                      OPEN_TREE_CLONE);
1632         if (open_tree_fd1 < 0) {
1633                 log_stderr("failure: sys_open_tree");
1634                 goto out;
1635         }
1636
1637         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1638                 log_stderr("failure: sys_mount_setattr");
1639                 goto out;
1640         }
1641
1642         file1_fd = openat(open_tree_fd1, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1643         if (file1_fd < 0) {
1644                 log_stderr("failure: openat");
1645                 goto out;
1646         }
1647
1648         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 0, 0)) {
1649                 log_stderr("failure: expected_uid_gid");
1650                 goto out;
1651         }
1652
1653         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1654                 log_stderr("failure: expected_uid_gid");
1655                 goto out;
1656         }
1657
1658         if (mkdirat(open_tree_fd1, DIR1, 0777)) {
1659                 log_stderr("failure: mkdirat");
1660                 goto out;
1661         }
1662
1663         open_tree_fd2 = sys_open_tree(t_dir1_fd, DIR1,
1664                                       AT_NO_AUTOMOUNT |
1665                                       AT_SYMLINK_NOFOLLOW |
1666                                       OPEN_TREE_CLOEXEC |
1667                                       OPEN_TREE_CLONE |
1668                                       AT_RECURSIVE);
1669         if (open_tree_fd2 < 0) {
1670                 log_stderr("failure: sys_open_tree");
1671                 goto out;
1672         }
1673
1674         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1675                 log_stderr("failure: sys_mount_setattr");
1676                 goto out;
1677         }
1678
1679         /* We're crossing a mountpoint so this must fail.
1680          *
1681          * Note that this must also fail for non-idmapped mounts but here we're
1682          * interested in making sure we're not introducing an accidental way to
1683          * violate that restriction or that suddenly this becomes possible.
1684          */
1685         if (!renameat(open_tree_fd1, FILE1, open_tree_fd2, FILE1_RENAME)) {
1686                 log_stderr("failure: renameat");
1687                 goto out;
1688         }
1689         if (errno != EXDEV) {
1690                 log_stderr("failure: errno");
1691                 goto out;
1692         }
1693
1694         fret = 0;
1695         log_debug("Ran test");
1696 out:
1697         safe_close(attr.userns_fd);
1698         safe_close(file1_fd);
1699         safe_close(open_tree_fd1);
1700         safe_close(open_tree_fd2);
1701
1702         return fret;
1703 }
1704
1705 static int rename_from_idmapped_mount(void)
1706 {
1707         int fret = -1;
1708         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1709         struct mount_attr attr = {
1710                 .attr_set = MOUNT_ATTR_IDMAP,
1711         };
1712
1713         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1714                 log_stderr("failure: chown_r");
1715                 goto out;
1716         }
1717
1718         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
1719         if (attr.userns_fd < 0) {
1720                 log_stderr("failure: get_userns_fd");
1721                 goto out;
1722         }
1723
1724         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1725                                      AT_EMPTY_PATH |
1726                                      AT_NO_AUTOMOUNT |
1727                                      AT_SYMLINK_NOFOLLOW |
1728                                      OPEN_TREE_CLOEXEC |
1729                                      OPEN_TREE_CLONE);
1730         if (open_tree_fd < 0) {
1731                 log_stderr("failure: sys_open_tree");
1732                 goto out;
1733         }
1734
1735         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1736                 log_stderr("failure: sys_mount_setattr");
1737                 goto out;
1738         }
1739
1740         file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1741         if (file1_fd < 0) {
1742                 log_stderr("failure: openat");
1743                 goto out;
1744         }
1745
1746         if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) {
1747                 log_stderr("failure: expected_uid_gid");
1748                 goto out;
1749         }
1750
1751         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000)) {
1752                 log_stderr("failure: expected_uid_gid");
1753                 goto out;
1754         }
1755
1756         /* We're not crossing a mountpoint so this must succeed. */
1757         if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME)) {
1758                 log_stderr("failure: renameat");
1759                 goto out;
1760         }
1761
1762         fret = 0;
1763         log_debug("Ran test");
1764 out:
1765         safe_close(attr.userns_fd);
1766         safe_close(file1_fd);
1767         safe_close(open_tree_fd);
1768
1769         return fret;
1770 }
1771
1772 static int rename_from_idmapped_mount_in_userns(void)
1773 {
1774         int fret = -1;
1775         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1776         pid_t pid;
1777         struct mount_attr attr = {
1778                 .attr_set = MOUNT_ATTR_IDMAP,
1779         };
1780
1781         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1782                 log_stderr("failure: chown_r");
1783                 goto out;
1784         }
1785
1786         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1787         if (attr.userns_fd < 0) {
1788                 log_stderr("failure: get_userns_fd");
1789                 goto out;
1790         }
1791
1792         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1793                                      AT_EMPTY_PATH |
1794                                      AT_NO_AUTOMOUNT |
1795                                      AT_SYMLINK_NOFOLLOW |
1796                                      OPEN_TREE_CLOEXEC |
1797                                      OPEN_TREE_CLONE);
1798         if (open_tree_fd < 0) {
1799                 log_stderr("failure: sys_open_tree");
1800                 goto out;
1801         }
1802
1803         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1804                 log_stderr("failure: sys_mount_setattr");
1805                 goto out;
1806         }
1807
1808         pid = fork();
1809         if (pid < 0) {
1810                 log_stderr("failure: fork");
1811                 goto out;
1812         }
1813         if (pid == 0) {
1814                 if (!switch_userns(attr.userns_fd, 0, 0, false))
1815                         die("failure: switch_userns");
1816
1817                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1818                 if (file1_fd < 0)
1819                         die("failure: create");
1820
1821                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
1822                         die("failure: check ownership");
1823
1824                 /* We're not crossing a mountpoint so this must succeed. */
1825                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
1826                         die("failure: create");
1827
1828                 if (!expected_uid_gid(open_tree_fd, FILE1_RENAME, 0, 0, 0))
1829                         die("failure: check ownership");
1830
1831                 exit(EXIT_SUCCESS);
1832         }
1833
1834         if (wait_for_pid(pid))
1835                 goto out;
1836
1837         fret = 0;
1838         log_debug("Ran test");
1839 out:
1840         safe_close(attr.userns_fd);
1841         safe_close(file1_fd);
1842         safe_close(open_tree_fd);
1843
1844         return fret;
1845 }
1846
1847 static int symlink_regular_mounts(void)
1848 {
1849         int fret = -1;
1850         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1851         struct stat st;
1852
1853         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1854         if (file1_fd < 0) {
1855                 log_stderr("failure: openat");
1856                 goto out;
1857         }
1858
1859         if (chown_r(t_mnt_fd, T_DIR1, 10000, 10000)) {
1860                 log_stderr("failure: chown_r");
1861                 goto out;
1862         }
1863
1864         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1865                                      AT_EMPTY_PATH |
1866                                      AT_NO_AUTOMOUNT |
1867                                      AT_SYMLINK_NOFOLLOW |
1868                                      OPEN_TREE_CLOEXEC |
1869                                      OPEN_TREE_CLONE);
1870         if (open_tree_fd < 0) {
1871                 log_stderr("failure: sys_open_tree");
1872                 goto out;
1873         }
1874
1875         if (symlinkat(FILE1, open_tree_fd, FILE2)) {
1876                 log_stderr("failure: symlinkat");
1877                 goto out;
1878         }
1879
1880         if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW)) {
1881                 log_stderr("failure: fchownat");
1882                 goto out;
1883         }
1884
1885         if (fstatat(open_tree_fd, FILE2, &st, AT_SYMLINK_NOFOLLOW)) {
1886                 log_stderr("failure: fstatat");
1887                 goto out;
1888         }
1889
1890         if (st.st_uid != 15000 || st.st_gid != 15000) {
1891                 log_stderr("failure: compare ids");
1892                 goto out;
1893         }
1894
1895         if (fstatat(open_tree_fd, FILE1, &st, 0)) {
1896                 log_stderr("failure: fstatat");
1897                 goto out;
1898         }
1899
1900         if (st.st_uid != 10000 || st.st_gid != 10000) {
1901                 log_stderr("failure: compare ids");
1902                 goto out;
1903         }
1904
1905         fret = 0;
1906         log_debug("Ran test");
1907 out:
1908         safe_close(file1_fd);
1909         safe_close(open_tree_fd);
1910
1911         return fret;
1912 }
1913
1914 static int symlink_idmapped_mounts(void)
1915 {
1916         int fret = -1;
1917         int file1_fd = -EBADF, open_tree_fd = -EBADF;
1918         struct mount_attr attr = {
1919                 .attr_set = MOUNT_ATTR_IDMAP,
1920         };
1921         pid_t pid;
1922
1923         if (!caps_supported())
1924                 return 0;
1925
1926         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
1927         if (file1_fd < 0) {
1928                 log_stderr("failure: openat");
1929                 goto out;
1930         }
1931
1932         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
1933                 log_stderr("failure: chown_r");
1934                 goto out;
1935         }
1936
1937         /* Changing mount properties on a detached mount. */
1938         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
1939         if (attr.userns_fd < 0) {
1940                 log_stderr("failure: get_userns_fd");
1941                 goto out;
1942         }
1943
1944         open_tree_fd = sys_open_tree(t_dir1_fd, "",
1945                                      AT_EMPTY_PATH |
1946                                      AT_NO_AUTOMOUNT |
1947                                      AT_SYMLINK_NOFOLLOW |
1948                                      OPEN_TREE_CLOEXEC |
1949                                      OPEN_TREE_CLONE);
1950         if (open_tree_fd < 0) {
1951                 log_stderr("failure: sys_open_tree");
1952                 goto out;
1953         }
1954
1955         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
1956                 log_stderr("failure: sys_mount_setattr");
1957                 goto out;
1958         }
1959
1960         pid = fork();
1961         if (pid < 0) {
1962                 log_stderr("failure: fork");
1963                 goto out;
1964         }
1965         if (pid == 0) {
1966                 if (!switch_fsids(10000, 10000))
1967                         die("failure: switch fsids");
1968
1969                 if (!caps_up())
1970                         die("failure: raise caps");
1971
1972                 if (symlinkat(FILE1, open_tree_fd, FILE2))
1973                         die("failure: create");
1974
1975                 if (fchownat(open_tree_fd, FILE2, 15000, 15000, AT_SYMLINK_NOFOLLOW))
1976                         die("failure: change ownership");
1977
1978                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 15000, 15000))
1979                         die("failure: check ownership");
1980
1981                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
1982                         die("failure: check ownership");
1983
1984                 exit(EXIT_SUCCESS);
1985         }
1986         if (wait_for_pid(pid))
1987                 goto out;
1988
1989         fret = 0;
1990         log_debug("Ran test");
1991 out:
1992         safe_close(attr.userns_fd);
1993         safe_close(file1_fd);
1994         safe_close(open_tree_fd);
1995
1996         return fret;
1997 }
1998
1999 static int symlink_idmapped_mounts_in_userns(void)
2000 {
2001         int fret = -1;
2002         int file1_fd = -EBADF, open_tree_fd = -EBADF;
2003         struct mount_attr attr = {
2004                 .attr_set = MOUNT_ATTR_IDMAP,
2005         };
2006         pid_t pid;
2007
2008         if (chown_r(t_mnt_fd, T_DIR1, 0, 0)) {
2009                 log_stderr("failure: chown_r");
2010                 goto out;
2011         }
2012
2013         /* Changing mount properties on a detached mount. */
2014         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2015         if (attr.userns_fd < 0) {
2016                 log_stderr("failure: get_userns_fd");
2017                 goto out;
2018         }
2019
2020         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2021                                      AT_EMPTY_PATH |
2022                                      AT_NO_AUTOMOUNT |
2023                                      AT_SYMLINK_NOFOLLOW |
2024                                      OPEN_TREE_CLOEXEC |
2025                                      OPEN_TREE_CLONE);
2026         if (open_tree_fd < 0) {
2027                 log_stderr("failure: sys_open_tree");
2028                 goto out;
2029         }
2030
2031         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2032                 log_stderr("failure: sys_mount_setattr");
2033                 goto out;
2034         }
2035
2036         pid = fork();
2037         if (pid < 0) {
2038                 log_stderr("failure: fork");
2039                 goto out;
2040         }
2041         if (pid == 0) {
2042                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2043                         die("failure: switch_userns");
2044
2045                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2046                 if (file1_fd < 0)
2047                         die("failure: create");
2048                 safe_close(file1_fd);
2049
2050                 if (symlinkat(FILE1, open_tree_fd, FILE2))
2051                         die("failure: create");
2052
2053                 if (fchownat(open_tree_fd, FILE2, 5000, 5000, AT_SYMLINK_NOFOLLOW))
2054                         die("failure: change ownership");
2055
2056                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000))
2057                         die("failure: check ownership");
2058
2059                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
2060                         die("failure: check ownership");
2061
2062                 exit(EXIT_SUCCESS);
2063         }
2064
2065         if (wait_for_pid(pid))
2066                 goto out;
2067
2068         if (!expected_uid_gid(t_dir1_fd, FILE2, AT_SYMLINK_NOFOLLOW, 5000, 5000)) {
2069                 log_stderr("failure: expected_uid_gid");
2070                 goto out;
2071         }
2072
2073         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2074                 log_stderr("failure: expected_uid_gid");
2075                 goto out;
2076         }
2077
2078         fret = 0;
2079         log_debug("Ran test");
2080 out:
2081         safe_close(attr.userns_fd);
2082         safe_close(file1_fd);
2083         safe_close(open_tree_fd);
2084
2085         return fret;
2086 }
2087
2088 /* Validate that a caller whose fsids map into the idmapped mount within it's
2089  * user namespace cannot create any device nodes.
2090  */
2091 static int device_node_in_userns(void)
2092 {
2093         int fret = -1;
2094         int open_tree_fd = -EBADF;
2095         struct mount_attr attr = {
2096                 .attr_set = MOUNT_ATTR_IDMAP,
2097         };
2098         pid_t pid;
2099
2100         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
2101         if (attr.userns_fd < 0) {
2102                 log_stderr("failure: get_userns_fd");
2103                 goto out;
2104         }
2105
2106         open_tree_fd = sys_open_tree(t_dir1_fd, "",
2107                                      AT_EMPTY_PATH |
2108                                      AT_NO_AUTOMOUNT |
2109                                      AT_SYMLINK_NOFOLLOW |
2110                                      OPEN_TREE_CLOEXEC |
2111                                      OPEN_TREE_CLONE);
2112         if (open_tree_fd < 0) {
2113                 log_stderr("failure: sys_open_tree");
2114                 goto out;
2115         }
2116
2117         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
2118                 log_stderr("failure: sys_mount_setattr");
2119                 goto out;
2120         }
2121
2122         pid = fork();
2123         if (pid < 0) {
2124                 log_stderr("failure: fork");
2125                 goto out;
2126         }
2127         if (pid == 0) {
2128                 if (!switch_userns(attr.userns_fd, 0, 0, false))
2129                         die("failure: switch_userns");
2130
2131                 /* create character device */
2132                 if (!mknodat(open_tree_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1)))
2133                         die("failure: create");
2134
2135                 exit(EXIT_SUCCESS);
2136         }
2137
2138         if (wait_for_pid(pid))
2139                 goto out;
2140
2141         fret = 0;
2142         log_debug("Ran test");
2143 out:
2144         safe_close(attr.userns_fd);
2145         safe_close(open_tree_fd);
2146
2147         return fret;
2148 }
2149
2150
2151 /* Validate that changing file ownership works correctly on idmapped mounts. */
2152 static int expected_uid_gid_idmapped_mounts(void)
2153 {
2154         int fret = -1;
2155         int file1_fd = -EBADF, open_tree_fd1 = -EBADF, open_tree_fd2 = -EBADF;
2156         struct mount_attr attr1 = {
2157                 .attr_set = MOUNT_ATTR_IDMAP,
2158         };
2159         struct mount_attr attr2 = {
2160                 .attr_set = MOUNT_ATTR_IDMAP,
2161         };
2162         pid_t pid;
2163
2164         if (!switch_fsids(0, 0)) {
2165                 log_stderr("failure: switch_fsids");
2166                 goto out;
2167         }
2168
2169         /* create regular file via open() */
2170         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2171         if (file1_fd < 0) {
2172                 log_stderr("failure: openat");
2173                 goto out;
2174         }
2175
2176         /* create regular file via mknod */
2177         if (mknodat(t_dir1_fd, FILE2, S_IFREG | 0000, 0)) {
2178                 log_stderr("failure: mknodat");
2179                 goto out;
2180         }
2181
2182         /* create character device */
2183         if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | 0644, makedev(5, 1))) {
2184                 log_stderr("failure: mknodat");
2185                 goto out;
2186         }
2187
2188         /* create hardlink */
2189         if (linkat(t_dir1_fd, FILE1, t_dir1_fd, HARDLINK1, 0)) {
2190                 log_stderr("failure: linkat");
2191                 goto out;
2192         }
2193
2194         /* create symlink */
2195         if (symlinkat(FILE2, t_dir1_fd, SYMLINK1)) {
2196                 log_stderr("failure: symlinkat");
2197                 goto out;
2198         }
2199
2200         /* create directory */
2201         if (mkdirat(t_dir1_fd, DIR1, 0700)) {
2202                 log_stderr("failure: mkdirat");
2203                 goto out;
2204         }
2205
2206         /* Changing mount properties on a detached mount. */
2207         attr1.userns_fd = get_userns_fd(0, 10000, 10000);
2208         if (attr1.userns_fd < 0) {
2209                 log_stderr("failure: get_userns_fd");
2210                 goto out;
2211         }
2212
2213         open_tree_fd1 = sys_open_tree(t_dir1_fd, "",
2214                                      AT_EMPTY_PATH |
2215                                      AT_NO_AUTOMOUNT |
2216                                      AT_SYMLINK_NOFOLLOW |
2217                                      OPEN_TREE_CLOEXEC |
2218                                      OPEN_TREE_CLONE);
2219         if (open_tree_fd1 < 0) {
2220                 log_stderr("failure: sys_open_tree");
2221                 goto out;
2222         }
2223
2224         if (sys_mount_setattr(open_tree_fd1, "", AT_EMPTY_PATH, &attr1, sizeof(attr1))) {
2225                 log_stderr("failure: sys_mount_setattr");
2226                 goto out;
2227         }
2228
2229         /* Validate that all files created through the image mountpoint are
2230          * owned by the callers fsuid and fsgid.
2231          */
2232         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2233                 log_stderr("failure: expected_uid_gid");
2234                 goto out;
2235         }
2236         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2237                 log_stderr("failure: expected_uid_gid");
2238                 goto out;
2239         }
2240         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2241                 log_stderr("failure: expected_uid_gid");
2242                 goto out;
2243         }
2244         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2245                 log_stderr("failure: expected_uid_gid");
2246                 goto out;
2247         }
2248         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
2249                 log_stderr("failure: expected_uid_gid");
2250                 goto out;
2251         }
2252         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2253                 log_stderr("failure: expected_uid_gid");
2254                 goto out;
2255         }
2256         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2257                 log_stderr("failure: expected_uid_gid");
2258                 goto out;
2259         }
2260
2261         /* Validate that all files are owned by the uid and gid specified in
2262          * the idmapping of the mount they are accessed from.
2263          */
2264         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2265                 log_stderr("failure: expected_uid_gid");
2266                 goto out;
2267         }
2268         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2269                 log_stderr("failure: expected_uid_gid");
2270                 goto out;
2271         }
2272         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2273                 log_stderr("failure: expected_uid_gid");
2274                 goto out;
2275         }
2276         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2277                 log_stderr("failure: expected_uid_gid");
2278                 goto out;
2279         }
2280         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
2281                 log_stderr("failure: expected_uid_gid");
2282                 goto out;
2283         }
2284         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2285                 log_stderr("failure: expected_uid_gid");
2286                 goto out;
2287         }
2288         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2289                 log_stderr("failure: expected_uid_gid");
2290                 goto out;
2291         }
2292
2293         /* Changing mount properties on a detached mount. */
2294         attr2.userns_fd = get_userns_fd(0, 30000, 2001);
2295         if (attr2.userns_fd < 0) {
2296                 log_stderr("failure: get_userns_fd");
2297                 goto out;
2298         }
2299
2300         open_tree_fd2 = sys_open_tree(t_dir1_fd, "",
2301                                      AT_EMPTY_PATH |
2302                                      AT_NO_AUTOMOUNT |
2303                                      AT_SYMLINK_NOFOLLOW |
2304                                      OPEN_TREE_CLOEXEC |
2305                                      OPEN_TREE_CLONE);
2306         if (open_tree_fd2 < 0) {
2307                 log_stderr("failure: sys_open_tree");
2308                 goto out;
2309         }
2310
2311         if (sys_mount_setattr(open_tree_fd2, "", AT_EMPTY_PATH, &attr2, sizeof(attr2))) {
2312                 log_stderr("failure: sys_mount_setattr");
2313                 goto out;
2314         }
2315
2316         /* Validate that all files are owned by the uid and gid specified in
2317          * the idmapping of the mount they are accessed from.
2318          */
2319         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2320                 log_stderr("failure: expected_uid_gid");
2321                 goto out;
2322         }
2323         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2324                 log_stderr("failure: expected_uid_gid");
2325                 goto out;
2326         }
2327         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2328                 log_stderr("failure: expected_uid_gid");
2329                 goto out;
2330         }
2331         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2332                 log_stderr("failure: expected_uid_gid");
2333                 goto out;
2334         }
2335         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 30000, 30000)) {
2336                 log_stderr("failure: expected_uid_gid");
2337                 goto out;
2338         }
2339         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2340                 log_stderr("failure: expected_uid_gid");
2341                 goto out;
2342         }
2343         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2344                 log_stderr("failure: expected_uid_gid");
2345                 goto out;
2346         }
2347
2348         /* Change ownership through original image mountpoint. */
2349         if (fchownat(t_dir1_fd, FILE1, 2000, 2000, 0)) {
2350                 log_stderr("failure: fchownat");
2351                 goto out;
2352         }
2353         if (fchownat(t_dir1_fd, FILE2, 2000, 2000, 0)) {
2354                 log_stderr("failure: fchownat");
2355                 goto out;
2356         }
2357         if (fchownat(t_dir1_fd, HARDLINK1, 2000, 2000, 0)) {
2358                 log_stderr("failure: fchownat");
2359                 goto out;
2360         }
2361         if (fchownat(t_dir1_fd, CHRDEV1, 2000, 2000, 0)) {
2362                 log_stderr("failure: fchownat");
2363                 goto out;
2364         }
2365         if (fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)) {
2366                 log_stderr("failure: fchownat");
2367                 goto out;
2368         }
2369         if (fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH)) {
2370                 log_stderr("failure: fchownat");
2371                 goto out;
2372         }
2373         if (fchownat(t_dir1_fd, DIR1, 2000, 2000, AT_EMPTY_PATH)) {
2374                 log_stderr("failure: fchownat");
2375                 goto out;
2376         }
2377
2378         /* Check ownership through original mount. */
2379         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 2000, 2000)) {
2380                 log_stderr("failure: expected_uid_gid");
2381                 goto out;
2382         }
2383         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 2000, 2000)) {
2384                 log_stderr("failure: expected_uid_gid");
2385                 goto out;
2386         }
2387         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 2000, 2000)) {
2388                 log_stderr("failure: expected_uid_gid");
2389                 goto out;
2390         }
2391         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 2000, 2000)) {
2392                 log_stderr("failure: expected_uid_gid");
2393                 goto out;
2394         }
2395         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 3000, 3000)) {
2396                 log_stderr("failure: expected_uid_gid");
2397                 goto out;
2398         }
2399         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 2000, 2000)) {
2400                 log_stderr("failure: expected_uid_gid");
2401                 goto out;
2402         }
2403         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 2000, 2000)) {
2404                 log_stderr("failure: expected_uid_gid");
2405                 goto out;
2406         }
2407
2408         /* Check ownership through first idmapped mount. */
2409         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 12000, 12000)) {
2410                 log_stderr("failure:expected_uid_gid ");
2411                 goto out;
2412         }
2413         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 12000, 12000)) {
2414                 log_stderr("failure: expected_uid_gid");
2415                 goto out;
2416         }
2417         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 12000, 12000)) {
2418                 log_stderr("failure: expected_uid_gid");
2419                 goto out;
2420         }
2421         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 12000, 12000)) {
2422                 log_stderr("failure: expected_uid_gid");
2423                 goto out;
2424         }
2425         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 13000, 13000)) {
2426                 log_stderr("failure: expected_uid_gid");
2427                 goto out;
2428         }
2429         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 12000, 12000)) {
2430                 log_stderr("failure:expected_uid_gid ");
2431                 goto out;
2432         }
2433         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 12000, 12000)) {
2434                 log_stderr("failure: expected_uid_gid");
2435                 goto out;
2436         }
2437
2438         /* Check ownership through second idmapped mount. */
2439         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 32000, 32000)) {
2440                 log_stderr("failure: expected_uid_gid");
2441                 goto out;
2442         }
2443         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 32000, 32000)) {
2444                 log_stderr("failure: expected_uid_gid");
2445                 goto out;
2446         }
2447         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 32000, 32000)) {
2448                 log_stderr("failure: expected_uid_gid");
2449                 goto out;
2450         }
2451         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 32000, 32000)) {
2452                 log_stderr("failure: expected_uid_gid");
2453                 goto out;
2454         }
2455         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid)) {
2456                 log_stderr("failure: expected_uid_gid");
2457                 goto out;
2458         }
2459         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 32000, 32000)) {
2460                 log_stderr("failure: expected_uid_gid");
2461                 goto out;
2462         }
2463         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 32000, 32000)) {
2464                 log_stderr("failure: expected_uid_gid");
2465                 goto out;
2466         }
2467
2468         pid = fork();
2469         if (pid < 0) {
2470                 log_stderr("failure: fork");
2471                 goto out;
2472         }
2473         if (pid == 0) {
2474                 if (!switch_userns(attr1.userns_fd, 0, 0, false))
2475                         die("failure: switch_userns");
2476
2477                 if (!fchownat(t_dir1_fd, FILE1, 1000, 1000, 0))
2478                         die("failure: fchownat");
2479                 if (!fchownat(t_dir1_fd, FILE2, 1000, 1000, 0))
2480                         die("failure: fchownat");
2481                 if (!fchownat(t_dir1_fd, HARDLINK1, 1000, 1000, 0))
2482                         die("failure: fchownat");
2483                 if (!fchownat(t_dir1_fd, CHRDEV1, 1000, 1000, 0))
2484                         die("failure: fchownat");
2485                 if (!fchownat(t_dir1_fd, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2486                         die("failure: fchownat");
2487                 if (!fchownat(t_dir1_fd, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2488                         die("failure: fchownat");
2489                 if (!fchownat(t_dir1_fd, DIR1, 1000, 1000, AT_EMPTY_PATH))
2490                         die("failure: fchownat");
2491
2492                 if (!fchownat(open_tree_fd2, FILE1, 1000, 1000, 0))
2493                         die("failure: fchownat");
2494                 if (!fchownat(open_tree_fd2, FILE2, 1000, 1000, 0))
2495                         die("failure: fchownat");
2496                 if (!fchownat(open_tree_fd2, HARDLINK1, 1000, 1000, 0))
2497                         die("failure: fchownat");
2498                 if (!fchownat(open_tree_fd2, CHRDEV1, 1000, 1000, 0))
2499                         die("failure: fchownat");
2500                 if (!fchownat(open_tree_fd2, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2501                         die("failure: fchownat");
2502                 if (!fchownat(open_tree_fd2, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2503                         die("failure: fchownat");
2504                 if (!fchownat(open_tree_fd2, DIR1, 1000, 1000, AT_EMPTY_PATH))
2505                         die("failure: fchownat");
2506
2507                 if (fchownat(open_tree_fd1, FILE1, 1000, 1000, 0))
2508                         die("failure: fchownat");
2509                 if (fchownat(open_tree_fd1, FILE2, 1000, 1000, 0))
2510                         die("failure: fchownat");
2511                 if (fchownat(open_tree_fd1, HARDLINK1, 1000, 1000, 0))
2512                         die("failure: fchownat");
2513                 if (fchownat(open_tree_fd1, CHRDEV1, 1000, 1000, 0))
2514                         die("failure: fchownat");
2515                 if (fchownat(open_tree_fd1, SYMLINK1, 2000, 2000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2516                         die("failure: fchownat");
2517                 if (fchownat(open_tree_fd1, SYMLINK1, 1000, 1000, AT_EMPTY_PATH))
2518                         die("failure: fchownat");
2519                 if (fchownat(open_tree_fd1, DIR1, 1000, 1000, AT_EMPTY_PATH))
2520                         die("failure: fchownat");
2521
2522                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2523                         die("failure: expected_uid_gid");
2524                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2525                         die("failure: expected_uid_gid");
2526                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2527                         die("failure: expected_uid_gid");
2528                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2529                         die("failure: expected_uid_gid");
2530                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2531                         die("failure: expected_uid_gid");
2532                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2533                         die("failure: expected_uid_gid");
2534                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2535                         die("failure: expected_uid_gid");
2536
2537                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, t_overflowuid, t_overflowgid))
2538                         die("failure: expected_uid_gid");
2539                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, t_overflowuid, t_overflowgid))
2540                         die("failure: expected_uid_gid");
2541                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2542                         die("failure: expected_uid_gid");
2543                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2544                         die("failure: expected_uid_gid");
2545                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2546                         die("failure: expected_uid_gid");
2547                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2548                         die("failure: expected_uid_gid");
2549                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, t_overflowuid, t_overflowgid))
2550                         die("failure: expected_uid_gid");
2551
2552                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 1000, 1000))
2553                         die("failure: expected_uid_gid");
2554                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 1000, 1000))
2555                         die("failure: expected_uid_gid");
2556                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 1000, 1000))
2557                         die("failure: expected_uid_gid");
2558                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 1000, 1000))
2559                         die("failure: expected_uid_gid");
2560                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2561                         die("failure: expected_uid_gid");
2562                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 1000, 1000))
2563                         die("failure: expected_uid_gid");
2564                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 1000, 1000))
2565                         die("failure: expected_uid_gid");
2566
2567                 exit(EXIT_SUCCESS);
2568         }
2569
2570         if (wait_for_pid(pid))
2571                 goto out;
2572
2573         /* Check ownership through original mount. */
2574         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 1000, 1000)) {
2575                 log_stderr("failure: expected_uid_gid");
2576                 goto out;
2577         }
2578         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 1000, 1000)) {
2579                 log_stderr("failure: expected_uid_gid");
2580                 goto out;
2581         }
2582         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 1000, 1000)) {
2583                 log_stderr("failure: expected_uid_gid");
2584                 goto out;
2585         }
2586         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 1000, 1000)) {
2587                 log_stderr("failure: expected_uid_gid");
2588                 goto out;
2589         }
2590         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2591                 log_stderr("failure: expected_uid_gid");
2592                 goto out;
2593         }
2594         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 1000, 1000)) {
2595                 log_stderr("failure: expected_uid_gid");
2596                 goto out;
2597         }
2598         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 1000, 1000)) {
2599                 log_stderr("failure: expected_uid_gid");
2600                 goto out;
2601         }
2602
2603         /* Check ownership through first idmapped mount. */
2604         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 11000, 11000)) {
2605                 log_stderr("failure: expected_uid_gid");
2606                 goto out;
2607         }
2608         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 11000, 11000)) {
2609                 log_stderr("failure: expected_uid_gid");
2610                 goto out;
2611         }
2612         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 11000, 11000)) {
2613                 log_stderr("failure: expected_uid_gid");
2614                 goto out;
2615         }
2616         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 11000, 11000)) {
2617                 log_stderr("failure: expected_uid_gid");
2618                 goto out;
2619         }
2620         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2621                 log_stderr("failure: expected_uid_gid");
2622                 goto out;
2623         }
2624         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 11000, 11000)) {
2625                 log_stderr("failure: expected_uid_gid");
2626                 goto out;
2627         }
2628         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 11000, 11000)) {
2629                 log_stderr("failure: expected_uid_gid");
2630                 goto out;
2631         }
2632
2633         /* Check ownership through second idmapped mount. */
2634         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 31000, 31000)) {
2635                 log_stderr("failure: expected_uid_gid");
2636                 goto out;
2637         }
2638         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 31000, 31000)) {
2639                 log_stderr("failure: expected_uid_gid");
2640                 goto out;
2641         }
2642         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 31000, 31000)) {
2643                 log_stderr("failure: expected_uid_gid");
2644                 goto out;
2645         }
2646         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 31000, 31000)) {
2647                 log_stderr("failure: expected_uid_gid");
2648                 goto out;
2649         }
2650         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2651                 log_stderr("failure: expected_uid_gid");
2652                 goto out;
2653         }
2654         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 31000, 31000)) {
2655                 log_stderr("failure: expected_uid_gid");
2656                 goto out;
2657         }
2658         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 31000, 31000)) {
2659                 log_stderr("failure: expected_uid_gid");
2660                 goto out;
2661         }
2662
2663         pid = fork();
2664         if (pid < 0) {
2665                 log_stderr("failure: fork");
2666                 goto out;
2667         }
2668         if (pid == 0) {
2669                 if (!switch_userns(attr2.userns_fd, 0, 0, false))
2670                         die("failure: switch_userns");
2671
2672                 if (!fchownat(t_dir1_fd, FILE1, 0, 0, 0))
2673                         die("failure: fchownat");
2674                 if (!fchownat(t_dir1_fd, FILE2, 0, 0, 0))
2675                         die("failure: fchownat");
2676                 if (!fchownat(t_dir1_fd, HARDLINK1, 0, 0, 0))
2677                         die("failure: fchownat");
2678                 if (!fchownat(t_dir1_fd, CHRDEV1, 0, 0, 0))
2679                         die("failure: fchownat");
2680                 if (!fchownat(t_dir1_fd, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2681                         die("failure: fchownat");
2682                 if (!fchownat(t_dir1_fd, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2683                         die("failure: fchownat");
2684                 if (!fchownat(t_dir1_fd, DIR1, 0, 0, AT_EMPTY_PATH))
2685                         die("failure: fchownat");
2686
2687                 if (!fchownat(open_tree_fd1, FILE1, 0, 0, 0))
2688                         die("failure: fchownat");
2689                 if (!fchownat(open_tree_fd1, FILE2, 0, 0, 0))
2690                         die("failure: fchownat");
2691                 if (!fchownat(open_tree_fd1, HARDLINK1, 0, 0, 0))
2692                         die("failure: fchownat");
2693                 if (!fchownat(open_tree_fd1, CHRDEV1, 0, 0, 0))
2694                         die("failure: fchownat");
2695                 if (!fchownat(open_tree_fd1, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2696                         die("failure: fchownat");
2697                 if (!fchownat(open_tree_fd1, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2698                         die("failure: fchownat");
2699                 if (!fchownat(open_tree_fd1, DIR1, 0, 0, AT_EMPTY_PATH))
2700                         die("failure: fchownat");
2701
2702                 if (fchownat(open_tree_fd2, FILE1, 0, 0, 0))
2703                         die("failure: fchownat");
2704                 if (fchownat(open_tree_fd2, FILE2, 0, 0, 0))
2705                         die("failure: fchownat");
2706                 if (fchownat(open_tree_fd2, HARDLINK1, 0, 0, 0))
2707                         die("failure: fchownat");
2708                 if (fchownat(open_tree_fd2, CHRDEV1, 0, 0, 0))
2709                         die("failure: fchownat");
2710                 if (!fchownat(open_tree_fd2, SYMLINK1, 3000, 3000, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW))
2711                         die("failure: fchownat");
2712                 if (fchownat(open_tree_fd2, SYMLINK1, 0, 0, AT_EMPTY_PATH))
2713                         die("failure: fchownat");
2714                 if (fchownat(open_tree_fd2, DIR1, 0, 0, AT_EMPTY_PATH))
2715                         die("failure: fchownat");
2716
2717                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, t_overflowuid, t_overflowgid))
2718                         die("failure: expected_uid_gid");
2719                 if (!expected_uid_gid(t_dir1_fd, FILE2, 0, t_overflowuid, t_overflowgid))
2720                         die("failure: expected_uid_gid");
2721                 if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2722                         die("failure: expected_uid_gid");
2723                 if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2724                         die("failure: expected_uid_gid");
2725                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2726                         die("failure: expected_uid_gid");
2727                 if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2728                         die("failure: expected_uid_gid");
2729                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, t_overflowuid, t_overflowgid))
2730                         die("failure: expected_uid_gid");
2731
2732                 if (!expected_uid_gid(open_tree_fd1, FILE1, 0, t_overflowuid, t_overflowgid))
2733                         die("failure: expected_uid_gid");
2734                 if (!expected_uid_gid(open_tree_fd1, FILE2, 0, t_overflowuid, t_overflowgid))
2735                         die("failure: expected_uid_gid");
2736                 if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, t_overflowuid, t_overflowgid))
2737                         die("failure: expected_uid_gid");
2738                 if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, t_overflowuid, t_overflowgid))
2739                         die("failure: expected_uid_gid");
2740                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, t_overflowuid, t_overflowgid))
2741                         die("failure: expected_uid_gid");
2742                 if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, t_overflowuid, t_overflowgid))
2743                         die("failure: expected_uid_gid");
2744                 if (!expected_uid_gid(open_tree_fd1, DIR1, 0, t_overflowuid, t_overflowgid))
2745                         die("failure: expected_uid_gid");
2746
2747                 if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 0, 0))
2748                         die("failure: expected_uid_gid");
2749                 if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 0, 0))
2750                         die("failure: expected_uid_gid");
2751                 if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 0, 0))
2752                         die("failure: expected_uid_gid");
2753                 if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 0, 0))
2754                         die("failure: expected_uid_gid");
2755                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000))
2756                         die("failure: expected_uid_gid");
2757                 if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 0, 0))
2758                         die("failure: expected_uid_gid");
2759                 if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 0, 0))
2760                         die("failure: expected_uid_gid");
2761
2762                 exit(EXIT_SUCCESS);
2763         }
2764
2765         if (wait_for_pid(pid))
2766                 goto out;
2767
2768         /* Check ownership through original mount. */
2769         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
2770                 log_stderr("failure: expected_uid_gid");
2771                 goto out;
2772         }
2773         if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) {
2774                 log_stderr("failure: expected_uid_gid");
2775                 goto out;
2776         }
2777         if (!expected_uid_gid(t_dir1_fd, HARDLINK1, 0, 0, 0)) {
2778                 log_stderr("failure: expected_uid_gid");
2779                 goto out;
2780         }
2781         if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) {
2782                 log_stderr("failure: expected_uid_gid");
2783                 goto out;
2784         }
2785         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
2786                 log_stderr("failure: expected_uid_gid");
2787                 goto out;
2788         }
2789         if (!expected_uid_gid(t_dir1_fd, SYMLINK1, 0, 0, 0)) {
2790                 log_stderr("failure: expected_uid_gid");
2791                 goto out;
2792         }
2793         if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) {
2794                 log_stderr("failure: expected_uid_gid");
2795                 goto out;
2796         }
2797
2798         /* Check ownership through first idmapped mount. */
2799         if (!expected_uid_gid(open_tree_fd1, FILE1, 0, 10000, 10000)) {
2800                 log_stderr("failure: expected_uid_gid");
2801                 goto out;
2802         }
2803         if (!expected_uid_gid(open_tree_fd1, FILE2, 0, 10000, 10000)) {
2804                 log_stderr("failure: expected_uid_gid");
2805                 goto out;
2806         }
2807         if (!expected_uid_gid(open_tree_fd1, HARDLINK1, 0, 10000, 10000)) {
2808                 log_stderr("failure: expected_uid_gid");
2809                 goto out;
2810         }
2811         if (!expected_uid_gid(open_tree_fd1, CHRDEV1, 0, 10000, 10000)) {
2812                 log_stderr("failure: expected_uid_gid");
2813                 goto out;
2814         }
2815         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
2816                 log_stderr("failure: expected_uid_gid");
2817                 goto out;
2818         }
2819         if (!expected_uid_gid(open_tree_fd1, SYMLINK1, 0, 10000, 10000)) {
2820                 log_stderr("failure: expected_uid_gid");
2821                 goto out;
2822         }
2823         if (!expected_uid_gid(open_tree_fd1, DIR1, 0, 10000, 10000)) {
2824                 log_stderr("failure: expected_uid_gid");
2825                 goto out;
2826         }
2827
2828         /* Check ownership through second idmapped mount. */
2829         if (!expected_uid_gid(open_tree_fd2, FILE1, 0, 30000, 30000)) {
2830                 log_stderr("failure: expected_uid_gid");
2831                 goto out;
2832         }
2833         if (!expected_uid_gid(open_tree_fd2, FILE2, 0, 30000, 30000)) {
2834                 log_stderr("failure: expected_uid_gid");
2835                 goto out;
2836         }
2837         if (!expected_uid_gid(open_tree_fd2, HARDLINK1, 0, 30000, 30000)) {
2838                 log_stderr("failure: expected_uid_gid");
2839                 goto out;
2840         }
2841         if (!expected_uid_gid(open_tree_fd2, CHRDEV1, 0, 30000, 30000)) {
2842                 log_stderr("failure: expected_uid_gid");
2843                 goto out;
2844         }
2845         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, AT_SYMLINK_NOFOLLOW, 32000, 32000)) {
2846                 log_stderr("failure: expected_uid_gid");
2847                 goto out;
2848         }
2849         if (!expected_uid_gid(open_tree_fd2, SYMLINK1, 0, 30000, 30000)) {
2850                 log_stderr("failure: expected_uid_gid");
2851                 goto out;
2852         }
2853         if (!expected_uid_gid(open_tree_fd2, DIR1, 0, 30000, 30000)) {
2854                 log_stderr("failure: expected_uid_gid");
2855                 goto out;
2856         }
2857
2858         fret = 0;
2859         log_debug("Ran test");
2860 out:
2861         safe_close(attr1.userns_fd);
2862         safe_close(attr2.userns_fd);
2863         safe_close(file1_fd);
2864         safe_close(open_tree_fd1);
2865         safe_close(open_tree_fd2);
2866
2867         return fret;
2868 }
2869
2870 static int fscaps(void)
2871 {
2872         int fret = -1;
2873         int file1_fd = -EBADF, fd_userns = -EBADF;
2874         pid_t pid;
2875
2876         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2877         if (file1_fd < 0) {
2878                 log_stderr("failure: openat");
2879                 goto out;
2880         }
2881
2882         /* Skip if vfs caps are unsupported. */
2883         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
2884                 return 0;
2885
2886         /* Changing mount properties on a detached mount. */
2887         fd_userns = get_userns_fd(0, 10000, 10000);
2888         if (fd_userns < 0) {
2889                 log_stderr("failure: get_userns_fd");
2890                 goto out;
2891         }
2892
2893         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
2894                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2895                 goto out;
2896         }
2897
2898         pid = fork();
2899         if (pid < 0) {
2900                 log_stderr("failure: fork");
2901                 goto out;
2902         }
2903         if (pid == 0) {
2904                 if (!switch_userns(fd_userns, 0, 0, false))
2905                         die("failure: switch_userns");
2906
2907                 /*
2908                  * On kernels before 5.12 this would succeed and return the
2909                  * unconverted caps. Then - for whatever reason - this behavior
2910                  * got changed and since 5.12 EOVERFLOW is returned when the
2911                  * rootid stored alongside the vfs caps does not map to uid 0 in
2912                  * the caller's user namespace.
2913                  */
2914                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
2915                         die("failure: expected_dummy_vfs_caps_uid");
2916
2917                 exit(EXIT_SUCCESS);
2918         }
2919
2920         if (wait_for_pid(pid))
2921                 goto out;
2922
2923         if (fremovexattr(file1_fd, "security.capability")) {
2924                 log_stderr("failure: fremovexattr");
2925                 goto out;
2926         }
2927         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2928                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2929                 goto out;
2930         }
2931         if (errno != ENODATA) {
2932                 log_stderr("failure: errno");
2933                 goto out;
2934         }
2935
2936         if (set_dummy_vfs_caps(file1_fd, 0, 10000)) {
2937                 log_stderr("failure: set_dummy_vfs_caps");
2938                 goto out;
2939         }
2940
2941         if (!expected_dummy_vfs_caps_uid(file1_fd, 10000)) {
2942                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2943                 goto out;
2944         }
2945
2946         pid = fork();
2947         if (pid < 0) {
2948                 log_stderr("failure: fork");
2949                 goto out;
2950         }
2951         if (pid == 0) {
2952                 if (!switch_userns(fd_userns, 0, 0, false))
2953                         die("failure: switch_userns");
2954
2955                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0))
2956                         die("failure: expected_dummy_vfs_caps_uid");
2957
2958                 exit(EXIT_SUCCESS);
2959         }
2960
2961         if (wait_for_pid(pid))
2962                 goto out;
2963
2964         if (fremovexattr(file1_fd, "security.capability")) {
2965                 log_stderr("failure: fremovexattr");
2966                 goto out;
2967         }
2968         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
2969                 log_stderr("failure: expected_dummy_vfs_caps_uid");
2970                 goto out;
2971         }
2972         if (errno != ENODATA) {
2973                 log_stderr("failure: errno");
2974                 goto out;
2975         }
2976
2977         fret = 0;
2978         log_debug("Ran test");
2979 out:
2980         safe_close(file1_fd);
2981         safe_close(fd_userns);
2982
2983         return fret;
2984 }
2985
2986 static int fscaps_idmapped_mounts(void)
2987 {
2988         int fret = -1;
2989         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
2990         struct mount_attr attr = {
2991                 .attr_set = MOUNT_ATTR_IDMAP,
2992         };
2993         pid_t pid;
2994
2995         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
2996         if (file1_fd < 0) {
2997                 log_stderr("failure: openat");
2998                 goto out;
2999         }
3000
3001         /* Skip if vfs caps are unsupported. */
3002         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3003                 return 0;
3004
3005         if (fremovexattr(file1_fd, "security.capability")) {
3006                 log_stderr("failure: fremovexattr");
3007                 goto out;
3008         }
3009
3010         /* Changing mount properties on a detached mount. */
3011         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3012         if (attr.userns_fd < 0) {
3013                 log_stderr("failure: get_userns_fd");
3014                 goto out;
3015         }
3016
3017         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3018                                      AT_EMPTY_PATH |
3019                                      AT_NO_AUTOMOUNT |
3020                                      AT_SYMLINK_NOFOLLOW |
3021                                      OPEN_TREE_CLOEXEC |
3022                                      OPEN_TREE_CLONE);
3023         if (open_tree_fd < 0) {
3024                 log_stderr("failure: sys_open_tree");
3025                 goto out;
3026         }
3027
3028         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3029                 log_stderr("failure: sys_mount_setattr");
3030                 goto out;
3031         }
3032
3033         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3034         if (file1_fd2 < 0) {
3035                 log_stderr("failure: openat");
3036                 goto out;
3037         }
3038
3039         if (!set_dummy_vfs_caps(file1_fd2, 0, 1000)) {
3040                 log_stderr("failure: set_dummy_vfs_caps");
3041                 goto out;
3042         }
3043
3044         if (set_dummy_vfs_caps(file1_fd2, 0, 10000)) {
3045                 log_stderr("failure: set_dummy_vfs_caps");
3046                 goto out;
3047         }
3048
3049         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3050                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3051                 goto out;
3052         }
3053
3054         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3055                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3056                 goto out;
3057         }
3058
3059         pid = fork();
3060         if (pid < 0) {
3061                 log_stderr("failure: fork");
3062                 goto out;
3063         }
3064         if (pid == 0) {
3065                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3066                         die("failure: switch_userns");
3067
3068                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3069                         die("failure: expected_dummy_vfs_caps_uid");
3070
3071                 exit(EXIT_SUCCESS);
3072         }
3073
3074         if (wait_for_pid(pid))
3075                 goto out;
3076
3077         if (fremovexattr(file1_fd2, "security.capability")) {
3078                 log_stderr("failure: fremovexattr");
3079                 goto out;
3080         }
3081         if (expected_dummy_vfs_caps_uid(file1_fd2, -1)) {
3082                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3083                 goto out;
3084         }
3085         if (errno != ENODATA) {
3086                 log_stderr("failure: errno");
3087                 goto out;
3088         }
3089
3090         if (set_dummy_vfs_caps(file1_fd2, 0, 12000)) {
3091                 log_stderr("failure: set_dummy_vfs_caps");
3092                 goto out;
3093         }
3094
3095         if (!expected_dummy_vfs_caps_uid(file1_fd2, 12000)) {
3096                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3097                 goto out;
3098         }
3099
3100         if (!expected_dummy_vfs_caps_uid(file1_fd, 2000)) {
3101                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3102                 goto out;
3103         }
3104
3105         pid = fork();
3106         if (pid < 0) {
3107                 log_stderr("failure: fork");
3108                 goto out;
3109         }
3110         if (pid == 0) {
3111                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3112                         die("failure: switch_userns");
3113
3114                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 2000))
3115                         die("failure: expected_dummy_vfs_caps_uid");
3116
3117                 exit(EXIT_SUCCESS);
3118         }
3119
3120         if (wait_for_pid(pid))
3121                 goto out;
3122
3123         fret = 0;
3124         log_debug("Ran test");
3125 out:
3126         safe_close(attr.userns_fd);
3127         safe_close(file1_fd);
3128         safe_close(file1_fd2);
3129         safe_close(open_tree_fd);
3130
3131         return fret;
3132 }
3133
3134 static int fscaps_idmapped_mounts_in_userns(void)
3135 {
3136         int fret = -1;
3137         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3138         struct mount_attr attr = {
3139                 .attr_set = MOUNT_ATTR_IDMAP,
3140         };
3141         pid_t pid;
3142
3143         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3144         if (file1_fd < 0) {
3145                 log_stderr("failure: openat");
3146                 goto out;
3147         }
3148
3149         /* Skip if vfs caps are unsupported. */
3150         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3151                 return 0;
3152
3153         if (fremovexattr(file1_fd, "security.capability")) {
3154                 log_stderr("failure: fremovexattr");
3155                 goto out;
3156         }
3157
3158         /* Changing mount properties on a detached mount. */
3159         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3160         if (attr.userns_fd < 0) {
3161                 log_stderr("failure: get_userns_fd");
3162                 goto out;
3163         }
3164
3165         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3166                                      AT_EMPTY_PATH |
3167                                      AT_NO_AUTOMOUNT |
3168                                      AT_SYMLINK_NOFOLLOW |
3169                                      OPEN_TREE_CLOEXEC |
3170                                      OPEN_TREE_CLONE);
3171         if (open_tree_fd < 0) {
3172                 log_stderr("failure: sys_open_tree");
3173                 goto out;
3174         }
3175
3176         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3177                 log_stderr("failure: sys_mount_setattr");
3178                 goto out;
3179         }
3180
3181         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3182         if (file1_fd2 < 0) {
3183                 log_stderr("failure: openat");
3184                 goto out;
3185         }
3186
3187         pid = fork();
3188         if (pid < 0) {
3189                 log_stderr("failure: fork");
3190                 goto out;
3191         }
3192         if (pid == 0) {
3193                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3194                         die("failure: switch_userns");
3195
3196                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3197                         die("failure: expected_dummy_vfs_caps_uid");
3198                 if (errno != ENODATA)
3199                         die("failure: errno");
3200
3201                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3202                         die("failure: set_dummy_vfs_caps");
3203
3204                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3205                         die("failure: expected_dummy_vfs_caps_uid");
3206
3207                 if (!expected_dummy_vfs_caps_uid(file1_fd, 1000) && errno != EOVERFLOW)
3208                         die("failure: expected_dummy_vfs_caps_uid");
3209
3210                 exit(EXIT_SUCCESS);
3211         }
3212
3213         if (wait_for_pid(pid))
3214                 goto out;
3215
3216         if (!expected_dummy_vfs_caps_uid(file1_fd, 1000)) {
3217                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3218                 goto out;
3219         }
3220
3221         fret = 0;
3222         log_debug("Ran test");
3223 out:
3224         safe_close(attr.userns_fd);
3225         safe_close(file1_fd);
3226         safe_close(file1_fd2);
3227         safe_close(open_tree_fd);
3228
3229         return fret;
3230 }
3231
3232 static int fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns(void)
3233 {
3234         int fret = -1;
3235         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3236         struct mount_attr attr = {
3237                 .attr_set = MOUNT_ATTR_IDMAP,
3238         };
3239         pid_t pid;
3240
3241         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3242         if (file1_fd < 0) {
3243                 log_stderr("failure: openat");
3244                 goto out;
3245         }
3246
3247         /* Skip if vfs caps are unsupported. */
3248         if (set_dummy_vfs_caps(file1_fd, 0, 1000))
3249                 return 0;
3250
3251         if (fremovexattr(file1_fd, "security.capability")) {
3252                 log_stderr("failure: fremovexattr");
3253                 goto out;
3254         }
3255         if (expected_dummy_vfs_caps_uid(file1_fd, -1)) {
3256                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3257                 goto out;
3258         }
3259         if (errno != ENODATA) {
3260                 log_stderr("failure: errno");
3261                 goto out;
3262         }
3263
3264         /* Changing mount properties on a detached mount. */
3265         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3266         if (attr.userns_fd < 0) {
3267                 log_stderr("failure: get_userns_fd");
3268                 goto out;
3269         }
3270
3271         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3272                                      AT_EMPTY_PATH |
3273                                      AT_NO_AUTOMOUNT |
3274                                      AT_SYMLINK_NOFOLLOW |
3275                                      OPEN_TREE_CLOEXEC |
3276                                      OPEN_TREE_CLONE);
3277         if (open_tree_fd < 0) {
3278                 log_stderr("failure: sys_open_tree");
3279                 goto out;
3280         }
3281
3282         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3283                 log_stderr("failure: sys_mount_setattr");
3284                 goto out;
3285         }
3286
3287         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3288         if (file1_fd2 < 0) {
3289                 log_stderr("failure: openat");
3290                 goto out;
3291         }
3292
3293         /*
3294          * Verify we can set an v3 fscap for real root this was regressed at
3295          * some point. Make sure this doesn't happen again!
3296          */
3297         pid = fork();
3298         if (pid < 0) {
3299                 log_stderr("failure: fork");
3300                 goto out;
3301         }
3302         if (pid == 0) {
3303                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3304                         die("failure: switch_userns");
3305
3306                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3307                         die("failure: expected_dummy_vfs_caps_uid");
3308                 if (errno != ENODATA)
3309                         die("failure: errno");
3310
3311                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3312                         die("failure: set_dummy_vfs_caps");
3313
3314                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3315                         die("failure: expected_dummy_vfs_caps_uid");
3316
3317                 if (!expected_dummy_vfs_caps_uid(file1_fd, 0) && errno != EOVERFLOW)
3318                         die("failure: expected_dummy_vfs_caps_uid");
3319
3320                 exit(EXIT_SUCCESS);
3321         }
3322
3323         if (wait_for_pid(pid))
3324                 goto out;
3325
3326         if (!expected_dummy_vfs_caps_uid(file1_fd2, 10000)) {
3327                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3328                 goto out;
3329         }
3330
3331         if (!expected_dummy_vfs_caps_uid(file1_fd, 0)) {
3332                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3333                 goto out;
3334         }
3335
3336         fret = 0;
3337         log_debug("Ran test");
3338 out:
3339         safe_close(attr.userns_fd);
3340         safe_close(file1_fd);
3341         safe_close(file1_fd2);
3342         safe_close(open_tree_fd);
3343
3344         return fret;
3345 }
3346
3347 static int fscaps_idmapped_mounts_in_userns_separate_userns(void)
3348 {
3349         int fret = -1;
3350         int file1_fd = -EBADF, file1_fd2 = -EBADF, open_tree_fd = -EBADF;
3351         struct mount_attr attr = {
3352                 .attr_set = MOUNT_ATTR_IDMAP,
3353         };
3354         pid_t pid;
3355
3356         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, 0644);
3357         if (file1_fd < 0) {
3358                 log_stderr("failure: openat");
3359                 goto out;
3360         }
3361
3362         /* Skip if vfs caps are unsupported. */
3363         if (set_dummy_vfs_caps(file1_fd, 0, 1000)) {
3364                 log_stderr("failure: set_dummy_vfs_caps");
3365                 goto out;
3366         }
3367
3368         if (fremovexattr(file1_fd, "security.capability")) {
3369                 log_stderr("failure: fremovexattr");
3370                 goto out;
3371         }
3372
3373         /* change ownership of all files to uid 0 */
3374         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
3375                 log_stderr("failure: chown_r");
3376                 goto out;
3377         }
3378
3379         /* Changing mount properties on a detached mount. */
3380         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
3381         if (attr.userns_fd < 0) {
3382                 log_stderr("failure: get_userns_fd");
3383                 goto out;
3384         }
3385
3386         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3387                                      AT_EMPTY_PATH |
3388                                      AT_NO_AUTOMOUNT |
3389                                      AT_SYMLINK_NOFOLLOW |
3390                                      OPEN_TREE_CLOEXEC |
3391                                      OPEN_TREE_CLONE);
3392         if (open_tree_fd < 0) {
3393                 log_stderr("failure: sys_open_tree");
3394                 goto out;
3395         }
3396
3397         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3398                 log_stderr("failure: sys_mount_setattr");
3399                 goto out;
3400         }
3401
3402         file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, 0);
3403         if (file1_fd2 < 0) {
3404                 log_stderr("failure: openat");
3405                 goto out;
3406         }
3407
3408         pid = fork();
3409         if (pid < 0) {
3410                 log_stderr("failure: fork");
3411                 goto out;
3412         }
3413         if (pid == 0) {
3414                 int userns_fd;
3415
3416                 userns_fd = get_userns_fd(0, 10000, 10000);
3417                 if (userns_fd < 0)
3418                         die("failure: get_userns_fd");
3419
3420                 if (!switch_userns(userns_fd, 0, 0, false))
3421                         die("failure: switch_userns");
3422
3423                 if (set_dummy_vfs_caps(file1_fd2, 0, 0))
3424                         die("failure: set fscaps");
3425
3426                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 0))
3427                         die("failure: expected_dummy_vfs_caps_uid");
3428
3429                 if (!expected_dummy_vfs_caps_uid(file1_fd, 20000) && errno != EOVERFLOW)
3430                         die("failure: expected_dummy_vfs_caps_uid");
3431
3432                 exit(EXIT_SUCCESS);
3433         }
3434
3435         if (wait_for_pid(pid))
3436                 goto out;
3437
3438         if (!expected_dummy_vfs_caps_uid(file1_fd, 20000)) {
3439                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3440                 goto out;
3441         }
3442
3443         pid = fork();
3444         if (pid < 0) {
3445                 log_stderr("failure: fork");
3446                 goto out;
3447         }
3448         if (pid == 0) {
3449                 int userns_fd;
3450
3451                 userns_fd = get_userns_fd(0, 10000, 10000);
3452                 if (userns_fd < 0)
3453                         die("failure: get_userns_fd");
3454
3455                 if (!switch_userns(userns_fd, 0, 0, false))
3456                         die("failure: switch_userns");
3457
3458                 if (fremovexattr(file1_fd2, "security.capability"))
3459                         die("failure: fremovexattr");
3460                 if (expected_dummy_vfs_caps_uid(file1_fd2, -1))
3461                         die("failure: expected_dummy_vfs_caps_uid");
3462                 if (errno != ENODATA)
3463                         die("failure: errno");
3464
3465                 if (set_dummy_vfs_caps(file1_fd2, 0, 1000))
3466                         die("failure: set_dummy_vfs_caps");
3467
3468                 if (!expected_dummy_vfs_caps_uid(file1_fd2, 1000))
3469                         die("failure: expected_dummy_vfs_caps_uid");
3470
3471                 if (!expected_dummy_vfs_caps_uid(file1_fd, 21000) && errno != EOVERFLOW)
3472                         die("failure: expected_dummy_vfs_caps_uid");
3473
3474                 exit(EXIT_SUCCESS);
3475         }
3476
3477         if (wait_for_pid(pid))
3478                 goto out;
3479
3480         if (!expected_dummy_vfs_caps_uid(file1_fd, 21000)) {
3481                 log_stderr("failure: expected_dummy_vfs_caps_uid");
3482                 goto out;
3483         }
3484
3485         fret = 0;
3486         log_debug("Ran test");
3487 out:
3488         safe_close(attr.userns_fd);
3489         safe_close(file1_fd);
3490         safe_close(file1_fd2);
3491         safe_close(open_tree_fd);
3492
3493         return fret;
3494 }
3495
/* Re-exec hook for the setid tests; registered as a constructor.
 *
 * IDMAP_MOUNT_TEST_RUN_SETID unset: regular test-suite run, simply return.
 * IDMAP_MOUNT_TEST_RUN_SETID=1:     exit successfully only if the real uid/gid
 *                                   differ from the effective ones and the
 *                                   effective ones equal EXPECTED_EUID and
 *                                   EXPECTED_EGID.
 * IDMAP_MOUNT_TEST_RUN_SETID=0:     exit successfully only if the real and
 *                                   effective ids are identical and equal
 *                                   EXPECTED_EUID and EXPECTED_EGID; otherwise
 *                                   die().
 *
 * Once the variable is set no tests are run in this process: every remaining
 * case (missing EXPECTED_* variables, unknown value, failed setid check) ends
 * in exit(EXIT_FAILURE).
 */
static void __attribute__((constructor)) setuid_rexec(void)
{
        const char *mode, *euid_env, *egid_env;
        uid_t want_euid;
        gid_t want_egid;
        bool ids_as_expected;

        mode = getenv("IDMAP_MOUNT_TEST_RUN_SETID");
        if (!mode)
                return; /* This is a regular test-suite run. */

        euid_env = getenv("EXPECTED_EUID");
        egid_env = getenv("EXPECTED_EGID");
        if (!euid_env || !egid_env)
                exit(EXIT_FAILURE);

        want_euid = atoi(euid_env);
        want_egid = atoi(egid_env);
        ids_as_expected = (want_euid == geteuid()) && (want_egid == getegid());

        if (strcmp(mode, "1") == 0) {
                /* we're expecting to run setid */
                bool ids_switched = (getuid() != geteuid()) && (getgid() != getegid());

                if (ids_switched && ids_as_expected)
                        exit(EXIT_SUCCESS);
        } else if (strcmp(mode, "0") == 0) {
                /* we're expecting to not run setid */
                bool ids_unchanged = (getuid() == geteuid()) && (getgid() == getegid());

                if (ids_unchanged && ids_as_expected)
                        exit(EXIT_SUCCESS);

                die("failure: non-setid");
        }

        exit(EXIT_FAILURE);
}
3536
3537 /* Validate that setid transitions are handled correctly. */
3538 static int setid_binaries(void)
3539 {
3540         int fret = -1;
3541         int file1_fd = -EBADF, exec_fd = -EBADF;
3542         pid_t pid;
3543
3544         /* create a file to be used as setuid binary */
3545         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3546         if (file1_fd < 0) {
3547                 log_stderr("failure: openat");
3548                 goto out;
3549         }
3550
3551         /* open our own executable */
3552         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3553         if (exec_fd < 0) {
3554                 log_stderr("failure: openat");
3555                 goto out;
3556         }
3557
3558         /* copy our own executable into the file we created */
3559         if (fd_to_fd(exec_fd, file1_fd)) {
3560                 log_stderr("failure: fd_to_fd");
3561                 goto out;
3562         }
3563
3564         /* chown the file to the uid and gid we want to assume */
3565         if (fchown(file1_fd, 5000, 5000)) {
3566                 log_stderr("failure: fchown");
3567                 goto out;
3568         }
3569
3570         /* set the setid bits and grant execute permissions to the group */
3571         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3572                 log_stderr("failure: fchmod");
3573                 goto out;
3574         }
3575
3576         /* Verify that the sid bits got raised. */
3577         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3578                 log_stderr("failure: is_setid");
3579                 goto out;
3580         }
3581
3582         safe_close(exec_fd);
3583         safe_close(file1_fd);
3584
3585         /* Verify we run setid binary as uid and gid 5000 from the original
3586          * mount.
3587          */
3588         pid = fork();
3589         if (pid < 0) {
3590                 log_stderr("failure: fork");
3591                 goto out;
3592         }
3593         if (pid == 0) {
3594                 static char *envp[] = {
3595                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3596                         "EXPECTED_EUID=5000",
3597                         "EXPECTED_EGID=5000",
3598                         NULL,
3599                 };
3600                 static char *argv[] = {
3601                         NULL,
3602                 };
3603
3604                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 5000, 5000))
3605                         die("failure: expected_uid_gid");
3606
3607                 sys_execveat(t_dir1_fd, FILE1, argv, envp, 0);
3608                 die("failure: sys_execveat");
3609
3610                 exit(EXIT_FAILURE);
3611         }
3612         if (wait_for_pid(pid))
3613                 goto out;
3614
3615         fret = 0;
3616         log_debug("Ran test");
3617 out:
3618
3619         return fret;
3620 }
3621
3622 /* Validate that setid transitions are handled correctly on idmapped mounts. */
3623 static int setid_binaries_idmapped_mounts(void)
3624 {
3625         int fret = -1;
3626         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3627         struct mount_attr attr = {
3628                 .attr_set = MOUNT_ATTR_IDMAP,
3629         };
3630         pid_t pid;
3631
3632         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3633                 log_stderr("failure: mkdirat");
3634                 goto out;
3635         }
3636
3637         /* create a file to be used as setuid binary */
3638         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3639         if (file1_fd < 0) {
3640                 log_stderr("failure: openat");
3641                 goto out;
3642         }
3643
3644         /* open our own executable */
3645         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3646         if (exec_fd < 0) {
3647                 log_stderr("failure:openat ");
3648                 goto out;
3649         }
3650
3651         /* copy our own executable into the file we created */
3652         if (fd_to_fd(exec_fd, file1_fd)) {
3653                 log_stderr("failure: fd_to_fd");
3654                 goto out;
3655         }
3656
3657         /* chown the file to the uid and gid we want to assume */
3658         if (fchown(file1_fd, 5000, 5000)) {
3659                 log_stderr("failure: fchown");
3660                 goto out;
3661         }
3662
3663         /* set the setid bits and grant execute permissions to the group */
3664         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3665                 log_stderr("failure: fchmod");
3666                 goto out;
3667         }
3668
3669         /* Verify that the sid bits got raised. */
3670         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3671                 log_stderr("failure: is_setid");
3672                 goto out;
3673         }
3674
3675         safe_close(exec_fd);
3676         safe_close(file1_fd);
3677
3678         /* Changing mount properties on a detached mount. */
3679         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3680         if (attr.userns_fd < 0) {
3681                 log_stderr("failure: get_userns_fd");
3682                 goto out;
3683         }
3684
3685         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3686                                      AT_EMPTY_PATH |
3687                                      AT_NO_AUTOMOUNT |
3688                                      AT_SYMLINK_NOFOLLOW |
3689                                      OPEN_TREE_CLOEXEC |
3690                                      OPEN_TREE_CLONE);
3691         if (open_tree_fd < 0) {
3692                 log_stderr("failure: sys_open_tree");
3693                 goto out;
3694         }
3695
3696         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3697                 log_stderr("failure: sys_mount_setattr");
3698                 goto out;
3699         }
3700
3701         /* A detached mount will have an anonymous mount namespace attached to
3702          * it. This means that we can't execute setid binaries on a detached
3703          * mount because the mnt_may_suid() helper will fail the check_mount()
3704          * part of its check which compares the caller's mount namespace to the
3705          * detached mount's mount namespace. Since by definition an anonymous
3706          * mount namespace is not equale to any mount namespace currently in
3707          * use this can't work. So attach the mount to the filesystem first
3708          * before performing this check.
3709          */
3710         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3711                 log_stderr("failure: sys_move_mount");
3712                 goto out;
3713         }
3714
3715         /* Verify we run setid binary as uid and gid 10000 from idmapped mount mount. */
3716         pid = fork();
3717         if (pid < 0) {
3718                 log_stderr("failure: fork");
3719                 goto out;
3720         }
3721         if (pid == 0) {
3722                 static char *envp[] = {
3723                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3724                         "EXPECTED_EUID=15000",
3725                         "EXPECTED_EGID=15000",
3726                         NULL,
3727                 };
3728                 static char *argv[] = {
3729                         NULL,
3730                 };
3731
3732                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 15000, 15000))
3733                         die("failure: expected_uid_gid");
3734
3735                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3736                 die("failure: sys_execveat");
3737
3738                 exit(EXIT_FAILURE);
3739         }
3740
3741         if (wait_for_pid(pid))
3742                 goto out;
3743
3744         fret = 0;
3745         log_debug("Ran test");
3746 out:
3747         safe_close(exec_fd);
3748         safe_close(file1_fd);
3749         safe_close(open_tree_fd);
3750
3751         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
3752         sys_umount2(t_buf, MNT_DETACH);
3753         rm_r(t_mnt_fd, DIR1);
3754
3755         return fret;
3756 }
3757
3758 /* Validate that setid transitions are handled correctly on idmapped mounts
3759  * running in a user namespace where the uid and gid of the setid binary have no
3760  * mapping.
3761  */
3762 static int setid_binaries_idmapped_mounts_in_userns(void)
3763 {
3764         int fret = -1;
3765         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
3766         struct mount_attr attr = {
3767                 .attr_set = MOUNT_ATTR_IDMAP,
3768         };
3769         pid_t pid;
3770
3771         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
3772                 log_stderr("failure: ");
3773                 goto out;
3774         }
3775
3776         /* create a file to be used as setuid binary */
3777         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
3778         if (file1_fd < 0) {
3779                 log_stderr("failure: openat");
3780                 goto out;
3781         }
3782
3783         /* open our own executable */
3784         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
3785         if (exec_fd < 0) {
3786                 log_stderr("failure: openat");
3787                 goto out;
3788         }
3789
3790         /* copy our own executable into the file we created */
3791         if (fd_to_fd(exec_fd, file1_fd)) {
3792                 log_stderr("failure: fd_to_fd");
3793                 goto out;
3794         }
3795
3796         safe_close(exec_fd);
3797
3798         /* chown the file to the uid and gid we want to assume */
3799         if (fchown(file1_fd, 5000, 5000)) {
3800                 log_stderr("failure: fchown");
3801                 goto out;
3802         }
3803
3804         /* set the setid bits and grant execute permissions to the group */
3805         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3806                 log_stderr("failure: fchmod");
3807                 goto out;
3808         }
3809
3810         /* Verify that the sid bits got raised. */
3811         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3812                 log_stderr("failure: is_setid");
3813                 goto out;
3814         }
3815
3816         safe_close(file1_fd);
3817
3818         /* Changing mount properties on a detached mount. */
3819         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
3820         if (attr.userns_fd < 0) {
3821                 log_stderr("failure: get_userns_fd");
3822                 goto out;
3823         }
3824
3825         open_tree_fd = sys_open_tree(t_dir1_fd, "",
3826                                      AT_EMPTY_PATH |
3827                                      AT_NO_AUTOMOUNT |
3828                                      AT_SYMLINK_NOFOLLOW |
3829                                      OPEN_TREE_CLOEXEC |
3830                                      OPEN_TREE_CLONE);
3831         if (open_tree_fd < 0) {
3832                 log_stderr("failure: sys_open_tree");
3833                 goto out;
3834         }
3835
3836         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
3837                 log_stderr("failure: sys_mount_setattr");
3838                 goto out;
3839         }
3840
3841         /* A detached mount will have an anonymous mount namespace attached to
3842          * it. This means that we can't execute setid binaries on a detached
3843          * mount because the mnt_may_suid() helper will fail the check_mount()
3844          * part of its check which compares the caller's mount namespace to the
3845          * detached mount's mount namespace. Since by definition an anonymous
3846          * mount namespace is not equale to any mount namespace currently in
3847          * use this can't work. So attach the mount to the filesystem first
3848          * before performing this check.
3849          */
3850         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
3851                 log_stderr("failure: sys_move_mount");
3852                 goto out;
3853         }
3854
3855         pid = fork();
3856         if (pid < 0) {
3857                 log_stderr("failure: fork");
3858                 goto out;
3859         }
3860         if (pid == 0) {
3861                 static char *envp[] = {
3862                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3863                         "EXPECTED_EUID=5000",
3864                         "EXPECTED_EGID=5000",
3865                         NULL,
3866                 };
3867                 static char *argv[] = {
3868                         NULL,
3869                 };
3870
3871                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3872                         die("failure: switch_userns");
3873
3874                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
3875                         die("failure: expected_uid_gid");
3876
3877                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3878                 die("failure: sys_execveat");
3879
3880                 exit(EXIT_FAILURE);
3881         }
3882
3883         if (wait_for_pid(pid)) {
3884                 log_stderr("failure: wait_for_pid");
3885                 goto out;
3886         }
3887
3888         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3889         if (file1_fd < 0) {
3890                 log_stderr("failure: openat");
3891                 goto out;
3892         }
3893
3894         /* chown the file to the uid and gid we want to assume */
3895         if (fchown(file1_fd, 0, 0)) {
3896                 log_stderr("failure: fchown");
3897                 goto out;
3898         }
3899
3900         /* set the setid bits and grant execute permissions to the group */
3901         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
3902                 log_stderr("failure: fchmod");
3903                 goto out;
3904         }
3905
3906         /* Verify that the sid bits got raised. */
3907         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3908                 log_stderr("failure: is_setid");
3909                 goto out;
3910         }
3911
3912         safe_close(file1_fd);
3913
3914         pid = fork();
3915         if (pid < 0) {
3916                 log_stderr("failure: fork");
3917                 goto out;
3918         }
3919         if (pid == 0) {
3920                 static char *envp[] = {
3921                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
3922                         "EXPECTED_EUID=0",
3923                         "EXPECTED_EGID=0",
3924                         NULL,
3925                 };
3926                 static char *argv[] = {
3927                         NULL,
3928                 };
3929
3930                 if (!caps_supported()) {
3931                         log_debug("skip: capability library not installed");
3932                         exit(EXIT_SUCCESS);
3933                 }
3934
3935                 if (!switch_userns(attr.userns_fd, 5000, 5000, true))
3936                         die("failure: switch_userns");
3937
3938                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
3939                         die("failure: expected_uid_gid");
3940
3941                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
3942                 die("failure: sys_execveat");
3943
3944                 exit(EXIT_FAILURE);
3945         }
3946
3947         if (wait_for_pid(pid)) {
3948                 log_stderr("failure: wait_for_pid");
3949                 goto out;
3950         }
3951
3952         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
3953         if (file1_fd < 0) {
3954                 log_stderr("failure: openat");
3955                 goto out;
3956         }
3957
3958         /* chown the file to the uid and gid we want to assume */
3959         if (fchown(file1_fd, 30000, 30000)) {
3960                 log_stderr("failure: fchown");
3961                 goto out;
3962         }
3963
3964         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
3965                 log_stderr("failure: fchmod");
3966                 goto out;
3967         }
3968
3969         /* Verify that the sid bits got raised. */
3970         if (!is_setid(t_dir1_fd, FILE1, 0)) {
3971                 log_stderr("failure: is_setid");
3972                 goto out;
3973         }
3974
3975         safe_close(file1_fd);
3976
3977         /* Verify that we can't assume a uid and gid of a setid binary for which
3978          * we have no mapping in our user namespace.
3979          */
3980         pid = fork();
3981         if (pid < 0) {
3982                 log_stderr("failure: fork");
3983                 goto out;
3984         }
3985         if (pid == 0) {
3986                 char expected_euid[100];
3987                 char expected_egid[100];
3988                 static char *envp[4] = {
3989                         NULL,
3990                         NULL,
3991                         NULL,
3992                         NULL,
3993                 };
3994                 static char *argv[] = {
3995                         NULL,
3996                 };
3997
3998                 if (!switch_userns(attr.userns_fd, 0, 0, false))
3999                         die("failure: switch_userns");
4000
4001                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4002                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4003                 envp[1] = expected_euid;
4004                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4005                 envp[2] = expected_egid;
4006
4007                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4008                         die("failure: expected_uid_gid");
4009
4010                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4011                 die("failure: sys_execveat");
4012
4013                 exit(EXIT_FAILURE);
4014         }
4015
4016         if (wait_for_pid(pid)) {
4017                 log_stderr("failure: wait_for_pid");
4018                 goto out;
4019         }
4020
4021         fret = 0;
4022         log_debug("Ran test");
4023 out:
4024         safe_close(attr.userns_fd);
4025         safe_close(exec_fd);
4026         safe_close(file1_fd);
4027         safe_close(open_tree_fd);
4028
4029         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4030         sys_umount2(t_buf, MNT_DETACH);
4031         rm_r(t_mnt_fd, DIR1);
4032
4033         return fret;
4034 }
4035
4036 /* Validate that setid transitions are handled correctly on idmapped mounts
4037  * running in a user namespace where the uid and gid of the setid binary have no
4038  * mapping.
4039  */
4040 static int setid_binaries_idmapped_mounts_in_userns_separate_userns(void)
4041 {
4042         int fret = -1;
4043         int file1_fd = -EBADF, exec_fd = -EBADF, open_tree_fd = -EBADF;
4044         struct mount_attr attr = {
4045                 .attr_set = MOUNT_ATTR_IDMAP,
4046         };
4047         pid_t pid;
4048
4049         if (mkdirat(t_mnt_fd, DIR1, 0777)) {
4050                 log_stderr("failure: mkdirat");
4051                 goto out;
4052         }
4053
4054         /* create a file to be used as setuid binary */
4055         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC | O_RDWR, 0644);
4056         if (file1_fd < 0) {
4057                 log_stderr("failure: openat");
4058                 goto out;
4059         }
4060
4061         /* open our own executable */
4062         exec_fd = openat(-EBADF, "/proc/self/exe", O_RDONLY | O_CLOEXEC, 0000);
4063         if (exec_fd < 0) {
4064                 log_stderr("failure: openat");
4065                 goto out;
4066         }
4067
4068         /* copy our own executable into the file we created */
4069         if (fd_to_fd(exec_fd, file1_fd)) {
4070                 log_stderr("failure: fd_to_fd");
4071                 goto out;
4072         }
4073
4074         safe_close(exec_fd);
4075
4076         /* change ownership of all files to uid 0 */
4077         if (chown_r(t_mnt_fd, T_DIR1, 20000, 20000)) {
4078                 log_stderr("failure: chown_r");
4079                 goto out;
4080         }
4081
4082         /* chown the file to the uid and gid we want to assume */
4083         if (fchown(file1_fd, 25000, 25000)) {
4084                 log_stderr("failure: fchown");
4085                 goto out;
4086         }
4087
4088         /* set the setid bits and grant execute permissions to the group */
4089         if (fchmod(file1_fd, S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4090                 log_stderr("failure: fchmod");
4091                 goto out;
4092         }
4093
4094         /* Verify that the sid bits got raised. */
4095         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4096                 log_stderr("failure: is_setid");
4097                 goto out;
4098         }
4099
4100         safe_close(file1_fd);
4101
4102         /* Changing mount properties on a detached mount. */
4103         attr.userns_fd  = get_userns_fd(20000, 10000, 10000);
4104         if (attr.userns_fd < 0) {
4105                 log_stderr("failure: get_userns_fd");
4106                 goto out;
4107         }
4108
4109         open_tree_fd = sys_open_tree(t_dir1_fd, "",
4110                                      AT_EMPTY_PATH |
4111                                      AT_NO_AUTOMOUNT |
4112                                      AT_SYMLINK_NOFOLLOW |
4113                                      OPEN_TREE_CLOEXEC |
4114                                      OPEN_TREE_CLONE);
4115         if (open_tree_fd < 0) {
4116                 log_stderr("failure: sys_open_tree");
4117                 goto out;
4118         }
4119
4120         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4121                 log_stderr("failure: sys_mount_setattr");
4122                 goto out;
4123         }
4124
4125         /* A detached mount will have an anonymous mount namespace attached to
4126          * it. This means that we can't execute setid binaries on a detached
4127          * mount because the mnt_may_suid() helper will fail the check_mount()
4128          * part of its check which compares the caller's mount namespace to the
4129          * detached mount's mount namespace. Since by definition an anonymous
4130          * mount namespace is not equale to any mount namespace currently in
4131          * use this can't work. So attach the mount to the filesystem first
4132          * before performing this check.
4133          */
4134         if (sys_move_mount(open_tree_fd, "", t_mnt_fd, DIR1, MOVE_MOUNT_F_EMPTY_PATH)) {
4135                 log_stderr("failure: sys_move_mount");
4136                 goto out;
4137         }
4138
4139         pid = fork();
4140         if (pid < 0) {
4141                 log_stderr("failure: fork");
4142                 goto out;
4143         }
4144         if (pid == 0) {
4145                 int userns_fd;
4146                 static char *envp[] = {
4147                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4148                         "EXPECTED_EUID=5000",
4149                         "EXPECTED_EGID=5000",
4150                         NULL,
4151                 };
4152                 static char *argv[] = {
4153                         NULL,
4154                 };
4155
4156                 userns_fd = get_userns_fd(0, 10000, 10000);
4157                 if (userns_fd < 0)
4158                         die("failure: get_userns_fd");
4159
4160                 if (!switch_userns(userns_fd, 0, 0, false))
4161                         die("failure: switch_userns");
4162
4163                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 5000, 5000))
4164                         die("failure: expected_uid_gid");
4165
4166                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4167                 die("failure: sys_execveat");
4168
4169                 exit(EXIT_FAILURE);
4170         }
4171
4172         if (wait_for_pid(pid)) {
4173                 log_stderr("failure: wait_for_pid");
4174                 goto out;
4175         }
4176
4177         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4178         if (file1_fd < 0) {
4179                 log_stderr("failure: openat");
4180                 goto out;
4181         }
4182
4183         /* chown the file to the uid and gid we want to assume */
4184         if (fchown(file1_fd, 20000, 20000)) {
4185                 log_stderr("failure: fchown");
4186                 goto out;
4187         }
4188
4189         /* set the setid bits and grant execute permissions to the group */
4190         if (fchmod(file1_fd, S_IXOTH | S_IXGRP | S_IEXEC | S_ISUID | S_ISGID), 0) {
4191                 log_stderr("failure: fchmod");
4192                 goto out;
4193         }
4194
4195         /* Verify that the sid bits got raised. */
4196         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4197                 log_stderr("failure: is_setid");
4198                 goto out;
4199         }
4200
4201         safe_close(file1_fd);
4202
4203         pid = fork();
4204         if (pid < 0) {
4205                 log_stderr("failure: fork");
4206                 goto out;
4207         }
4208         if (pid == 0) {
4209                 int userns_fd;
4210                 static char *envp[] = {
4211                         "IDMAP_MOUNT_TEST_RUN_SETID=1",
4212                         "EXPECTED_EUID=0",
4213                         "EXPECTED_EGID=0",
4214                         NULL,
4215                 };
4216                 static char *argv[] = {
4217                         NULL,
4218                 };
4219
4220                 userns_fd = get_userns_fd(0, 10000, 10000);
4221                 if (userns_fd < 0)
4222                         die("failure: get_userns_fd");
4223
4224                 if (!caps_supported()) {
4225                         log_debug("skip: capability library not installed");
4226                         exit(EXIT_SUCCESS);
4227                 }
4228
4229                 if (!switch_userns(userns_fd, 1000, 1000, true))
4230                         die("failure: switch_userns");
4231
4232                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
4233                         die("failure: expected_uid_gid");
4234
4235                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4236                 die("failure: sys_execveat");
4237
4238                 exit(EXIT_FAILURE);
4239         }
4240         if (wait_for_pid(pid)) {
4241                 log_stderr("failure: wait_for_pid");
4242                 goto out;
4243         }
4244
4245         file1_fd = openat(t_dir1_fd, FILE1, O_RDWR | O_CLOEXEC, 0644);
4246         if (file1_fd < 0) {
4247                 log_stderr("failure: openat");
4248                 goto out;
4249         }
4250
4251         /* chown the file to the uid and gid we want to assume */
4252         if (fchown(file1_fd, 0, 0)) {
4253                 log_stderr("failure: fchown");
4254                 goto out;
4255         }
4256
4257         if (fchmod(file1_fd, S_IXOTH | S_IEXEC | S_ISUID | S_ISGID), 0) {
4258                 log_stderr("failure: fchmod");
4259                 goto out;
4260         }
4261
4262         /* Verify that the sid bits got raised. */
4263         if (!is_setid(t_dir1_fd, FILE1, 0)) {
4264                 log_stderr("failure: is_setid");
4265                 goto out;
4266         }
4267
4268         safe_close(file1_fd);
4269
4270         /* Verify that we can't assume a uid and gid of a setid binary for
4271          * which we have no mapping in our user namespace.
4272          */
4273         pid = fork();
4274         if (pid < 0) {
4275                 log_stderr("failure: fork");
4276                 goto out;
4277         }
4278         if (pid == 0) {
4279                 int userns_fd;
4280                 char expected_euid[100];
4281                 char expected_egid[100];
4282                 static char *envp[4] = {
4283                         NULL,
4284                         NULL,
4285                         NULL,
4286                         NULL,
4287                 };
4288                 static char *argv[] = {
4289                         NULL,
4290                 };
4291
4292                 userns_fd = get_userns_fd(0, 10000, 10000);
4293                 if (userns_fd < 0)
4294                         die("failure: get_userns_fd");
4295
4296                 if (!switch_userns(userns_fd, 0, 0, false))
4297                         die("failure: switch_userns");
4298
4299                 envp[0] = "IDMAP_MOUNT_TEST_RUN_SETID=0";
4300                 snprintf(expected_euid, sizeof(expected_euid), "EXPECTED_EUID=%d", geteuid());
4301                 envp[1] = expected_euid;
4302                 snprintf(expected_egid, sizeof(expected_egid), "EXPECTED_EGID=%d", getegid());
4303                 envp[2] = expected_egid;
4304
4305                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid))
4306                         die("failure: expected_uid_gid");
4307
4308                 sys_execveat(open_tree_fd, FILE1, argv, envp, 0);
4309                 die("failure: sys_execveat");
4310
4311                 exit(EXIT_FAILURE);
4312         }
4313         if (wait_for_pid(pid)) {
4314                 log_stderr("failure: wait_for_pid");
4315                 goto out;
4316         }
4317
4318         fret = 0;
4319         log_debug("Ran test");
4320 out:
4321         safe_close(attr.userns_fd);
4322         safe_close(exec_fd);
4323         safe_close(file1_fd);
4324         safe_close(open_tree_fd);
4325
4326         snprintf(t_buf, sizeof(t_buf), "%s/" DIR1, t_mountpoint);
4327         sys_umount2(t_buf, MNT_DETACH);
4328         rm_r(t_mnt_fd, DIR1);
4329
4330         return fret;
4331 }
4332
4333 static int sticky_bit_unlink(void)
4334 {
4335         int fret = -1;
4336         int dir_fd = -EBADF;
4337         pid_t pid;
4338
4339         if (!caps_supported())
4340                 return 0;
4341
4342         /* create directory */
4343         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4344                 log_stderr("failure: mkdirat");
4345                 goto out;
4346         }
4347
4348         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4349         if (dir_fd < 0) {
4350                 log_stderr("failure: openat");
4351                 goto out;
4352         }
4353
4354         if (fchown(dir_fd, 0, 0)) {
4355                 log_stderr("failure: fchown");
4356                 goto out;
4357         }
4358
4359         if (fchmod(dir_fd, 0777)) {
4360                 log_stderr("failure: fchmod");
4361                 goto out;
4362         }
4363
4364         /* create regular file via mknod */
4365         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4366                 log_stderr("failure: mknodat");
4367                 goto out;
4368         }
4369         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4370                 log_stderr("failure: fchownat");
4371                 goto out;
4372         }
4373         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4374                 log_stderr("failure: fchmodat");
4375                 goto out;
4376         }
4377
4378         /* create regular file via mknod */
4379         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4380                 log_stderr("failure: mknodat");
4381                 goto out;
4382         }
4383         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4384                 log_stderr("failure: fchownat");
4385                 goto out;
4386         }
4387         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4388                 log_stderr("failure: fchmodat");
4389                 goto out;
4390         }
4391
4392         /* The sticky bit is not set so we must be able to delete files not
4393          * owned by us.
4394          */
4395         pid = fork();
4396         if (pid < 0) {
4397                 log_stderr("failure: fork");
4398                 goto out;
4399         }
4400         if (pid == 0) {
4401                 if (!switch_ids(1000, 1000))
4402                         die("failure: switch_ids");
4403
4404                 if (unlinkat(dir_fd, FILE1, 0))
4405                         die("failure: unlinkat");
4406
4407                 if (unlinkat(dir_fd, FILE2, 0))
4408                         die("failure: unlinkat");
4409
4410                 exit(EXIT_SUCCESS);
4411         }
4412         if (wait_for_pid(pid)) {
4413                 log_stderr("failure: wait_for_pid");
4414                 goto out;
4415         }
4416
4417         /* set sticky bit */
4418         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4419                 log_stderr("failure: fchmod");
4420                 goto out;
4421         }
4422
4423         /* validate sticky bit is set */
4424         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4425                 log_stderr("failure: is_sticky");
4426                 goto out;
4427         }
4428
4429         /* create regular file via mknod */
4430         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4431                 log_stderr("failure: mknodat");
4432                 goto out;
4433         }
4434         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4435                 log_stderr("failure: fchownat");
4436                 goto out;
4437         }
4438         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4439                 log_stderr("failure: fchmodat");
4440                 goto out;
4441         }
4442
4443         /* create regular file via mknod */
4444         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4445                 log_stderr("failure: mknodat");
4446                 goto out;
4447         }
4448         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4449                 log_stderr("failure: fchownat");
4450                 goto out;
4451         }
4452         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4453                 log_stderr("failure: fchmodat");
4454                 goto out;
4455         }
4456
4457         /* The sticky bit is set so we must not be able to delete files not
4458          * owned by us.
4459          */
4460         pid = fork();
4461         if (pid < 0) {
4462                 log_stderr("failure: fork");
4463                 goto out;
4464         }
4465         if (pid == 0) {
4466                 if (!switch_ids(1000, 1000))
4467                         die("failure: switch_ids");
4468
4469                 if (!unlinkat(dir_fd, FILE1, 0))
4470                         die("failure: unlinkat");
4471                 if (errno != EPERM)
4472                         die("failure: errno");
4473
4474                 if (!unlinkat(dir_fd, FILE2, 0))
4475                         die("failure: unlinkat");
4476                 if (errno != EPERM)
4477                         die("failure: errno");
4478
4479                 exit(EXIT_SUCCESS);
4480         }
4481         if (wait_for_pid(pid)) {
4482                 log_stderr("failure: wait_for_pid");
4483                 goto out;
4484         }
4485
4486         /* The sticky bit is set and we own the files so we must be able to
4487          * delete the files now.
4488          */
4489         pid = fork();
4490         if (pid < 0) {
4491                 log_stderr("failure: fork");
4492                 goto out;
4493         }
4494         if (pid == 0) {
4495                 /* change ownership */
4496                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
4497                         die("failure: fchownat");
4498                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
4499                         die("failure: expected_uid_gid");
4500                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
4501                         die("failure: fchownat");
4502                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
4503                         die("failure: expected_uid_gid");
4504
4505                 if (!switch_ids(1000, 1000))
4506                         die("failure: switch_ids");
4507
4508                 if (unlinkat(dir_fd, FILE1, 0))
4509                         die("failure: unlinkat");
4510
4511                 if (unlinkat(dir_fd, FILE2, 0))
4512                         die("failure: unlinkat");
4513
4514                 exit(EXIT_SUCCESS);
4515         }
4516         if (wait_for_pid(pid)) {
4517                 log_stderr("failure: wait_for_pid");
4518                 goto out;
4519         }
4520
4521         /* change uid to unprivileged user */
4522         if (fchown(dir_fd, 1000, -1)) {
4523                 log_stderr("failure: fchown");
4524                 goto out;
4525         }
4526         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4527                 log_stderr("failure: fchmod");
4528                 goto out;
4529         }
4530         /* validate sticky bit is set */
4531         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4532                 log_stderr("failure: is_sticky");
4533                 goto out;
4534         }
4535
4536         /* create regular file via mknod */
4537         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4538                 log_stderr("failure: mknodat");
4539                 goto out;
4540         }
4541         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4542                 log_stderr("failure: fchownat");
4543                 goto out;
4544         }
4545         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4546                 log_stderr("failure: fchmodat");
4547                 goto out;
4548         }
4549
4550         /* create regular file via mknod */
4551         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4552                 log_stderr("failure: mknodat");
4553                 goto out;
4554         }
4555         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4556                 log_stderr("failure: fchownat");
4557                 goto out;
4558         }
4559         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4560                 log_stderr("failure: fchmodat");
4561                 goto out;
4562         }
4563
4564         /* The sticky bit is set and we own the directory so we must be able to
4565          * delete the files now.
4566          */
4567         pid = fork();
4568         if (pid < 0) {
4569                 log_stderr("failure: fork");
4570                 goto out;
4571         }
4572         if (pid == 0) {
4573                 if (!switch_ids(1000, 1000))
4574                         die("failure: switch_ids");
4575
4576                 if (unlinkat(dir_fd, FILE1, 0))
4577                         die("failure: unlinkat");
4578
4579                 if (unlinkat(dir_fd, FILE2, 0))
4580                         die("failure: unlinkat");
4581
4582                 exit(EXIT_SUCCESS);
4583         }
4584         if (wait_for_pid(pid)) {
4585                 log_stderr("failure: wait_for_pid");
4586                 goto out;
4587         }
4588
4589         fret = 0;
4590         log_debug("Ran test");
4591 out:
4592         safe_close(dir_fd);
4593
4594         return fret;
4595 }
4596
4597 static int sticky_bit_unlink_idmapped_mounts(void)
4598 {
4599         int fret = -1;
4600         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4601         struct mount_attr attr = {
4602                 .attr_set = MOUNT_ATTR_IDMAP,
4603         };
4604         pid_t pid;
4605
4606         if (!caps_supported())
4607                 return 0;
4608
4609         /* create directory */
4610         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4611                 log_stderr("failure: mkdirat");
4612                 goto out;
4613         }
4614
4615         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4616         if (dir_fd < 0) {
4617                 log_stderr("failure: openat");
4618                 goto out;
4619         }
4620         if (fchown(dir_fd, 10000, 10000)) {
4621                 log_stderr("failure: fchown");
4622                 goto out;
4623         }
4624         if (fchmod(dir_fd, 0777)) {
4625                 log_stderr("failure: fchmod");
4626                 goto out;
4627         }
4628
4629         /* create regular file via mknod */
4630         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4631                 log_stderr("failure: mknodat");
4632                 goto out;
4633         }
4634         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4635                 log_stderr("failure: fchownat");
4636                 goto out;
4637         }
4638         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4639                 log_stderr("failure: fchmodat");
4640                 goto out;
4641         }
4642
4643         /* create regular file via mknod */
4644         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4645                 log_stderr("failure: mknodat");
4646                 goto out;
4647         }
4648         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4649                 log_stderr("failure: fchownat");
4650                 goto out;
4651         }
4652         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4653                 log_stderr("failure: fchmodat");
4654                 goto out;
4655         }
4656
4657         /* Changing mount properties on a detached mount. */
4658         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
4659         if (attr.userns_fd < 0) {
4660                 log_stderr("failure: get_userns_fd");
4661                 goto out;
4662         }
4663
4664         open_tree_fd = sys_open_tree(dir_fd, "",
4665                                      AT_EMPTY_PATH |
4666                                      AT_NO_AUTOMOUNT |
4667                                      AT_SYMLINK_NOFOLLOW |
4668                                      OPEN_TREE_CLOEXEC |
4669                                      OPEN_TREE_CLONE);
4670         if (open_tree_fd < 0) {
4671                 log_stderr("failure: sys_open_tree");
4672                 goto out;
4673         }
4674
4675         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4676                 log_stderr("failure: sys_mount_setattr");
4677                 goto out;
4678         }
4679
4680         /* The sticky bit is not set so we must be able to delete files not
4681          * owned by us.
4682          */
4683         pid = fork();
4684         if (pid < 0) {
4685                 log_stderr("failure: fork");
4686                 goto out;
4687         }
4688         if (pid == 0) {
4689                 if (!switch_ids(1000, 1000))
4690                         die("failure: switch_ids");
4691
4692                 if (unlinkat(open_tree_fd, FILE1, 0))
4693                         die("failure: unlinkat");
4694
4695                 if (unlinkat(open_tree_fd, FILE2, 0))
4696                         die("failure: unlinkat");
4697
4698                 exit(EXIT_SUCCESS);
4699         }
4700         if (wait_for_pid(pid)) {
4701                 log_stderr("failure: wait_for_pid");
4702                 goto out;
4703         }
4704
4705         /* set sticky bit */
4706         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4707                 log_stderr("failure: fchmod");
4708                 goto out;
4709         }
4710
4711         /* validate sticky bit is set */
4712         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4713                 log_stderr("failure: is_sticky");
4714                 goto out;
4715         }
4716
4717         /* create regular file via mknod */
4718         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4719                 log_stderr("failure: mknodat");
4720                 goto out;
4721         }
4722         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4723                 log_stderr("failure: fchownat");
4724                 goto out;
4725         }
4726         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4727                 log_stderr("failure: fchmodat");
4728                 goto out;
4729         }
4730
4731         /* create regular file via mknod */
4732         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4733                 log_stderr("failure: mknodat");
4734                 goto out;
4735         }
4736         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4737                 log_stderr("failure: fchownat");
4738                 goto out;
4739         }
4740         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4741                 log_stderr("failure: fchmodat");
4742                 goto out;
4743         }
4744
4745         /* The sticky bit is set so we must not be able to delete files not
4746          * owned by us.
4747          */
4748         pid = fork();
4749         if (pid < 0) {
4750                 log_stderr("failure: fork");
4751                 goto out;
4752         }
4753         if (pid == 0) {
4754                 if (!switch_ids(1000, 1000))
4755                         die("failure: switch_ids");
4756
4757                 if (!unlinkat(open_tree_fd, FILE1, 0))
4758                         die("failure: unlinkat");
4759                 if (errno != EPERM)
4760                         die("failure: errno");
4761
4762                 if (!unlinkat(open_tree_fd, FILE2, 0))
4763                         die("failure: unlinkat");
4764                 if (errno != EPERM)
4765                         die("failure: errno");
4766
4767                 exit(EXIT_SUCCESS);
4768         }
4769         if (wait_for_pid(pid)) {
4770                 log_stderr("failure: wait_for_pid");
4771                 goto out;
4772         }
4773
4774         /* The sticky bit is set and we own the files so we must be able to
4775          * delete the files now.
4776          */
4777         pid = fork();
4778         if (pid < 0) {
4779                 log_stderr("failure: fork");
4780                 goto out;
4781         }
4782         if (pid == 0) {
4783                 /* change ownership */
4784                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
4785                         die("failure: fchownat");
4786                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
4787                         die("failure: expected_uid_gid");
4788                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
4789                         die("failure: fchownat");
4790                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
4791                         die("failure: expected_uid_gid");
4792
4793                 if (!switch_ids(1000, 1000))
4794                         die("failure: switch_ids");
4795
4796                 if (unlinkat(open_tree_fd, FILE1, 0))
4797                         die("failure: unlinkat");
4798
4799                 if (unlinkat(open_tree_fd, FILE2, 0))
4800                         die("failure: unlinkat");
4801
4802                 exit(EXIT_SUCCESS);
4803         }
4804         if (wait_for_pid(pid)) {
4805                 log_stderr("failure: wait_for_pid");
4806                 goto out;
4807         }
4808
4809         /* change uid to unprivileged user */
4810         if (fchown(dir_fd, 11000, -1)) {
4811                 log_stderr("failure: fchown");
4812                 goto out;
4813         }
4814         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
4815                 log_stderr("failure: fchmod");
4816                 goto out;
4817         }
4818         /* validate sticky bit is set */
4819         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
4820                 log_stderr("failure: is_sticky");
4821                 goto out;
4822         }
4823
4824         /* create regular file via mknod */
4825         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4826                 log_stderr("failure: mknodat");
4827                 goto out;
4828         }
4829         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
4830                 log_stderr("failure: fchownat");
4831                 goto out;
4832         }
4833         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4834                 log_stderr("failure: fchmodat");
4835                 goto out;
4836         }
4837
4838         /* create regular file via mknod */
4839         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4840                 log_stderr("failure: mknodat");
4841                 goto out;
4842         }
4843         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
4844                 log_stderr("failure: fchownat");
4845                 goto out;
4846         }
4847         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4848                 log_stderr("failure: fchmodat");
4849                 goto out;
4850         }
4851
4852         /* The sticky bit is set and we own the directory so we must be able to
4853          * delete the files now.
4854          */
4855         pid = fork();
4856         if (pid < 0) {
4857                 log_stderr("failure: fork");
4858                 goto out;
4859         }
4860         if (pid == 0) {
4861                 if (!switch_ids(1000, 1000))
4862                         die("failure: switch_ids");
4863
4864                 if (unlinkat(open_tree_fd, FILE1, 0))
4865                         die("failure: unlinkat");
4866
4867                 if (unlinkat(open_tree_fd, FILE2, 0))
4868                         die("failure: unlinkat");
4869
4870                 exit(EXIT_SUCCESS);
4871         }
4872         if (wait_for_pid(pid)) {
4873                 log_stderr("failure: wait_for_pid");
4874                 goto out;
4875         }
4876
4877         fret = 0;
4878         log_debug("Ran test");
4879 out:
4880         safe_close(attr.userns_fd);
4881         safe_close(dir_fd);
4882         safe_close(open_tree_fd);
4883
4884         return fret;
4885 }
4886
4887 /* Validate that the sticky bit behaves correctly on idmapped mounts for unlink
4888  * operations in a user namespace.
4889  */
4890 static int sticky_bit_unlink_idmapped_mounts_in_userns(void)
4891 {
4892         int fret = -1;
4893         int dir_fd = -EBADF, open_tree_fd = -EBADF;
4894         struct mount_attr attr = {
4895                 .attr_set = MOUNT_ATTR_IDMAP,
4896         };
4897         pid_t pid;
4898
4899         if (!caps_supported())
4900                 return 0;
4901
4902         /* create directory */
4903         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
4904                 log_stderr("failure: mkdirat");
4905                 goto out;
4906         }
4907
4908         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
4909         if (dir_fd < 0) {
4910                 log_stderr("failure: openat");
4911                 goto out;
4912         }
4913         if (fchown(dir_fd, 0, 0)) {
4914                 log_stderr("failure: fchown");
4915                 goto out;
4916         }
4917         if (fchmod(dir_fd, 0777)) {
4918                 log_stderr("failure: fchmod");
4919                 goto out;
4920         }
4921
4922         /* create regular file via mknod */
4923         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
4924                 log_stderr("failure: mknodat");
4925                 goto out;
4926         }
4927         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
4928                 log_stderr("failure: fchownat");
4929                 goto out;
4930         }
4931         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
4932                 log_stderr("failure: fchmodat");
4933                 goto out;
4934         }
4935
4936         /* create regular file via mknod */
4937         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
4938                 log_stderr("failure: mknodat");
4939                 goto out;
4940         }
4941         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
4942                 log_stderr("failure: fchownat");
4943                 goto out;
4944         }
4945         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
4946                 log_stderr("failure: fchmodat");
4947                 goto out;
4948         }
4949
4950         /* Changing mount properties on a detached mount. */
4951         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
4952         if (attr.userns_fd < 0) {
4953                 log_stderr("failure: get_userns_fd");
4954                 goto out;
4955         }
4956
4957         open_tree_fd = sys_open_tree(dir_fd, "",
4958                                      AT_EMPTY_PATH |
4959                                      AT_NO_AUTOMOUNT |
4960                                      AT_SYMLINK_NOFOLLOW |
4961                                      OPEN_TREE_CLOEXEC |
4962                                      OPEN_TREE_CLONE);
4963         if (open_tree_fd < 0) {
4964                 log_stderr("failure: sys_open_tree");
4965                 goto out;
4966         }
4967
4968         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
4969                 log_stderr("failure: sys_mount_setattr");
4970                 goto out;
4971         }
4972
4973         /* The sticky bit is not set so we must be able to delete files not
4974          * owned by us.
4975          */
4976         pid = fork();
4977         if (pid < 0) {
4978                 log_stderr("failure: fork");
4979                 goto out;
4980         }
4981         if (pid == 0) {
4982                 if (!caps_supported()) {
4983                         log_debug("skip: capability library not installed");
4984                         exit(EXIT_SUCCESS);
4985                 }
4986
4987                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
4988                         die("failure: switch_userns");
4989
4990                 if (unlinkat(dir_fd, FILE1, 0))
4991                         die("failure: unlinkat");
4992
4993                 if (unlinkat(dir_fd, FILE2, 0))
4994                         die("failure: unlinkat");
4995
4996                 exit(EXIT_SUCCESS);
4997         }
4998         if (wait_for_pid(pid)) {
4999                 log_stderr("failure: wait_for_pid");
5000                 goto out;
5001         }
5002
5003         /* set sticky bit */
5004         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5005                 log_stderr("failure: fchmod");
5006                 goto out;
5007         }
5008
5009         /* validate sticky bit is set */
5010         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5011                 log_stderr("failure: is_sticky");
5012                 goto out;
5013         }
5014
5015         /* create regular file via mknod */
5016         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5017                 log_stderr("failure: mknodat");
5018                 goto out;
5019         }
5020         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5021                 log_stderr("failure: fchownat");
5022                 goto out;
5023         }
5024         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5025                 log_stderr("failure: fchmodat");
5026                 goto out;
5027         }
5028
5029         /* create regular file via mknod */
5030         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5031                 log_stderr("failure: mknodat");
5032                 goto out;
5033         }
5034         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5035                 log_stderr("failure: fchownat");
5036                 goto out;
5037         }
5038         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5039                 log_stderr("failure: fchmodat");
5040                 goto out;
5041         }
5042
5043         /* The sticky bit is set so we must not be able to delete files not
5044          * owned by us.
5045          */
5046         pid = fork();
5047         if (pid < 0) {
5048                 log_stderr("failure: fork");
5049                 goto out;
5050         }
5051         if (pid == 0) {
5052                 if (!caps_supported()) {
5053                         log_debug("skip: capability library not installed");
5054                         exit(EXIT_SUCCESS);
5055                 }
5056
5057                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5058                         die("failure: switch_userns");
5059
5060                 if (!unlinkat(dir_fd, FILE1, 0))
5061                         die("failure: unlinkat");
5062                 if (errno != EPERM)
5063                         die("failure: errno");
5064
5065                 if (!unlinkat(dir_fd, FILE2, 0))
5066                         die("failure: unlinkat");
5067                 if (errno != EPERM)
5068                         die("failure: errno");
5069
5070                 if (!unlinkat(open_tree_fd, FILE1, 0))
5071                         die("failure: unlinkat");
5072                 if (errno != EPERM)
5073                         die("failure: errno");
5074
5075                 if (!unlinkat(open_tree_fd, FILE2, 0))
5076                         die("failure: unlinkat");
5077                 if (errno != EPERM)
5078                         die("failure: errno");
5079
5080                 exit(EXIT_SUCCESS);
5081         }
5082         if (wait_for_pid(pid)) {
5083                 log_stderr("failure: wait_for_pid");
5084                 goto out;
5085         }
5086
5087         /* The sticky bit is set and we own the files so we must be able to
5088          * delete the files now.
5089          */
5090         pid = fork();
5091         if (pid < 0) {
5092                 log_stderr("failure: fork");
5093                 goto out;
5094         }
5095         if (pid == 0) {
5096                 /* change ownership */
5097                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5098                         die("failure: fchownat");
5099                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5100                         die("failure: expected_uid_gid");
5101                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5102                         die("failure: fchownat");
5103                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5104                         die("failure: expected_uid_gid");
5105
5106                 if (!caps_supported()) {
5107                         log_debug("skip: capability library not installed");
5108                         exit(EXIT_SUCCESS);
5109                 }
5110
5111                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5112                         die("failure: switch_userns");
5113
5114                 if (!unlinkat(dir_fd, FILE1, 0))
5115                         die("failure: unlinkat");
5116                 if (errno != EPERM)
5117                         die("failure: errno");
5118
5119                 if (!unlinkat(dir_fd, FILE2, 0))
5120                         die("failure: unlinkat");
5121                 if (errno != EPERM)
5122                         die("failure: errno");
5123
5124                 if (unlinkat(open_tree_fd, FILE1, 0))
5125                         die("failure: unlinkat");
5126
5127                 if (unlinkat(open_tree_fd, FILE2, 0))
5128                         die("failure: unlinkat");
5129
5130                 exit(EXIT_SUCCESS);
5131         }
5132         if (wait_for_pid(pid)) {
5133                 log_stderr("failure: wait_for_pid");
5134                 goto out;
5135         }
5136
5137         /* change uid to unprivileged user */
5138         if (fchown(dir_fd, 1000, -1)) {
5139                 log_stderr("failure: fchown");
5140                 goto out;
5141         }
5142         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5143                 log_stderr("failure: fchmod");
5144                 goto out;
5145         }
5146         /* validate sticky bit is set */
5147         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5148                 log_stderr("failure: is_sticky");
5149                 goto out;
5150         }
5151
5152         /* create regular file via mknod */
5153         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5154                 log_stderr("failure: mknodat");
5155                 goto out;
5156         }
5157         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5158                 log_stderr("failure: fchownat");
5159                 goto out;
5160         }
5161         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5162                 log_stderr("failure: fchmodat");
5163                 goto out;
5164         }
5165
5166         /* create regular file via mknod */
5167         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5168                 log_stderr("failure: mknodat");
5169                 goto out;
5170         }
5171         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5172                 log_stderr("failure: fchownat");
5173                 goto out;
5174         }
5175         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5176                 log_stderr("failure: fchmodat");
5177                 goto out;
5178         }
5179
5180         /* The sticky bit is set and we own the directory so we must be able to
5181          * delete the files now.
5182          */
5183         pid = fork();
5184         if (pid < 0) {
5185                 log_stderr("failure: fork");
5186                 goto out;
5187         }
5188         if (pid == 0) {
5189                 if (!caps_supported()) {
5190                         log_debug("skip: capability library not installed");
5191                         exit(EXIT_SUCCESS);
5192                 }
5193
5194                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5195                         die("failure: switch_userns");
5196
5197                 /* we don't own the directory from the original mount */
5198                 if (!unlinkat(dir_fd, FILE1, 0))
5199                         die("failure: unlinkat");
5200                 if (errno != EPERM)
5201                         die("failure: errno");
5202
5203                 if (!unlinkat(dir_fd, FILE2, 0))
5204                         die("failure: unlinkat");
5205                 if (errno != EPERM)
5206                         die("failure: errno");
5207
5208                 /* we own the file from the idmapped mount */
5209                 if (unlinkat(open_tree_fd, FILE1, 0))
5210                         die("failure: unlinkat");
5211                 if (unlinkat(open_tree_fd, FILE2, 0))
5212                         die("failure: unlinkat");
5213
5214                 exit(EXIT_SUCCESS);
5215         }
5216         if (wait_for_pid(pid)) {
5217                 log_stderr("failure: wait_for_pid");
5218                 goto out;
5219         }
5220
5221         fret = 0;
5222         log_debug("Ran test");
5223 out:
5224         safe_close(attr.userns_fd);
5225         safe_close(dir_fd);
5226         safe_close(open_tree_fd);
5227
5228         return fret;
5229 }
5230
5231 static int sticky_bit_rename(void)
5232 {
5233         int fret = -1;
5234         int dir_fd = -EBADF;
5235         pid_t pid;
5236
5237         if (!caps_supported())
5238                 return 0;
5239
5240         /* create directory */
5241         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5242                 log_stderr("failure: mkdirat");
5243                 goto out;
5244         }
5245
5246         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5247         if (dir_fd < 0) {
5248                 log_stderr("failure: openat");
5249                 goto out;
5250         }
5251         if (fchown(dir_fd, 0, 0)) {
5252                 log_stderr("failure: fchown");
5253                 goto out;
5254         }
5255         if (fchmod(dir_fd, 0777)) {
5256                 log_stderr("failure: fchmod");
5257                 goto out;
5258         }
5259
5260         /* create regular file via mknod */
5261         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5262                 log_stderr("failure: mknodat");
5263                 goto out;
5264         }
5265         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5266                 log_stderr("failure: fchownat");
5267                 goto out;
5268         }
5269         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5270                 log_stderr("failure: fchmodat");
5271                 goto out;
5272         }
5273
5274         /* create regular file via mknod */
5275         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5276                 log_stderr("failure: mknodat");
5277                 goto out;
5278         }
5279         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5280                 log_stderr("failure: fchownat");
5281                 goto out;
5282         }
5283         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5284                 log_stderr("failure: fchmodat");
5285                 goto out;
5286         }
5287
5288         /* The sticky bit is not set so we must be able to delete files not
5289          * owned by us.
5290          */
5291         pid = fork();
5292         if (pid < 0) {
5293                 log_stderr("failure: fork");
5294                 goto out;
5295         }
5296         if (pid == 0) {
5297                 if (!switch_ids(1000, 1000))
5298                         die("failure: switch_ids");
5299
5300                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5301                         die("failure: renameat");
5302
5303                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5304                         die("failure: renameat");
5305
5306                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5307                         die("failure: renameat");
5308
5309                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5310                         die("failure: renameat");
5311
5312                 exit(EXIT_SUCCESS);
5313         }
5314         if (wait_for_pid(pid)) {
5315                 log_stderr("failure: wait_for_pid");
5316                 goto out;
5317         }
5318
5319         /* set sticky bit */
5320         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5321                 log_stderr("failure: fchmod");
5322                 goto out;
5323         }
5324
5325         /* validate sticky bit is set */
5326         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5327                 log_stderr("failure: is_sticky");
5328                 goto out;
5329         }
5330
5331         /* The sticky bit is set so we must not be able to delete files not
5332          * owned by us.
5333          */
5334         pid = fork();
5335         if (pid < 0) {
5336                 log_stderr("failure: fork");
5337                 goto out;
5338         }
5339         if (pid == 0) {
5340                 if (!switch_ids(1000, 1000))
5341                         die("failure: switch_ids");
5342
5343                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5344                         die("failure: renameat");
5345                 if (errno != EPERM)
5346                         die("failure: errno");
5347
5348                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5349                         die("failure: renameat");
5350                 if (errno != EPERM)
5351                         die("failure: errno");
5352
5353                 exit(EXIT_SUCCESS);
5354         }
5355         if (wait_for_pid(pid)) {
5356                 log_stderr("failure: wait_for_pid");
5357                 goto out;
5358         }
5359
5360         /* The sticky bit is set and we own the files so we must be able to
5361          * delete the files now.
5362          */
5363         pid = fork();
5364         if (pid < 0) {
5365                 log_stderr("failure: fork");
5366                 goto out;
5367         }
5368         if (pid == 0) {
5369                 /* change ownership */
5370                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5371                         die("failure: fchownat");
5372                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5373                         die("failure: expected_uid_gid");
5374                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5375                         die("failure: fchownat");
5376                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5377                         die("failure: expected_uid_gid");
5378
5379                 if (!switch_ids(1000, 1000))
5380                         die("failure: switch_ids");
5381
5382                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5383                         die("failure: renameat");
5384
5385                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5386                         die("failure: renameat");
5387
5388                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5389                         die("failure: renameat");
5390
5391                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5392                         die("failure: renameat");
5393
5394                 exit(EXIT_SUCCESS);
5395         }
5396         if (wait_for_pid(pid)) {
5397                 log_stderr("failure: wait_for_pid");
5398                 goto out;
5399         }
5400
5401         /* change uid to unprivileged user */
5402         if (fchown(dir_fd, 1000, -1)) {
5403                 log_stderr("failure: fchown");
5404                 goto out;
5405         }
5406         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5407                 log_stderr("failure: fchmod");
5408                 goto out;
5409         }
5410         /* validate sticky bit is set */
5411         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5412                 log_stderr("failure: is_sticky");
5413                 goto out;
5414         }
5415
5416
5417         /* The sticky bit is set and we own the directory so we must be able to
5418          * delete the files now.
5419          */
5420         pid = fork();
5421         if (pid < 0) {
5422                 log_stderr("failure: fork");
5423                 goto out;
5424         }
5425         if (pid == 0) {
5426                 if (!switch_ids(1000, 1000))
5427                         die("failure: switch_ids");
5428
5429                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5430                         die("failure: renameat");
5431
5432                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5433                         die("failure: renameat");
5434
5435                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5436                         die("failure: renameat");
5437
5438                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5439                         die("failure: renameat");
5440
5441                 exit(EXIT_SUCCESS);
5442         }
5443         if (wait_for_pid(pid)) {
5444                 log_stderr("failure: wait_for_pid");
5445                 goto out;
5446         }
5447
5448         fret = 0;
5449         log_debug("Ran test");
5450 out:
5451         safe_close(dir_fd);
5452
5453         return fret;
5454 }
5455
5456 static int sticky_bit_rename_idmapped_mounts(void)
5457 {
5458         int fret = -1;
5459         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5460         struct mount_attr attr = {
5461                 .attr_set = MOUNT_ATTR_IDMAP,
5462         };
5463         pid_t pid;
5464
5465         if (!caps_supported())
5466                 return 0;
5467
5468         /* create directory */
5469         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5470                 log_stderr("failure: mkdirat");
5471                 goto out;
5472         }
5473
5474         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5475         if (dir_fd < 0) {
5476                 log_stderr("failure: openat");
5477                 goto out;
5478         }
5479
5480         if (fchown(dir_fd, 10000, 10000)) {
5481                 log_stderr("failure: fchown");
5482                 goto out;
5483         }
5484
5485         if (fchmod(dir_fd, 0777)) {
5486                 log_stderr("failure: fchmod");
5487                 goto out;
5488         }
5489
5490         /* create regular file via mknod */
5491         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5492                 log_stderr("failure: mknodat");
5493                 goto out;
5494         }
5495         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
5496                 log_stderr("failure: fchownat");
5497                 goto out;
5498         }
5499         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5500                 log_stderr("failure: fchmodat");
5501                 goto out;
5502         }
5503
5504         /* create regular file via mknod */
5505         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5506                 log_stderr("failure: mknodat");
5507                 goto out;
5508         }
5509         if (fchownat(dir_fd, FILE2, 12000, 12000, 0)) {
5510                 log_stderr("failure: fchownat");
5511                 goto out;
5512         }
5513         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5514                 log_stderr("failure: fchmodat");
5515                 goto out;
5516         }
5517
5518         /* Changing mount properties on a detached mount. */
5519         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
5520         if (attr.userns_fd < 0) {
5521                 log_stderr("failure: get_userns_fd");
5522                 goto out;
5523         }
5524
5525         open_tree_fd = sys_open_tree(dir_fd, "",
5526                                      AT_EMPTY_PATH |
5527                                      AT_NO_AUTOMOUNT |
5528                                      AT_SYMLINK_NOFOLLOW |
5529                                      OPEN_TREE_CLOEXEC |
5530                                      OPEN_TREE_CLONE);
5531         if (open_tree_fd < 0) {
5532                 log_stderr("failure: sys_open_tree");
5533                 goto out;
5534         }
5535
5536         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5537                 log_stderr("failure: sys_mount_setattr");
5538                 goto out;
5539         }
5540
5541         /* The sticky bit is not set so we must be able to delete files not
5542          * owned by us.
5543          */
5544         pid = fork();
5545         if (pid < 0) {
5546                 log_stderr("failure: fork");
5547                 goto out;
5548         }
5549         if (pid == 0) {
5550                 if (!switch_ids(1000, 1000))
5551                         die("failure: switch_ids");
5552
5553                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5554                         die("failure: renameat");
5555
5556                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5557                         die("failure: renameat");
5558
5559                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5560                         die("failure: renameat");
5561
5562                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5563                         die("failure: renameat");
5564
5565                 exit(EXIT_SUCCESS);
5566         }
5567         if (wait_for_pid(pid)) {
5568                 log_stderr("failure: wait_for_pid");
5569                 goto out;
5570         }
5571
5572         /* set sticky bit */
5573         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5574                 log_stderr("failure: fchmod");
5575                 goto out;
5576         }
5577
5578         /* validate sticky bit is set */
5579         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5580                 log_stderr("failure: is_sticky");
5581                 goto out;
5582         }
5583
5584         /* The sticky bit is set so we must not be able to delete files not
5585          * owned by us.
5586          */
5587         pid = fork();
5588         if (pid < 0) {
5589                 log_stderr("failure: fork");
5590                 goto out;
5591         }
5592         if (pid == 0) {
5593                 if (!switch_ids(1000, 1000))
5594                         die("failure: switch_ids");
5595
5596                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5597                         die("failure: renameat");
5598                 if (errno != EPERM)
5599                         die("failure: errno");
5600
5601                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5602                         die("failure: renameat");
5603                 if (errno != EPERM)
5604                         die("failure: errno");
5605
5606                 exit(EXIT_SUCCESS);
5607         }
5608         if (wait_for_pid(pid)) {
5609                 log_stderr("failure: wait_for_pid");
5610                 goto out;
5611         }
5612
5613         /* The sticky bit is set and we own the files so we must be able to
5614          * delete the files now.
5615          */
5616         pid = fork();
5617         if (pid < 0) {
5618                 log_stderr("failure: fork");
5619                 goto out;
5620         }
5621         if (pid == 0) {
5622                 /* change ownership */
5623                 if (fchownat(dir_fd, FILE1, 11000, -1, 0))
5624                         die("failure: fchownat");
5625                 if (!expected_uid_gid(dir_fd, FILE1, 0, 11000, 10000))
5626                         die("failure: expected_uid_gid");
5627                 if (fchownat(dir_fd, FILE2, 11000, -1, 0))
5628                         die("failure: fchownat");
5629                 if (!expected_uid_gid(dir_fd, FILE2, 0, 11000, 12000))
5630                         die("failure: expected_uid_gid");
5631
5632                 if (!switch_ids(1000, 1000))
5633                         die("failure: switch_ids");
5634
5635                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5636                         die("failure: renameat");
5637
5638                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5639                         die("failure: renameat");
5640
5641                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5642                         die("failure: renameat");
5643
5644                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5645                         die("failure: renameat");
5646
5647                 exit(EXIT_SUCCESS);
5648         }
5649         if (wait_for_pid(pid)) {
5650                 log_stderr("failure: wait_for_pid");
5651                 goto out;
5652         }
5653
5654         /* change uid to unprivileged user */
5655         if (fchown(dir_fd, 11000, -1)) {
5656                 log_stderr("failure: fchown");
5657                 goto out;
5658         }
5659         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5660                 log_stderr("failure: fchmod");
5661                 goto out;
5662         }
5663         /* validate sticky bit is set */
5664         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5665                 log_stderr("failure: is_sticky");
5666                 goto out;
5667         }
5668
5669         /* The sticky bit is set and we own the directory so we must be able to
5670          * delete the files now.
5671          */
5672         pid = fork();
5673         if (pid < 0) {
5674                 log_stderr("failure: fork");
5675                 goto out;
5676         }
5677         if (pid == 0) {
5678                 if (!switch_ids(1000, 1000))
5679                         die("failure: switch_ids");
5680
5681                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5682                         die("failure: renameat");
5683
5684                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5685                         die("failure: renameat");
5686
5687                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5688                         die("failure: renameat");
5689
5690                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5691                         die("failure: renameat");
5692
5693                 exit(EXIT_SUCCESS);
5694         }
5695         if (wait_for_pid(pid)) {
5696                 log_stderr("failure: wait_for_pid");
5697                 goto out;
5698         }
5699
5700         fret = 0;
5701         log_debug("Ran test");
5702 out:
5703         safe_close(attr.userns_fd);
5704         safe_close(dir_fd);
5705         safe_close(open_tree_fd);
5706
5707         return fret;
5708 }
5709
5710 /* Validate that the sticky bit behaves correctly on idmapped mounts for rename
5711  * operations in a user namespace.
5712  */
5713 static int sticky_bit_rename_idmapped_mounts_in_userns(void)
5714 {
5715         int fret = -1;
5716         int dir_fd = -EBADF, open_tree_fd = -EBADF;
5717         struct mount_attr attr = {
5718                 .attr_set = MOUNT_ATTR_IDMAP,
5719         };
5720         pid_t pid;
5721
5722         if (!caps_supported())
5723                 return 0;
5724
5725         /* create directory */
5726         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
5727                 log_stderr("failure: mkdirat");
5728                 goto out;
5729         }
5730
5731         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
5732         if (dir_fd < 0) {
5733                 log_stderr("failure: openat");
5734                 goto out;
5735         }
5736         if (fchown(dir_fd, 0, 0)) {
5737                 log_stderr("failure: fchown");
5738                 goto out;
5739         }
5740         if (fchmod(dir_fd, 0777)) {
5741                 log_stderr("failure: fchmod");
5742                 goto out;
5743         }
5744
5745         /* create regular file via mknod */
5746         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
5747                 log_stderr("failure: mknodat");
5748                 goto out;
5749         }
5750         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
5751                 log_stderr("failure: fchownat");
5752                 goto out;
5753         }
5754         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
5755                 log_stderr("failure: fchmodat");
5756                 goto out;
5757         }
5758
5759         /* create regular file via mknod */
5760         if (mknodat(dir_fd, FILE2, S_IFREG | 0000, 0)) {
5761                 log_stderr("failure: mknodat");
5762                 goto out;
5763         }
5764         if (fchownat(dir_fd, FILE2, 2000, 2000, 0)) {
5765                 log_stderr("failure: fchownat");
5766                 goto out;
5767         }
5768         if (fchmodat(dir_fd, FILE2, 0644, 0)) {
5769                 log_stderr("failure: fchmodat");
5770                 goto out;
5771         }
5772
5773         /* Changing mount properties on a detached mount. */
5774         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
5775         if (attr.userns_fd < 0) {
5776                 log_stderr("failure: get_userns_fd");
5777                 goto out;
5778         }
5779
5780         open_tree_fd = sys_open_tree(dir_fd, "",
5781                                      AT_EMPTY_PATH |
5782                                      AT_NO_AUTOMOUNT |
5783                                      AT_SYMLINK_NOFOLLOW |
5784                                      OPEN_TREE_CLOEXEC |
5785                                      OPEN_TREE_CLONE);
5786         if (open_tree_fd < 0) {
5787                 log_stderr("failure: sys_open_tree");
5788                 goto out;
5789         }
5790
5791         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
5792                 log_stderr("failure: sys_mount_setattr");
5793                 goto out;
5794         }
5795
5796         /* The sticky bit is not set so we must be able to delete files not
5797          * owned by us.
5798          */
5799         pid = fork();
5800         if (pid < 0) {
5801                 log_stderr("failure: fork");
5802                 goto out;
5803         }
5804         if (pid == 0) {
5805                 if (!caps_supported()) {
5806                         log_debug("skip: capability library not installed");
5807                         exit(EXIT_SUCCESS);
5808                 }
5809
5810                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5811                         die("failure: switch_userns");
5812
5813                 if (renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5814                         die("failure: renameat");
5815
5816                 if (renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5817                         die("failure: renameat");
5818
5819                 if (renameat(dir_fd, FILE1_RENAME, dir_fd, FILE1))
5820                         die("failure: renameat");
5821
5822                 if (renameat(dir_fd, FILE2_RENAME, dir_fd, FILE2))
5823                         die("failure: renameat");
5824
5825                 exit(EXIT_SUCCESS);
5826         }
5827         if (wait_for_pid(pid)) {
5828                 log_stderr("failure: wait_for_pid");
5829                 goto out;
5830         }
5831
5832         /* set sticky bit */
5833         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5834                 log_stderr("failure: fchmod");
5835                 goto out;
5836         }
5837
5838         /* validate sticky bit is set */
5839         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5840                 log_stderr("failure: is_sticky");
5841                 goto out;
5842         }
5843
5844         /* The sticky bit is set so we must not be able to delete files not
5845          * owned by us.
5846          */
5847         pid = fork();
5848         if (pid < 0) {
5849                 log_stderr("failure: fork");
5850                 goto out;
5851         }
5852         if (pid == 0) {
5853                 if (!caps_supported()) {
5854                         log_debug("skip: capability library not installed");
5855                         exit(EXIT_SUCCESS);
5856                 }
5857
5858                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5859                         die("failure: switch_userns");
5860
5861                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5862                         die("failure: renameat");
5863                 if (errno != EPERM)
5864                         die("failure: errno");
5865
5866                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5867                         die("failure: renameat");
5868                 if (errno != EPERM)
5869                         die("failure: errno");
5870
5871                 if (!renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5872                         die("failure: renameat");
5873                 if (errno != EPERM)
5874                         die("failure: errno");
5875
5876                 if (!renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5877                         die("failure: renameat");
5878                 if (errno != EPERM)
5879                         die("failure: errno");
5880
5881                 exit(EXIT_SUCCESS);
5882         }
5883         if (wait_for_pid(pid)) {
5884                 log_stderr("failure: wait_for_pid");
5885                 goto out;
5886         }
5887
5888         /* The sticky bit is set and we own the files so we must be able to
5889          * delete the files now.
5890          */
5891         pid = fork();
5892         if (pid < 0) {
5893                 log_stderr("failure: fork");
5894                 goto out;
5895         }
5896         if (pid == 0) {
5897                 /* change ownership */
5898                 if (fchownat(dir_fd, FILE1, 1000, -1, 0))
5899                         die("failure: fchownat");
5900                 if (!expected_uid_gid(dir_fd, FILE1, 0, 1000, 0))
5901                         die("failure: expected_uid_gid");
5902                 if (fchownat(dir_fd, FILE2, 1000, -1, 0))
5903                         die("failure: fchownat");
5904                 if (!expected_uid_gid(dir_fd, FILE2, 0, 1000, 2000))
5905                         die("failure: expected_uid_gid");
5906
5907                 if (!caps_supported()) {
5908                         log_debug("skip: capability library not installed");
5909                         exit(EXIT_SUCCESS);
5910                 }
5911
5912                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5913                         die("failure: switch_userns");
5914
5915                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5916                         die("failure: renameat");
5917                 if (errno != EPERM)
5918                         die("failure: errno");
5919
5920                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5921                         die("failure: renameat");
5922                 if (errno != EPERM)
5923                         die("failure: errno");
5924
5925                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5926                         die("failure: renameat");
5927
5928                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5929                         die("failure: renameat");
5930
5931                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5932                         die("failure: renameat");
5933
5934                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5935                         die("failure: renameat");
5936
5937                 exit(EXIT_SUCCESS);
5938         }
5939         if (wait_for_pid(pid)) {
5940                 log_stderr("failure: wait_for_pid");
5941                 goto out;
5942         }
5943
5944         /* change uid to unprivileged user */
5945         if (fchown(dir_fd, 1000, -1)) {
5946                 log_stderr("failure: fchown");
5947                 goto out;
5948         }
5949         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
5950                 log_stderr("failure: fchmod");
5951                 goto out;
5952         }
5953         /* validate sticky bit is set */
5954         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
5955                 log_stderr("failure: is_sticky");
5956                 goto out;
5957         }
5958
5959         /* The sticky bit is set and we own the directory so we must be able to
5960          * delete the files now.
5961          */
5962         pid = fork();
5963         if (pid < 0) {
5964                 log_stderr("failure: fork");
5965                 goto out;
5966         }
5967         if (pid == 0) {
5968                 if (!caps_supported()) {
5969                         log_debug("skip: capability library not installed");
5970                         exit(EXIT_SUCCESS);
5971                 }
5972
5973                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
5974                         die("failure: switch_userns");
5975
5976                 /* we don't own the directory from the original mount */
5977                 if (!renameat(dir_fd, FILE1, dir_fd, FILE1_RENAME))
5978                         die("failure: renameat");
5979                 if (errno != EPERM)
5980                         die("failure: errno");
5981
5982                 if (!renameat(dir_fd, FILE2, dir_fd, FILE2_RENAME))
5983                         die("failure: renameat");
5984                 if (errno != EPERM)
5985                         die("failure: errno");
5986
5987                 /* we own the file from the idmapped mount */
5988                 if (renameat(open_tree_fd, FILE1, open_tree_fd, FILE1_RENAME))
5989                         die("failure: renameat");
5990
5991                 if (renameat(open_tree_fd, FILE2, open_tree_fd, FILE2_RENAME))
5992                         die("failure: renameat");
5993
5994                 if (renameat(open_tree_fd, FILE1_RENAME, open_tree_fd, FILE1))
5995                         die("failure: renameat");
5996
5997                 if (renameat(open_tree_fd, FILE2_RENAME, open_tree_fd, FILE2))
5998                         die("failure: renameat");
5999
6000                 exit(EXIT_SUCCESS);
6001         }
6002         if (wait_for_pid(pid)) {
6003                 log_stderr("failure: wait_for_pid");
6004                 goto out;
6005         }
6006
6007         fret = 0;
6008         log_debug("Ran test");
6009 out:
6010         safe_close(open_tree_fd);
6011         safe_close(attr.userns_fd);
6012         safe_close(dir_fd);
6013
6014         return fret;
6015 }
6016
6017 /* Validate that protected symlinks work correctly. */
6018 static int protected_symlinks(void)
6019 {
6020         int fret = -1;
6021         int dir_fd = -EBADF, fd = -EBADF;
6022         pid_t pid;
6023
6024         if (!protected_symlinks_enabled())
6025                 return 0;
6026
6027         if (!caps_supported())
6028                 return 0;
6029
6030         /* create directory */
6031         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6032                 log_stderr("failure: mkdirat");
6033                 goto out;
6034         }
6035
6036         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6037         if (dir_fd < 0) {
6038                 log_stderr("failure: openat");
6039                 goto out;
6040         }
6041         if (fchown(dir_fd, 0, 0)) {
6042                 log_stderr("failure: fchown");
6043                 goto out;
6044         }
6045         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6046                 log_stderr("failure: fchmod");
6047                 goto out;
6048         }
6049         /* validate sticky bit is set */
6050         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6051                 log_stderr("failure: is_sticky");
6052                 goto out;
6053         }
6054
6055         /* create regular file via mknod */
6056         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6057                 log_stderr("failure: mknodat");
6058                 goto out;
6059         }
6060         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6061                 log_stderr("failure: fchownat");
6062                 goto out;
6063         }
6064         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6065                 log_stderr("failure: fchmodat");
6066                 goto out;
6067         }
6068
6069         /* create symlinks */
6070         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6071                 log_stderr("failure: symlinkat");
6072                 goto out;
6073         }
6074         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6075                 log_stderr("failure: fchownat");
6076                 goto out;
6077         }
6078         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6079                 log_stderr("failure: expected_uid_gid");
6080                 goto out;
6081         }
6082         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6083                 log_stderr("failure: expected_uid_gid");
6084                 goto out;
6085         }
6086
6087         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6088                 log_stderr("failure: symlinkat");
6089                 goto out;
6090         }
6091         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6092                 log_stderr("failure: fchownat");
6093                 goto out;
6094         }
6095         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6096                 log_stderr("failure: expected_uid_gid");
6097                 goto out;
6098         }
6099         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6100                 log_stderr("failure: expected_uid_gid");
6101                 goto out;
6102         }
6103
6104         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6105                 log_stderr("failure: symlinkat");
6106                 goto out;
6107         }
6108         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6109                 log_stderr("failure: fchownat");
6110                 goto out;
6111         }
6112         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6113                 log_stderr("failure: expected_uid_gid");
6114                 goto out;
6115         }
6116         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6117                 log_stderr("failure: expected_uid_gid");
6118                 goto out;
6119         }
6120
6121         /* validate file can be directly read */
6122         fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6123         if (fd < 0) {
6124                 log_stderr("failure: openat");
6125                 goto out;
6126         }
6127         safe_close(fd);
6128
6129         /* validate file can be read through own symlink */
6130         fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6131         if (fd < 0) {
6132                 log_stderr("failure: openat");
6133                 goto out;
6134         }
6135         safe_close(fd);
6136
6137         pid = fork();
6138         if (pid < 0) {
6139                 log_stderr("failure: fork");
6140                 goto out;
6141         }
6142         if (pid == 0) {
6143                 if (!switch_ids(1000, 1000))
6144                         die("failure: switch_ids");
6145
6146                 /* validate file can be directly read */
6147                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6148                 if (fd < 0)
6149                         die("failure: openat");
6150                 safe_close(fd);
6151
6152                 /* validate file can be read through own symlink */
6153                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6154                 if (fd < 0)
6155                         die("failure: openat");
6156                 safe_close(fd);
6157
6158                 /* validate file can be read through root symlink */
6159                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6160                 if (fd < 0)
6161                         die("failure: openat");
6162                 safe_close(fd);
6163
6164                 /* validate file can't be read through other users symlink */
6165                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6166                 if (fd >= 0)
6167                         die("failure: openat");
6168                 if (errno != EACCES)
6169                         die("failure: errno");
6170
6171                 exit(EXIT_SUCCESS);
6172         }
6173         if (wait_for_pid(pid)) {
6174                 log_stderr("failure: wait_for_pid");
6175                 goto out;
6176         }
6177
6178         pid = fork();
6179         if (pid < 0) {
6180                 log_stderr("failure: fork");
6181                 goto out;
6182         }
6183         if (pid == 0) {
6184                 if (!switch_ids(2000, 2000))
6185                         die("failure: switch_ids");
6186
6187                 /* validate file can be directly read */
6188                 fd = openat(dir_fd, FILE1, O_RDONLY | O_CLOEXEC, 0);
6189                 if (fd < 0)
6190                         die("failure: openat");
6191                 safe_close(fd);
6192
6193                 /* validate file can be read through own symlink */
6194                 fd = openat(dir_fd, SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6195                 if (fd < 0)
6196                         die("failure: openat");
6197                 safe_close(fd);
6198
6199                 /* validate file can be read through root symlink */
6200                 fd = openat(dir_fd, SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6201                 if (fd < 0)
6202                         die("failure: openat");
6203                 safe_close(fd);
6204
6205                 /* validate file can't be read through other users symlink */
6206                 fd = openat(dir_fd, SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6207                 if (fd >= 0)
6208                         die("failure: openat");
6209                 if (errno != EACCES)
6210                         die("failure: errno");
6211
6212                 exit(EXIT_SUCCESS);
6213         }
6214         if (wait_for_pid(pid)) {
6215                 log_stderr("failure: wait_for_pid");
6216                 goto out;
6217         }
6218
6219         fret = 0;
6220         log_debug("Ran test");
6221 out:
6222         safe_close(fd);
6223         safe_close(dir_fd);
6224
6225         return fret;
6226 }
6227
6228 /* Validate that protected symlinks work correctly on idmapped mounts. */
6229 static int protected_symlinks_idmapped_mounts(void)
6230 {
6231         int fret = -1;
6232         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6233         struct mount_attr attr = {
6234                 .attr_set = MOUNT_ATTR_IDMAP,
6235         };
6236         pid_t pid;
6237
6238         if (!protected_symlinks_enabled())
6239                 return 0;
6240
6241         if (!caps_supported())
6242                 return 0;
6243
6244         /* create directory */
6245         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6246                 log_stderr("failure: mkdirat");
6247                 goto out;
6248         }
6249
6250         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6251         if (dir_fd < 0) {
6252                 log_stderr("failure: openat");
6253                 goto out;
6254         }
6255         if (fchown(dir_fd, 10000, 10000)) {
6256                 log_stderr("failure: fchown");
6257                 goto out;
6258         }
6259         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6260                 log_stderr("failure: fchmod");
6261                 goto out;
6262         }
6263         /* validate sticky bit is set */
6264         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6265                 log_stderr("failure: is_sticky");
6266                 goto out;
6267         }
6268
6269         /* create regular file via mknod */
6270         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6271                 log_stderr("failure: mknodat");
6272                 goto out;
6273         }
6274         if (fchownat(dir_fd, FILE1, 10000, 10000, 0)) {
6275                 log_stderr("failure: fchownat");
6276                 goto out;
6277         }
6278         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6279                 log_stderr("failure: fchmodat");
6280                 goto out;
6281         }
6282
6283         /* create symlinks */
6284         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6285                 log_stderr("failure: symlinkat");
6286                 goto out;
6287         }
6288         if (fchownat(dir_fd, SYMLINK_USER1, 10000, 10000, AT_SYMLINK_NOFOLLOW)) {
6289                 log_stderr("failure: fchownat");
6290                 goto out;
6291         }
6292         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 10000, 10000)) {
6293                 log_stderr("failure: expected_uid_gid");
6294                 goto out;
6295         }
6296         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6297                 log_stderr("failure: expected_uid_gid");
6298                 goto out;
6299         }
6300
6301         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6302                 log_stderr("failure: symlinkat");
6303                 goto out;
6304         }
6305         if (fchownat(dir_fd, SYMLINK_USER2, 11000, 11000, AT_SYMLINK_NOFOLLOW)) {
6306                 log_stderr("failure: fchownat");
6307                 goto out;
6308         }
6309         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 11000, 11000)) {
6310                 log_stderr("failure: expected_uid_gid");
6311                 goto out;
6312         }
6313         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6314                 log_stderr("failure: expected_uid_gid");
6315                 goto out;
6316         }
6317
6318         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6319                 log_stderr("failure: symlinkat");
6320                 goto out;
6321         }
6322         if (fchownat(dir_fd, SYMLINK_USER3, 12000, 12000, AT_SYMLINK_NOFOLLOW)) {
6323                 log_stderr("failure: fchownat");
6324                 goto out;
6325         }
6326         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 12000, 12000)) {
6327                 log_stderr("failure: expected_uid_gid");
6328                 goto out;
6329         }
6330         if (!expected_uid_gid(dir_fd, FILE1, 0, 10000, 10000)) {
6331                 log_stderr("failure: expected_uid_gid");
6332                 goto out;
6333         }
6334
6335         /* Changing mount properties on a detached mount. */
6336         attr.userns_fd  = get_userns_fd(10000, 0, 10000);
6337         if (attr.userns_fd < 0) {
6338                 log_stderr("failure: get_userns_fd");
6339                 goto out;
6340         }
6341
6342         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6343                                      AT_EMPTY_PATH |
6344                                      AT_NO_AUTOMOUNT |
6345                                      AT_SYMLINK_NOFOLLOW |
6346                                      OPEN_TREE_CLOEXEC |
6347                                      OPEN_TREE_CLONE);
6348         if (open_tree_fd < 0) {
6349                 log_stderr("failure: open_tree_fd");
6350                 goto out;
6351         }
6352
6353         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6354                 log_stderr("failure: sys_mount_setattr");
6355                 goto out;
6356         }
6357
6358         /* validate file can be directly read */
6359         fd = openat(open_tree_fd, DIR1 "/"  FILE1, O_RDONLY | O_CLOEXEC, 0);
6360         if (fd < 0) {
6361                 log_stderr("failure: openat");
6362                 goto out;
6363         }
6364         safe_close(fd);
6365
6366         /* validate file can be read through own symlink */
6367         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6368         if (fd < 0) {
6369                 log_stderr("failure: openat");
6370                 goto out;
6371         }
6372         safe_close(fd);
6373
6374         pid = fork();
6375         if (pid < 0) {
6376                 log_stderr("failure: fork");
6377                 goto out;
6378         }
6379         if (pid == 0) {
6380                 if (!switch_ids(1000, 1000))
6381                         die("failure: switch_ids");
6382
6383                 /* validate file can be directly read */
6384                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6385                 if (fd < 0)
6386                         die("failure: openat");
6387                 safe_close(fd);
6388
6389                 /* validate file can be read through own symlink */
6390                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6391                 if (fd < 0)
6392                         die("failure: openat");
6393                 safe_close(fd);
6394
6395                 /* validate file can be read through root symlink */
6396                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6397                 if (fd < 0)
6398                         die("failure: openat");
6399                 safe_close(fd);
6400
6401                 /* validate file can't be read through other users symlink */
6402                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6403                 if (fd >= 0)
6404                         die("failure: openat");
6405                 if (errno != EACCES)
6406                         die("failure: errno");
6407
6408                 exit(EXIT_SUCCESS);
6409         }
6410         if (wait_for_pid(pid)) {
6411                 log_stderr("failure: wait_for_pid");
6412                 goto out;
6413         }
6414
6415         pid = fork();
6416         if (pid < 0) {
6417                 log_stderr("failure: fork");
6418                 goto out;
6419         }
6420         if (pid == 0) {
6421                 if (!switch_ids(2000, 2000))
6422                         die("failure: switch_ids");
6423
6424                 /* validate file can be directly read */
6425                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6426                 if (fd < 0)
6427                         die("failure: openat");
6428                 safe_close(fd);
6429
6430                 /* validate file can be read through own symlink */
6431                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6432                 if (fd < 0)
6433                         die("failure: openat");
6434                 safe_close(fd);
6435
6436                 /* validate file can be read through root symlink */
6437                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6438                 if (fd < 0)
6439                         die("failure: openat");
6440                 safe_close(fd);
6441
6442                 /* validate file can't be read through other users symlink */
6443                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6444                 if (fd >= 0)
6445                         die("failure: openat");
6446                 if (errno != EACCES)
6447                         die("failure: errno");
6448
6449                 exit(EXIT_SUCCESS);
6450         }
6451         if (wait_for_pid(pid)) {
6452                 log_stderr("failure: wait_for_pid");
6453                 goto out;
6454         }
6455
6456         fret = 0;
6457         log_debug("Ran test");
6458 out:
6459         safe_close(attr.userns_fd);
6460         safe_close(fd);
6461         safe_close(dir_fd);
6462         safe_close(open_tree_fd);
6463
6464         return fret;
6465 }
6466
6467 /* Validate that protected symlinks work correctly on idmapped mounts inside a
6468  * user namespace.
6469  */
6470 static int protected_symlinks_idmapped_mounts_in_userns(void)
6471 {
6472         int fret = -1;
6473         int dir_fd = -EBADF, fd = -EBADF, open_tree_fd = -EBADF;
6474         struct mount_attr attr = {
6475                 .attr_set = MOUNT_ATTR_IDMAP,
6476         };
6477         pid_t pid;
6478
6479         if (!protected_symlinks_enabled())
6480                 return 0;
6481
6482         if (!caps_supported())
6483                 return 0;
6484
6485         /* create directory */
6486         if (mkdirat(t_dir1_fd, DIR1, 0000)) {
6487                 log_stderr("failure: mkdirat");
6488                 goto out;
6489         }
6490
6491         dir_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6492         if (dir_fd < 0) {
6493                 log_stderr("failure: openat");
6494                 goto out;
6495         }
6496         if (fchown(dir_fd, 0, 0)) {
6497                 log_stderr("failure: fchown");
6498                 goto out;
6499         }
6500         if (fchmod(dir_fd, 0777 | S_ISVTX)) {
6501                 log_stderr("failure: fchmod");
6502                 goto out;
6503         }
6504         /* validate sticky bit is set */
6505         if (!is_sticky(t_dir1_fd, DIR1, 0)) {
6506                 log_stderr("failure: is_sticky");
6507                 goto out;
6508         }
6509
6510         /* create regular file via mknod */
6511         if (mknodat(dir_fd, FILE1, S_IFREG | 0000, 0)) {
6512                 log_stderr("failure: mknodat");
6513                 goto out;
6514         }
6515         if (fchownat(dir_fd, FILE1, 0, 0, 0)) {
6516                 log_stderr("failure: fchownat");
6517                 goto out;
6518         }
6519         if (fchmodat(dir_fd, FILE1, 0644, 0)) {
6520                 log_stderr("failure: fchmodat");
6521                 goto out;
6522         }
6523
6524         /* create symlinks */
6525         if (symlinkat(FILE1, dir_fd, SYMLINK_USER1)) {
6526                 log_stderr("failure: symlinkat");
6527                 goto out;
6528         }
6529         if (fchownat(dir_fd, SYMLINK_USER1, 0, 0, AT_SYMLINK_NOFOLLOW)) {
6530                 log_stderr("failure: fchownat");
6531                 goto out;
6532         }
6533         if (!expected_uid_gid(dir_fd, SYMLINK_USER1, AT_SYMLINK_NOFOLLOW, 0, 0)) {
6534                 log_stderr("failure: expected_uid_gid");
6535                 goto out;
6536         }
6537         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6538                 log_stderr("failure: expected_uid_gid");
6539                 goto out;
6540         }
6541
6542         if (symlinkat(FILE1, dir_fd, SYMLINK_USER2)) {
6543                 log_stderr("failure: symlinkat");
6544                 goto out;
6545         }
6546         if (fchownat(dir_fd, SYMLINK_USER2, 1000, 1000, AT_SYMLINK_NOFOLLOW)) {
6547                 log_stderr("failure: fchownat");
6548                 goto out;
6549         }
6550         if (!expected_uid_gid(dir_fd, SYMLINK_USER2, AT_SYMLINK_NOFOLLOW, 1000, 1000)) {
6551                 log_stderr("failure: expected_uid_gid");
6552                 goto out;
6553         }
6554         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6555                 log_stderr("failure: expected_uid_gid");
6556                 goto out;
6557         }
6558
6559         if (symlinkat(FILE1, dir_fd, SYMLINK_USER3)) {
6560                 log_stderr("failure: symlinkat");
6561                 goto out;
6562         }
6563         if (fchownat(dir_fd, SYMLINK_USER3, 2000, 2000, AT_SYMLINK_NOFOLLOW)) {
6564                 log_stderr("failure: fchownat");
6565                 goto out;
6566         }
6567         if (!expected_uid_gid(dir_fd, SYMLINK_USER3, AT_SYMLINK_NOFOLLOW, 2000, 2000)) {
6568                 log_stderr("failure: expected_uid_gid");
6569                 goto out;
6570         }
6571         if (!expected_uid_gid(dir_fd, FILE1, 0, 0, 0)) {
6572                 log_stderr("failure: expected_uid_gid");
6573                 goto out;
6574         }
6575
6576         /* Changing mount properties on a detached mount. */
6577         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
6578         if (attr.userns_fd < 0) {
6579                 log_stderr("failure: get_userns_fd");
6580                 goto out;
6581         }
6582
6583         open_tree_fd = sys_open_tree(t_dir1_fd, "",
6584                                      AT_EMPTY_PATH |
6585                                      AT_NO_AUTOMOUNT |
6586                                      AT_SYMLINK_NOFOLLOW |
6587                                      OPEN_TREE_CLOEXEC |
6588                                      OPEN_TREE_CLONE);
6589         if (open_tree_fd < 0) {
6590                 log_stderr("failure: sys_open_tree");
6591                 goto out;
6592         }
6593
6594         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6595                 log_stderr("failure: sys_mount_setattr");
6596                 goto out;
6597         }
6598
6599         /* validate file can be directly read */
6600         fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6601         if (fd < 0) {
6602                 log_stderr("failure: openat");
6603                 goto out;
6604         }
6605         safe_close(fd);
6606
6607         /* validate file can be read through own symlink */
6608         fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6609         if (fd < 0) {
6610                 log_stderr("failure: openat");
6611                 goto out;
6612         }
6613         safe_close(fd);
6614
6615         pid = fork();
6616         if (pid < 0) {
6617                 log_stderr("failure: fork");
6618                 goto out;
6619         }
6620         if (pid == 0) {
6621                 if (!caps_supported()) {
6622                         log_debug("skip: capability library not installed");
6623                         exit(EXIT_SUCCESS);
6624                 }
6625
6626                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
6627                         die("failure: switch_userns");
6628
6629                 /* validate file can be directly read */
6630                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6631                 if (fd < 0)
6632                         die("failure: openat");
6633                 safe_close(fd);
6634
6635                 /* validate file can be read through own symlink */
6636                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6637                 if (fd < 0)
6638                         die("failure: openat");
6639                 safe_close(fd);
6640
6641                 /* validate file can be read through root symlink */
6642                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6643                 if (fd < 0)
6644                         die("failure: openat");
6645                 safe_close(fd);
6646
6647                 /* validate file can't be read through other users symlink */
6648                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6649                 if (fd >= 0)
6650                         die("failure: openat");
6651                 if (errno != EACCES)
6652                         die("failure: errno");
6653
6654                 exit(EXIT_SUCCESS);
6655         }
6656         if (wait_for_pid(pid)) {
6657                 log_stderr("failure: wait_for_pid");
6658                 goto out;
6659         }
6660
6661         pid = fork();
6662         if (pid < 0) {
6663                 log_stderr("failure: fork");
6664                 goto out;
6665         }
6666         if (pid == 0) {
6667                 if (!caps_supported()) {
6668                         log_debug("skip: capability library not installed");
6669                         exit(EXIT_SUCCESS);
6670                 }
6671
6672                 if (!switch_userns(attr.userns_fd, 2000, 2000, true))
6673                         die("failure: switch_userns");
6674
6675                 /* validate file can be directly read */
6676                 fd = openat(open_tree_fd, DIR1 "/" FILE1, O_RDONLY | O_CLOEXEC, 0);
6677                 if (fd < 0)
6678                         die("failure: openat");
6679                 safe_close(fd);
6680
6681                 /* validate file can be read through own symlink */
6682                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER3, O_RDONLY | O_CLOEXEC, 0);
6683                 if (fd < 0)
6684                         die("failure: openat");
6685                 safe_close(fd);
6686
6687                 /* validate file can be read through root symlink */
6688                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER1, O_RDONLY | O_CLOEXEC, 0);
6689                 if (fd < 0)
6690                         die("failure: openat");
6691                 safe_close(fd);
6692
6693                 /* validate file can't be read through other users symlink */
6694                 fd = openat(open_tree_fd, DIR1 "/" SYMLINK_USER2, O_RDONLY | O_CLOEXEC, 0);
6695                 if (fd >= 0)
6696                         die("failure: openat");
6697                 if (errno != EACCES)
6698                         die("failure: errno");
6699
6700                 exit(EXIT_SUCCESS);
6701         }
6702         if (wait_for_pid(pid)) {
6703                 log_stderr("failure: wait_for_pid");
6704                 goto out;
6705         }
6706
6707         fret = 0;
6708         log_debug("Ran test");
6709 out:
6710         safe_close(dir_fd);
6711         safe_close(open_tree_fd);
6712         safe_close(attr.userns_fd);
6713
6714         return fret;
6715 }
6716
6717 static int acls(void)
6718 {
6719         int fret = -1;
6720         int dir1_fd = -EBADF, open_tree_fd = -EBADF;
6721         struct mount_attr attr = {
6722                 .attr_set = MOUNT_ATTR_IDMAP,
6723         };
6724         pid_t pid;
6725
6726         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
6727                 log_stderr("failure: mkdirat");
6728                 goto out;
6729         }
6730         if (fchmodat(t_dir1_fd, DIR1, 0777, 0)) {
6731                 log_stderr("failure: fchmodat");
6732                 goto out;
6733         }
6734
6735         if (mkdirat(t_dir1_fd, DIR2, 0777)) {
6736                 log_stderr("failure: mkdirat");
6737                 goto out;
6738         }
6739         if (fchmodat(t_dir1_fd, DIR2, 0777, 0)) {
6740                 log_stderr("failure: fchmodat");
6741                 goto out;
6742         }
6743
6744         /* Changing mount properties on a detached mount. */
6745         attr.userns_fd = get_userns_fd(100010, 100020, 5);
6746         if (attr.userns_fd < 0) {
6747                 log_stderr("failure: get_userns_fd");
6748                 goto out;
6749         }
6750
6751         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
6752                                      AT_NO_AUTOMOUNT |
6753                                      AT_SYMLINK_NOFOLLOW |
6754                                      OPEN_TREE_CLOEXEC |
6755                                      OPEN_TREE_CLONE);
6756         if (open_tree_fd < 0) {
6757                 log_stderr("failure: sys_open_tree");
6758                 goto out;
6759         }
6760
6761         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
6762                 log_stderr("failure: sys_mount_setattr");
6763                 goto out;
6764         }
6765
6766         if (sys_move_mount(open_tree_fd, "", t_dir1_fd, DIR2, MOVE_MOUNT_F_EMPTY_PATH)) {
6767                 log_stderr("failure: sys_move_mount");
6768                 goto out;
6769         }
6770
6771         dir1_fd = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
6772         if (dir1_fd < 0) {
6773                 log_stderr("failure: openat");
6774                 goto out;
6775         }
6776
6777         if (mkdirat(dir1_fd, DIR3, 0000)) {
6778                 log_stderr("failure: mkdirat");
6779                 goto out;
6780         }
6781         if (fchown(dir1_fd, 100010, 100010)) {
6782                 log_stderr("failure: fchown");
6783                 goto out;
6784         }
6785         if (fchmod(dir1_fd, 0777)) {
6786                 log_stderr("failure: fchmod");
6787                 goto out;
6788         }
6789
6790         snprintf(t_buf, sizeof(t_buf), "setfacl -m u:100010:rwx %s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6791         if (system(t_buf)) {
6792                 log_stderr("failure: system");
6793                 goto out;
6794         }
6795
6796         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100010:rwx", t_mountpoint, T_DIR1, DIR1, DIR3);
6797         if (system(t_buf)) {
6798                 log_stderr("failure: system");
6799                 goto out;
6800         }
6801
6802         snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:100020:rwx", t_mountpoint, T_DIR1, DIR2, DIR3);
6803         if (system(t_buf)) {
6804                 log_stderr("failure: system");
6805                 goto out;
6806         }
6807
6808         pid = fork();
6809         if (pid < 0) {
6810                 log_stderr("failure: fork");
6811                 goto out;
6812         }
6813         if (pid == 0) {
6814                 if (!caps_supported()) {
6815                         log_debug("skip: capability library not installed");
6816                         exit(EXIT_SUCCESS);
6817                 }
6818
6819                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6820                         die("failure: switch_userns");
6821
6822                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6823                          t_mountpoint, T_DIR1, DIR1, DIR3, 4294967295LU);
6824                 if (system(t_buf))
6825                         die("failure: system");
6826
6827                 exit(EXIT_SUCCESS);
6828         }
6829         if (wait_for_pid(pid)) {
6830                 log_stderr("failure: wait_for_pid");
6831                 goto out;
6832         }
6833
6834         pid = fork();
6835         if (pid < 0) {
6836                 log_stderr("failure: fork");
6837                 goto out;
6838         }
6839         if (pid == 0) {
6840                 if (!caps_supported()) {
6841                         log_debug("skip: capability library not installed");
6842                         exit(EXIT_SUCCESS);
6843                 }
6844
6845                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6846                         die("failure: switch_userns");
6847
6848                 snprintf(t_buf, sizeof(t_buf), "getfacl -p %s/%s/%s/%s | grep -q user:%lu:rwx",
6849                          t_mountpoint, T_DIR1, DIR2, DIR3, 100010LU);
6850                 if (system(t_buf))
6851                         die("failure: system");
6852
6853                 exit(EXIT_SUCCESS);
6854         }
6855         if (wait_for_pid(pid)) {
6856                 log_stderr("failure: wait_for_pid");
6857                 goto out;
6858         }
6859
6860         /* Now, dir is owned by someone else in the user namespace, but we can
6861          * still read it because of acls.
6862          */
6863         if (fchown(dir1_fd, 100012, 100012)) {
6864                 log_stderr("failure: fchown");
6865                 goto out;
6866         }
6867
6868         pid = fork();
6869         if (pid < 0) {
6870                 log_stderr("failure: fork");
6871                 goto out;
6872         }
6873         if (pid == 0) {
6874                 int fd;
6875
6876                 if (!caps_supported()) {
6877                         log_debug("skip: capability library not installed");
6878                         exit(EXIT_SUCCESS);
6879                 }
6880
6881                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6882                         die("failure: switch_userns");
6883
6884                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6885                 if (fd < 0)
6886                         die("failure: openat");
6887
6888                 exit(EXIT_SUCCESS);
6889         }
6890         if (wait_for_pid(pid)) {
6891                 log_stderr("failure: wait_for_pid");
6892                 goto out;
6893         }
6894
6895         /* if we delete the acls, the ls should fail because it's 700. */
6896         snprintf(t_buf, sizeof(t_buf), "%s/%s/%s/%s", t_mountpoint, T_DIR1, DIR1, DIR3);
6897         if (removexattr(t_buf, "system.posix_acl_access")) {
6898                 log_stderr("failure: removexattr");
6899                 goto out;
6900         }
6901
6902         pid = fork();
6903         if (pid < 0) {
6904                 log_stderr("failure: fork");
6905                 goto out;
6906         }
6907         if (pid == 0) {
6908                 int fd;
6909
6910                 if (!caps_supported()) {
6911                         log_debug("skip: capability library not installed");
6912                         exit(EXIT_SUCCESS);
6913                 }
6914
6915                 if (!switch_userns(attr.userns_fd, 100010, 100010, true))
6916                         die("failure: switch_userns");
6917
6918                 fd = openat(open_tree_fd, DIR3, O_CLOEXEC | O_DIRECTORY);
6919                 if (fd >= 0)
6920                         die("failure: openat");
6921
6922                 exit(EXIT_SUCCESS);
6923         }
6924         if (wait_for_pid(pid)) {
6925                 log_stderr("failure: wait_for_pid");
6926                 goto out;
6927         }
6928
6929         snprintf(t_buf, sizeof(t_buf), "%s/" T_DIR1 "/" DIR2, t_mountpoint);
6930         sys_umount2(t_buf, MNT_DETACH);
6931
6932         fret = 0;
6933         log_debug("Ran test");
6934 out:
6935         safe_close(attr.userns_fd);
6936         safe_close(dir1_fd);
6937         safe_close(open_tree_fd);
6938
6939         return fret;
6940 }
6941
6942 #ifdef HAVE_LIBURING_H
6943 static int io_uring_openat_with_creds(struct io_uring *ring, int dfd, const char *path, int cred_id,
6944                                       bool with_link, int *ret_cqe)
6945 {
6946         struct io_uring_cqe *cqe;
6947         struct io_uring_sqe *sqe;
6948         int ret, i, to_submit = 1;
6949
6950         if (with_link) {
6951                 sqe = io_uring_get_sqe(ring);
6952                 if (!sqe)
6953                         return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6954                 io_uring_prep_nop(sqe);
6955                 sqe->flags |= IOSQE_IO_LINK;
6956                 sqe->user_data = 1;
6957                 to_submit++;
6958         }
6959
6960         sqe = io_uring_get_sqe(ring);
6961         if (!sqe)
6962                 return log_error_errno(-EINVAL, EINVAL, "failure: io_uring_sqe");
6963         io_uring_prep_openat(sqe, dfd, path, O_RDONLY | O_CLOEXEC, 0);
6964         sqe->user_data = 2;
6965
6966         if (cred_id != -1)
6967                 sqe->personality = cred_id;
6968
6969         ret = io_uring_submit(ring);
6970         if (ret != to_submit) {
6971                 log_stderr("failure: io_uring_submit");
6972                 goto out;
6973         }
6974
6975         for (i = 0; i < to_submit; i++) {
6976                 ret = io_uring_wait_cqe(ring, &cqe);
6977                 if (ret < 0) {
6978                         log_stderr("failure: io_uring_wait_cqe");
6979                         goto out;
6980                 }
6981
6982                 ret = cqe->res;
6983                 /*
6984                  * Make sure caller can identify that this is a proper io_uring
6985                  * failure and not some earlier error.
6986                  */
6987                 if (ret_cqe)
6988                         *ret_cqe = ret;
6989                 io_uring_cqe_seen(ring, cqe);
6990         }
6991         log_debug("Ran test");
6992 out:
6993         return ret;
6994 }
6995
6996 static int io_uring(void)
6997 {
6998         int fret = -1;
6999         int file1_fd = -EBADF;
7000         struct io_uring *ring;
7001         int cred_id, ret, ret_cqe;
7002         pid_t pid;
7003
7004         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7005                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7006         if (!ring)
7007                 return log_errno(-1, "failure: io_uring_queue_init");
7008
7009         ret = io_uring_queue_init(8, ring, 0);
7010         if (ret) {
7011                 log_stderr("failure: io_uring_queue_init");
7012                 goto out_unmap;
7013         }
7014
7015         ret = io_uring_register_personality(ring);
7016         if (ret < 0) {
7017                 fret = 0;
7018                 goto out_unmap; /* personalities not supported */
7019         }
7020         cred_id = ret;
7021
7022         /* create file only owner can open */
7023         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7024         if (file1_fd < 0) {
7025                 log_stderr("failure: openat");
7026                 goto out;
7027         }
7028         if (fchown(file1_fd, 0, 0)) {
7029                 log_stderr("failure: fchown");
7030                 goto out;
7031         }
7032         if (fchmod(file1_fd, 0600)) {
7033                 log_stderr("failure: fchmod");
7034                 goto out;
7035         }
7036         safe_close(file1_fd);
7037
7038         pid = fork();
7039         if (pid < 0) {
7040                 log_stderr("failure: fork");
7041                 goto out;
7042         }
7043         if (pid == 0) {
7044                 /* Verify we can open it with our current credentials. */
7045                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7046                                                       -1, false, NULL);
7047                 if (file1_fd < 0)
7048                         die("failure: io_uring_open_file");
7049
7050                 exit(EXIT_SUCCESS);
7051         }
7052         if (wait_for_pid(pid)) {
7053                 log_stderr("failure: wait_for_pid");
7054                 goto out;
7055         }
7056
7057         pid = fork();
7058         if (pid < 0) {
7059                 log_stderr("failure: fork");
7060                 goto out;
7061         }
7062         if (pid == 0) {
7063                 if (!switch_ids(1000, 1000))
7064                         die("failure: switch_ids");
7065
7066                 /* Verify we can't open it with our current credentials. */
7067                 ret_cqe = 0;
7068                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7069                                                       -1, false, &ret_cqe);
7070                 if (file1_fd >= 0)
7071                         die("failure: io_uring_open_file");
7072                 if (ret_cqe == 0)
7073                         die("failure: non-open() related io_uring_open_file failure %d", ret_cqe);
7074                 if (ret_cqe != -EACCES)
7075                         die("failure: errno(%d)", abs(ret_cqe));
7076
7077                 exit(EXIT_SUCCESS);
7078         }
7079         if (wait_for_pid(pid)) {
7080                 log_stderr("failure: wait_for_pid");
7081                 goto out;
7082         }
7083
7084         pid = fork();
7085         if (pid < 0) {
7086                 log_stderr("failure: fork");
7087                 goto out;
7088         }
7089         if (pid == 0) {
7090                 if (!switch_ids(1000, 1000))
7091                         die("failure: switch_ids");
7092
7093                 /* Verify we can open it with the registered credentials. */
7094                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7095                                                       cred_id, false, NULL);
7096                 if (file1_fd < 0)
7097                         die("failure: io_uring_open_file");
7098
7099                 /* Verify we can open it with the registered credentials and as
7100                  * a link.
7101                  */
7102                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7103                                                       cred_id, true, NULL);
7104                 if (file1_fd < 0)
7105                         die("failure: io_uring_open_file");
7106
7107                 exit(EXIT_SUCCESS);
7108         }
7109         if (wait_for_pid(pid)) {
7110                 log_stderr("failure: wait_for_pid");
7111                 goto out;
7112         }
7113
7114         fret = 0;
7115         log_debug("Ran test");
7116 out:
7117         ret = io_uring_unregister_personality(ring, cred_id);
7118         if (ret)
7119                 log_stderr("failure: io_uring_unregister_personality");
7120
7121 out_unmap:
7122         munmap(ring, sizeof(struct io_uring));
7123
7124         safe_close(file1_fd);
7125
7126         return fret;
7127 }
7128
7129 static int io_uring_userns(void)
7130 {
7131         int fret = -1;
7132         int file1_fd = -EBADF, userns_fd = -EBADF;
7133         struct io_uring *ring;
7134         int cred_id, ret, ret_cqe;
7135         pid_t pid;
7136
7137         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7138                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7139         if (!ring)
7140                 return log_errno(-1, "failure: io_uring_queue_init");
7141
7142         ret = io_uring_queue_init(8, ring, 0);
7143         if (ret) {
7144                 log_stderr("failure: io_uring_queue_init");
7145                 goto out_unmap;
7146         }
7147
7148         ret = io_uring_register_personality(ring);
7149         if (ret < 0) {
7150                 fret = 0;
7151                 goto out_unmap; /* personalities not supported */
7152         }
7153         cred_id = ret;
7154
7155         /* create file only owner can open */
7156         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7157         if (file1_fd < 0) {
7158                 log_stderr("failure: openat");
7159                 goto out;
7160         }
7161         if (fchown(file1_fd, 0, 0)) {
7162                 log_stderr("failure: fchown");
7163                 goto out;
7164         }
7165         if (fchmod(file1_fd, 0600)) {
7166                 log_stderr("failure: fchmod");
7167                 goto out;
7168         }
7169         safe_close(file1_fd);
7170
7171         userns_fd = get_userns_fd(0, 10000, 10000);
7172         if (userns_fd < 0) {
7173                 log_stderr("failure: get_userns_fd");
7174                 goto out;
7175         }
7176
7177         pid = fork();
7178         if (pid < 0) {
7179                 log_stderr("failure: fork");
7180                 goto out;
7181         }
7182         if (pid == 0) {
7183                 /* Verify we can open it with our current credentials. */
7184                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7185                                                       -1, false, NULL);
7186                 if (file1_fd < 0)
7187                         die("failure: io_uring_open_file");
7188
7189                 exit(EXIT_SUCCESS);
7190         }
7191         if (wait_for_pid(pid)) {
7192                 log_stderr("failure: wait_for_pid");
7193                 goto out;
7194         }
7195
7196         pid = fork();
7197         if (pid < 0) {
7198                 log_stderr("failure: fork");
7199                 goto out;
7200         }
7201         if (pid == 0) {
7202                 if (!switch_userns(userns_fd, 0, 0, false))
7203                         die("failure: switch_userns");
7204
7205                 /* Verify we can't open it with our current credentials. */
7206                 ret_cqe = 0;
7207                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7208                                                       -1, false, &ret_cqe);
7209                 if (file1_fd >= 0)
7210                         die("failure: io_uring_open_file");
7211                 if (ret_cqe == 0)
7212                         die("failure: non-open() related io_uring_open_file failure");
7213                 if (ret_cqe != -EACCES)
7214                         die("failure: errno(%d)", abs(ret_cqe));
7215
7216                 exit(EXIT_SUCCESS);
7217         }
7218         if (wait_for_pid(pid)) {
7219                 log_stderr("failure: wait_for_pid");
7220                 goto out;
7221         }
7222
7223         pid = fork();
7224         if (pid < 0) {
7225                 log_stderr("failure: fork");
7226                 goto out;
7227         }
7228         if (pid == 0) {
7229                 if (!switch_userns(userns_fd, 0, 0, false))
7230                         die("failure: switch_userns");
7231
7232                 /* Verify we can open it with the registered credentials. */
7233                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7234                                                       cred_id, false, NULL);
7235                 if (file1_fd < 0)
7236                         die("failure: io_uring_open_file");
7237
7238                 /* Verify we can open it with the registered credentials and as
7239                  * a link.
7240                  */
7241                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7242                                                       cred_id, true, NULL);
7243                 if (file1_fd < 0)
7244                         die("failure: io_uring_open_file");
7245
7246                 exit(EXIT_SUCCESS);
7247         }
7248         if (wait_for_pid(pid)) {
7249                 log_stderr("failure: wait_for_pid");
7250                 goto out;
7251         }
7252
7253         fret = 0;
7254         log_debug("Ran test");
7255 out:
7256         ret = io_uring_unregister_personality(ring, cred_id);
7257         if (ret)
7258                 log_stderr("failure: io_uring_unregister_personality");
7259
7260 out_unmap:
7261         munmap(ring, sizeof(struct io_uring));
7262
7263         safe_close(file1_fd);
7264         safe_close(userns_fd);
7265
7266         return fret;
7267 }
7268
7269 static int io_uring_idmapped(void)
7270 {
7271         int fret = -1;
7272         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7273         struct io_uring *ring;
7274         struct mount_attr attr = {
7275                 .attr_set = MOUNT_ATTR_IDMAP,
7276         };
7277         int cred_id, ret;
7278         pid_t pid;
7279
7280         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7281                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7282         if (!ring)
7283                 return log_errno(-1, "failure: io_uring_queue_init");
7284
7285         ret = io_uring_queue_init(8, ring, 0);
7286         if (ret) {
7287                 log_stderr("failure: io_uring_queue_init");
7288                 goto out_unmap;
7289         }
7290
7291         ret = io_uring_register_personality(ring);
7292         if (ret < 0) {
7293                 fret = 0;
7294                 goto out_unmap; /* personalities not supported */
7295         }
7296         cred_id = ret;
7297
7298         /* create file only owner can open */
7299         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7300         if (file1_fd < 0) {
7301                 log_stderr("failure: openat");
7302                 goto out;
7303         }
7304         if (fchown(file1_fd, 0, 0)) {
7305                 log_stderr("failure: fchown");
7306                 goto out;
7307         }
7308         if (fchmod(file1_fd, 0600)) {
7309                 log_stderr("failure: fchmod");
7310                 goto out;
7311         }
7312         safe_close(file1_fd);
7313
7314         /* Changing mount properties on a detached mount. */
7315         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7316         if (attr.userns_fd < 0)
7317                 return log_errno(-1, "failure: create user namespace");
7318
7319         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7320                                      AT_EMPTY_PATH |
7321                                      AT_NO_AUTOMOUNT |
7322                                      AT_SYMLINK_NOFOLLOW |
7323                                      OPEN_TREE_CLOEXEC |
7324                                      OPEN_TREE_CLONE);
7325         if (open_tree_fd < 0)
7326                 return log_errno(-1, "failure: create detached mount");
7327
7328         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7329                 return log_errno(-1, "failure: set mount attributes");
7330
7331         pid = fork();
7332         if (pid < 0) {
7333                 log_stderr("failure: fork");
7334                 goto out;
7335         }
7336         if (pid == 0) {
7337                 if (!switch_ids(10000, 10000))
7338                         die("failure: switch_ids");
7339
7340                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7341                                                       -1, false, NULL);
7342                 if (file1_fd < 0)
7343                         die("failure: io_uring_open_file");
7344
7345                 exit(EXIT_SUCCESS);
7346         }
7347         if (wait_for_pid(pid)) {
7348                 log_stderr("failure: wait_for_pid");
7349                 goto out;
7350         }
7351
7352         pid = fork();
7353         if (pid < 0) {
7354                 log_stderr("failure: fork");
7355                 goto out;
7356         }
7357         if (pid == 0) {
7358                 if (!switch_ids(10001, 10001))
7359                         die("failure: switch_ids");
7360
7361                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7362                                                       cred_id, false, NULL);
7363                 if (file1_fd < 0)
7364                         die("failure: io_uring_open_file");
7365
7366                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7367                                                       cred_id, true, NULL);
7368                 if (file1_fd < 0)
7369                         die("failure: io_uring_open_file");
7370
7371                 exit(EXIT_SUCCESS);
7372         }
7373         if (wait_for_pid(pid)) {
7374                 log_stderr("failure: wait_for_pid");
7375                 goto out;
7376         }
7377
7378         fret = 0;
7379         log_debug("Ran test");
7380 out:
7381         ret = io_uring_unregister_personality(ring, cred_id);
7382         if (ret)
7383                 log_stderr("failure: io_uring_unregister_personality");
7384
7385 out_unmap:
7386         munmap(ring, sizeof(struct io_uring));
7387
7388         safe_close(attr.userns_fd);
7389         safe_close(file1_fd);
7390         safe_close(open_tree_fd);
7391
7392         return fret;
7393 }
7394
7395 /*
7396  * Create an idmapped mount where the we leave the owner of the file unmapped.
7397  * In no circumstances, even with recorded credentials can it be allowed to
7398  * open the file.
7399  */
7400 static int io_uring_idmapped_unmapped(void)
7401 {
7402         int fret = -1;
7403         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7404         struct io_uring *ring;
7405         struct mount_attr attr = {
7406                 .attr_set = MOUNT_ATTR_IDMAP,
7407         };
7408         int cred_id, ret, ret_cqe;
7409         pid_t pid;
7410
7411         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7412                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7413         if (!ring)
7414                 return log_errno(-1, "failure: io_uring_queue_init");
7415
7416         ret = io_uring_queue_init(8, ring, 0);
7417         if (ret) {
7418                 log_stderr("failure: io_uring_queue_init");
7419                 goto out_unmap;
7420         }
7421
7422         ret = io_uring_register_personality(ring);
7423         if (ret < 0) {
7424                 fret = 0;
7425                 goto out_unmap; /* personalities not supported */
7426         }
7427         cred_id = ret;
7428
7429         /* create file only owner can open */
7430         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7431         if (file1_fd < 0) {
7432                 log_stderr("failure: openat");
7433                 goto out;
7434         }
7435         if (fchown(file1_fd, 0, 0)) {
7436                 log_stderr("failure: fchown");
7437                 goto out;
7438         }
7439         if (fchmod(file1_fd, 0600)) {
7440                 log_stderr("failure: fchmod");
7441                 goto out;
7442         }
7443         safe_close(file1_fd);
7444
7445         /* Changing mount properties on a detached mount. */
7446         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7447         if (attr.userns_fd < 0)
7448                 return log_errno(-1, "failure: create user namespace");
7449
7450         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7451                                      AT_EMPTY_PATH |
7452                                      AT_NO_AUTOMOUNT |
7453                                      AT_SYMLINK_NOFOLLOW |
7454                                      OPEN_TREE_CLOEXEC |
7455                                      OPEN_TREE_CLONE);
7456         if (open_tree_fd < 0)
7457                 return log_errno(-1, "failure: create detached mount");
7458
7459         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7460                 return log_errno(-1, "failure: set mount attributes");
7461
7462         pid = fork();
7463         if (pid < 0) {
7464                 log_stderr("failure: fork");
7465                 goto out;
7466         }
7467         if (pid == 0) {
7468                 if (!switch_ids(10000, 10000))
7469                         die("failure: switch_ids");
7470
7471                 ret_cqe = 0;
7472                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7473                                                       cred_id, false, &ret_cqe);
7474                 if (file1_fd >= 0)
7475                         die("failure: io_uring_open_file");
7476                 if (ret_cqe == 0)
7477                         die("failure: non-open() related io_uring_open_file failure");
7478                 if (ret_cqe != -EACCES)
7479                         die("failure: errno(%d)", abs(ret_cqe));
7480
7481                 ret_cqe = 0;
7482                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7483                                                       cred_id, true, &ret_cqe);
7484                 if (file1_fd >= 0)
7485                         die("failure: io_uring_open_file");
7486                 if (ret_cqe == 0)
7487                         die("failure: non-open() related io_uring_open_file failure");
7488                 if (ret_cqe != -EACCES)
7489                         die("failure: errno(%d)", abs(ret_cqe));
7490
7491                 exit(EXIT_SUCCESS);
7492         }
7493         if (wait_for_pid(pid)) {
7494                 log_stderr("failure: wait_for_pid");
7495                 goto out;
7496         }
7497
7498         fret = 0;
7499         log_debug("Ran test");
7500 out:
7501         ret = io_uring_unregister_personality(ring, cred_id);
7502         if (ret)
7503                 log_stderr("failure: io_uring_unregister_personality");
7504
7505 out_unmap:
7506         munmap(ring, sizeof(struct io_uring));
7507
7508         safe_close(attr.userns_fd);
7509         safe_close(file1_fd);
7510         safe_close(open_tree_fd);
7511
7512         return fret;
7513 }
7514
7515 static int io_uring_idmapped_userns(void)
7516 {
7517         int fret = -1;
7518         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7519         struct io_uring *ring;
7520         struct mount_attr attr = {
7521                 .attr_set = MOUNT_ATTR_IDMAP,
7522         };
7523         int cred_id, ret, ret_cqe;
7524         pid_t pid;
7525
7526         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7527                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7528         if (!ring)
7529                 return log_errno(-1, "failure: io_uring_queue_init");
7530
7531         ret = io_uring_queue_init(8, ring, 0);
7532         if (ret) {
7533                 log_stderr("failure: io_uring_queue_init");
7534                 goto out_unmap;
7535         }
7536
7537         ret = io_uring_register_personality(ring);
7538         if (ret < 0) {
7539                 fret = 0;
7540                 goto out_unmap; /* personalities not supported */
7541         }
7542         cred_id = ret;
7543
7544         /* create file only owner can open */
7545         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7546         if (file1_fd < 0) {
7547                 log_stderr("failure: openat");
7548                 goto out;
7549         }
7550         if (fchown(file1_fd, 0, 0)) {
7551                 log_stderr("failure: fchown");
7552                 goto out;
7553         }
7554         if (fchmod(file1_fd, 0600)) {
7555                 log_stderr("failure: fchmod");
7556                 goto out;
7557         }
7558         safe_close(file1_fd);
7559
7560         /* Changing mount properties on a detached mount. */
7561         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7562         if (attr.userns_fd < 0)
7563                 return log_errno(-1, "failure: create user namespace");
7564
7565         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7566                                      AT_EMPTY_PATH |
7567                                      AT_NO_AUTOMOUNT |
7568                                      AT_SYMLINK_NOFOLLOW |
7569                                      OPEN_TREE_CLOEXEC |
7570                                      OPEN_TREE_CLONE);
7571         if (open_tree_fd < 0)
7572                 return log_errno(-1, "failure: create detached mount");
7573
7574         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7575                 return log_errno(-1, "failure: set mount attributes");
7576
7577         pid = fork();
7578         if (pid < 0) {
7579                 log_stderr("failure: fork");
7580                 goto out;
7581         }
7582         if (pid == 0) {
7583                 if (!switch_userns(attr.userns_fd, 0, 0, false))
7584                         die("failure: switch_userns");
7585
7586                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7587                                                       -1, false, NULL);
7588                 if (file1_fd < 0)
7589                         die("failure: io_uring_open_file");
7590
7591                 exit(EXIT_SUCCESS);
7592         }
7593         if (wait_for_pid(pid)) {
7594                 log_stderr("failure: wait_for_pid");
7595                 goto out;
7596         }
7597
7598         pid = fork();
7599         if (pid < 0) {
7600                 log_stderr("failure: fork");
7601                 goto out;
7602         }
7603         if (pid == 0) {
7604                 if (!caps_supported()) {
7605                         log_debug("skip: capability library not installed");
7606                         exit(EXIT_SUCCESS);
7607                 }
7608
7609                 if (!switch_userns(attr.userns_fd, 1000, 1000, true))
7610                         die("failure: switch_userns");
7611
7612                 ret_cqe = 0;
7613                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7614                                                       -1, false, &ret_cqe);
7615                 if (file1_fd >= 0)
7616                         die("failure: io_uring_open_file");
7617                 if (ret_cqe == 0)
7618                         die("failure: non-open() related io_uring_open_file failure");
7619                 if (ret_cqe != -EACCES)
7620                         die("failure: errno(%d)", abs(ret_cqe));
7621
7622                 ret_cqe = 0;
7623                 file1_fd = io_uring_openat_with_creds(ring, t_dir1_fd, FILE1,
7624                                                       -1, true, &ret_cqe);
7625                 if (file1_fd >= 0)
7626                         die("failure: io_uring_open_file");
7627                 if (ret_cqe == 0)
7628                         die("failure: non-open() related io_uring_open_file failure");
7629                 if (ret_cqe != -EACCES)
7630                         die("failure: errno(%d)", abs(ret_cqe));
7631
7632                 ret_cqe = 0;
7633                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7634                                                       -1, false, &ret_cqe);
7635                 if (file1_fd >= 0)
7636                         die("failure: io_uring_open_file");
7637                 if (ret_cqe == 0)
7638                         die("failure: non-open() related io_uring_open_file failure");
7639                 if (ret_cqe != -EACCES)
7640                         die("failure: errno(%d)", abs(ret_cqe));
7641
7642                 ret_cqe = 0;
7643                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7644                                                       -1, true, &ret_cqe);
7645                 if (file1_fd >= 0)
7646                         die("failure: io_uring_open_file");
7647                 if (ret_cqe == 0)
7648                         die("failure: non-open() related io_uring_open_file failure");
7649                 if (ret_cqe != -EACCES)
7650                         die("failure: errno(%d)", abs(ret_cqe));
7651
7652                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7653                                                       cred_id, false, NULL);
7654                 if (file1_fd < 0)
7655                         die("failure: io_uring_open_file");
7656
7657                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7658                                                       cred_id, true, NULL);
7659                 if (file1_fd < 0)
7660                         die("failure: io_uring_open_file");
7661
7662                 exit(EXIT_SUCCESS);
7663         }
7664         if (wait_for_pid(pid)) {
7665                 log_stderr("failure: wait_for_pid");
7666                 goto out;
7667         }
7668
7669         fret = 0;
7670         log_debug("Ran test");
7671 out:
7672         ret = io_uring_unregister_personality(ring, cred_id);
7673         if (ret)
7674                 log_stderr("failure: io_uring_unregister_personality");
7675
7676 out_unmap:
7677         munmap(ring, sizeof(struct io_uring));
7678
7679         safe_close(attr.userns_fd);
7680         safe_close(file1_fd);
7681         safe_close(open_tree_fd);
7682
7683         return fret;
7684 }
7685
7686 static int io_uring_idmapped_unmapped_userns(void)
7687 {
7688         int fret = -1;
7689         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7690         struct io_uring *ring;
7691         struct mount_attr attr = {
7692                 .attr_set = MOUNT_ATTR_IDMAP,
7693         };
7694         int cred_id, ret, ret_cqe;
7695         pid_t pid;
7696
7697         ring = mmap(0, sizeof(struct io_uring), PROT_READ|PROT_WRITE,
7698                    MAP_SHARED | MAP_ANONYMOUS, 0, 0);
7699         if (!ring)
7700                 return log_errno(-1, "failure: io_uring_queue_init");
7701
7702         ret = io_uring_queue_init(8, ring, 0);
7703         if (ret) {
7704                 log_stderr("failure: io_uring_queue_init");
7705                 goto out_unmap;
7706         }
7707
7708         ret = io_uring_register_personality(ring);
7709         if (ret < 0) {
7710                 fret = 0;
7711                 goto out_unmap; /* personalities not supported */
7712         }
7713         cred_id = ret;
7714
7715         /* create file only owner can open */
7716         file1_fd = openat(t_dir1_fd, FILE1, O_RDONLY | O_CREAT | O_EXCL | O_CLOEXEC, 0000);
7717         if (file1_fd < 0) {
7718                 log_stderr("failure: openat");
7719                 goto out;
7720         }
7721         if (fchown(file1_fd, 0, 0)) {
7722                 log_stderr("failure: fchown");
7723                 goto out;
7724         }
7725         if (fchmod(file1_fd, 0600)) {
7726                 log_stderr("failure: fchmod");
7727                 goto out;
7728         }
7729         safe_close(file1_fd);
7730
7731         /* Changing mount properties on a detached mount. */
7732         attr.userns_fd  = get_userns_fd(1, 10000, 10000);
7733         if (attr.userns_fd < 0)
7734                 return log_errno(-1, "failure: create user namespace");
7735
7736         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7737                                      AT_EMPTY_PATH |
7738                                      AT_NO_AUTOMOUNT |
7739                                      AT_SYMLINK_NOFOLLOW |
7740                                      OPEN_TREE_CLOEXEC |
7741                                      OPEN_TREE_CLONE);
7742         if (open_tree_fd < 0)
7743                 return log_errno(-1, "failure: create detached mount");
7744
7745         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)))
7746                 return log_errno(-1, "failure: set mount attributes");
7747
7748         pid = fork();
7749         if (pid < 0) {
7750                 log_stderr("failure: fork");
7751                 goto out;
7752         }
7753         if (pid == 0) {
7754                 if (!caps_supported()) {
7755                         log_debug("skip: capability library not installed");
7756                         exit(EXIT_SUCCESS);
7757                 }
7758
7759                 if (!switch_userns(attr.userns_fd, 10000, 10000, true))
7760                         die("failure: switch_ids");
7761
7762                 ret_cqe = 0;
7763                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7764                                                       cred_id, false, &ret_cqe);
7765                 if (file1_fd >= 0)
7766                         die("failure: io_uring_open_file");
7767                 if (ret_cqe == 0)
7768                         die("failure: non-open() related io_uring_open_file failure");
7769                 if (ret_cqe != -EACCES)
7770                         die("failure: errno(%d)", abs(ret_cqe));
7771
7772                 ret_cqe = 0;
7773                 file1_fd = io_uring_openat_with_creds(ring, open_tree_fd, FILE1,
7774                                                       cred_id, true, &ret_cqe);
7775                 if (file1_fd >= 0)
7776                         die("failure: io_uring_open_file");
7777                 if (ret_cqe == 0)
7778                         die("failure: non-open() related io_uring_open_file failure");
7779                 if (ret_cqe != -EACCES)
7780                         die("failure: errno(%d)", abs(ret_cqe));
7781
7782                 exit(EXIT_SUCCESS);
7783         }
7784         if (wait_for_pid(pid)) {
7785                 log_stderr("failure: wait_for_pid");
7786                 goto out;
7787         }
7788
7789         fret = 0;
7790         log_debug("Ran test");
7791 out:
7792         ret = io_uring_unregister_personality(ring, cred_id);
7793         if (ret)
7794                 log_stderr("failure: io_uring_unregister_personality");
7795
7796 out_unmap:
7797         munmap(ring, sizeof(struct io_uring));
7798
7799         safe_close(attr.userns_fd);
7800         safe_close(file1_fd);
7801         safe_close(open_tree_fd);
7802
7803         return fret;
7804 }
7805 #endif /* HAVE_LIBURING_H */
7806
7807 /* The following tests are concerned with setgid inheritance. These can be
7808  * filesystem type specific. For xfs, if a new file or directory is created
7809  * within a setgid directory and irix_sgid_inhiert is set then inherit the
7810  * setgid bit if the caller is in the group of the directory.
7811  */
7812 static int setgid_create(void)
7813 {
7814         int fret = -1;
7815         int file1_fd = -EBADF;
7816         pid_t pid;
7817
7818         if (!caps_supported())
7819                 return 0;
7820
7821         if (fchmod(t_dir1_fd, S_IRUSR |
7822                               S_IWUSR |
7823                               S_IRGRP |
7824                               S_IWGRP |
7825                               S_IROTH |
7826                               S_IWOTH |
7827                               S_IXUSR |
7828                               S_IXGRP |
7829                               S_IXOTH |
7830                               S_ISGID), 0) {
7831                 log_stderr("failure: fchmod");
7832                 goto out;
7833         }
7834
7835         /* Verify that the setgid bit got raised. */
7836         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7837                 log_stderr("failure: is_setgid");
7838                 goto out;
7839         }
7840
7841         pid = fork();
7842         if (pid < 0) {
7843                 log_stderr("failure: fork");
7844                 goto out;
7845         }
7846         if (pid == 0) {
7847                 /* create regular file via open() */
7848                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7849                 if (file1_fd < 0)
7850                         die("failure: create");
7851
7852                 /* We're capable_wrt_inode_uidgid() and also our fsgid matches
7853                  * the directories gid.
7854                  */
7855                 if (!is_setgid(t_dir1_fd, FILE1, 0))
7856                         die("failure: is_setgid");
7857
7858                 /* create directory */
7859                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7860                         die("failure: create");
7861
7862                 /* Directories always inherit the setgid bit. */
7863                 if (!is_setgid(t_dir1_fd, DIR1, 0))
7864                         die("failure: is_setgid");
7865
7866                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0))
7867                         die("failure: check ownership");
7868
7869                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0))
7870                         die("failure: check ownership");
7871
7872                 if (unlinkat(t_dir1_fd, FILE1, 0))
7873                         die("failure: delete");
7874
7875                 if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR))
7876                         die("failure: delete");
7877
7878                 exit(EXIT_SUCCESS);
7879         }
7880         if (wait_for_pid(pid))
7881                 goto out;
7882
7883         pid = fork();
7884         if (pid < 0) {
7885                 log_stderr("failure: fork");
7886                 goto out;
7887         }
7888         if (pid == 0) {
7889                 if (!switch_ids(0, 10000))
7890                         die("failure: switch_ids");
7891
7892                 if (!caps_down())
7893                         die("failure: caps_down");
7894
7895                 /* create regular file via open() */
7896                 file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
7897                 if (file1_fd < 0)
7898                         die("failure: create");
7899
7900                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
7901                  * bit needs to be stripped.
7902                  */
7903                 if (is_setgid(t_dir1_fd, FILE1, 0))
7904                         die("failure: is_setgid");
7905
7906                 /* create directory */
7907                 if (mkdirat(t_dir1_fd, DIR1, 0000))
7908                         die("failure: create");
7909
7910                 if (xfs_irix_sgid_inherit_enabled()) {
7911                         /* We're not in_group_p(). */
7912                         if (is_setgid(t_dir1_fd, DIR1, 0))
7913                                 die("failure: is_setgid");
7914                 } else {
7915                         /* Directories always inherit the setgid bit. */
7916                         if (!is_setgid(t_dir1_fd, DIR1, 0))
7917                                 die("failure: is_setgid");
7918                 }
7919
7920                 /*
7921                  * In setgid directories newly created files always inherit the
7922                  * gid from the parent directory. Verify that the file is owned
7923                  * by gid 0, not by gid 10000.
7924                  */
7925                 if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0))
7926                         die("failure: check ownership");
7927
7928                 /*
7929                  * In setgid directories newly created directories always
7930                  * inherit the gid from the parent directory. Verify that the
7931                  * directory is owned by gid 0, not by gid 10000.
7932                  */
7933                 if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0))
7934                         die("failure: check ownership");
7935
7936                 exit(EXIT_SUCCESS);
7937         }
7938         if (wait_for_pid(pid))
7939                 goto out;
7940
7941         fret = 0;
7942         log_debug("Ran test");
7943 out:
7944         safe_close(file1_fd);
7945
7946         return fret;
7947 }
7948
7949 static int setgid_create_idmapped(void)
7950 {
7951         int fret = -1;
7952         int file1_fd = -EBADF, open_tree_fd = -EBADF;
7953         struct mount_attr attr = {
7954                 .attr_set = MOUNT_ATTR_IDMAP,
7955         };
7956         pid_t pid;
7957
7958         if (!caps_supported())
7959                 return 0;
7960
7961         if (fchmod(t_dir1_fd, S_IRUSR |
7962                               S_IWUSR |
7963                               S_IRGRP |
7964                               S_IWGRP |
7965                               S_IROTH |
7966                               S_IWOTH |
7967                               S_IXUSR |
7968                               S_IXGRP |
7969                               S_IXOTH |
7970                               S_ISGID), 0) {
7971                 log_stderr("failure: fchmod");
7972                 goto out;
7973         }
7974
7975         /* Verify that the sid bits got raised. */
7976         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
7977                 log_stderr("failure: is_setgid");
7978                 goto out;
7979         }
7980
7981         /* Changing mount properties on a detached mount. */
7982         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
7983         if (attr.userns_fd < 0) {
7984                 log_stderr("failure: get_userns_fd");
7985                 goto out;
7986         }
7987
7988         open_tree_fd = sys_open_tree(t_dir1_fd, "",
7989                                      AT_EMPTY_PATH |
7990                                      AT_NO_AUTOMOUNT |
7991                                      AT_SYMLINK_NOFOLLOW |
7992                                      OPEN_TREE_CLOEXEC |
7993                                      OPEN_TREE_CLONE);
7994         if (open_tree_fd < 0) {
7995                 log_stderr("failure: sys_open_tree");
7996                 goto out;
7997         }
7998
7999         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8000                 log_stderr("failure: sys_mount_setattr");
8001                 goto out;
8002         }
8003
8004         pid = fork();
8005         if (pid < 0) {
8006                 log_stderr("failure: fork");
8007                 goto out;
8008         }
8009         if (pid == 0) {
8010                 if (!switch_ids(10000, 11000))
8011                         die("failure: switch fsids");
8012
8013                 /* create regular file via open() */
8014                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8015                 if (file1_fd < 0)
8016                         die("failure: create");
8017
8018                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8019                  * bit needs to be stripped.
8020                  */
8021                 if (is_setgid(open_tree_fd, FILE1, 0))
8022                         die("failure: is_setgid");
8023
8024                 /* create directory */
8025                 if (mkdirat(open_tree_fd, DIR1, 0000))
8026                         die("failure: create");
8027
8028                 if (xfs_irix_sgid_inherit_enabled()) {
8029                         /* We're not in_group_p(). */
8030                         if (is_setgid(open_tree_fd, DIR1, 0))
8031                                 die("failure: is_setgid");
8032                 } else {
8033                         /* Directories always inherit the setgid bit. */
8034                         if (!is_setgid(open_tree_fd, DIR1, 0))
8035                                 die("failure: is_setgid");
8036                 }
8037
8038                 /*
8039                  * In setgid directories newly created files always inherit the
8040                  * gid from the parent directory. Verify that the file is owned
8041                  * by gid 10000, not by gid 11000.
8042                  */
8043                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8044                         die("failure: check ownership");
8045
8046                 /*
8047                  * In setgid directories newly created directories always
8048                  * inherit the gid from the parent directory. Verify that the
8049                  * directory is owned by gid 10000, not by gid 11000.
8050                  */
8051                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000))
8052                         die("failure: check ownership");
8053
8054                 exit(EXIT_SUCCESS);
8055         }
8056         if (wait_for_pid(pid))
8057                 goto out;
8058
8059         fret = 0;
8060         log_debug("Ran test");
8061 out:
8062         safe_close(attr.userns_fd);
8063         safe_close(file1_fd);
8064         safe_close(open_tree_fd);
8065
8066         return fret;
8067 }
8068
8069 static int setgid_create_idmapped_in_userns(void)
8070 {
8071         int fret = -1;
8072         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8073         struct mount_attr attr = {
8074                 .attr_set = MOUNT_ATTR_IDMAP,
8075         };
8076         pid_t pid;
8077
8078         if (!caps_supported())
8079                 return 0;
8080
8081         if (fchmod(t_dir1_fd, S_IRUSR |
8082                               S_IWUSR |
8083                               S_IRGRP |
8084                               S_IWGRP |
8085                               S_IROTH |
8086                               S_IWOTH |
8087                               S_IXUSR |
8088                               S_IXGRP |
8089                               S_IXOTH |
8090                               S_ISGID), 0) {
8091                 log_stderr("failure: fchmod");
8092                 goto out;
8093         }
8094
8095         /* Verify that the sid bits got raised. */
8096         if (!is_setgid(t_dir1_fd, "", AT_EMPTY_PATH)) {
8097                 log_stderr("failure: is_setgid");
8098                 goto out;
8099         }
8100
8101         /* Changing mount properties on a detached mount. */
8102         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8103         if (attr.userns_fd < 0) {
8104                 log_stderr("failure: get_userns_fd");
8105                 goto out;
8106         }
8107
8108         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8109                                      AT_EMPTY_PATH |
8110                                      AT_NO_AUTOMOUNT |
8111                                      AT_SYMLINK_NOFOLLOW |
8112                                      OPEN_TREE_CLOEXEC |
8113                                      OPEN_TREE_CLONE);
8114         if (open_tree_fd < 0) {
8115                 log_stderr("failure: sys_open_tree");
8116                 goto out;
8117         }
8118
8119         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8120                 log_stderr("failure: sys_mount_setattr");
8121                 goto out;
8122         }
8123
8124         pid = fork();
8125         if (pid < 0) {
8126                 log_stderr("failure: fork");
8127                 goto out;
8128         }
8129         if (pid == 0) {
8130                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8131                         die("failure: switch_userns");
8132
8133                 /* create regular file via open() */
8134                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8135                 if (file1_fd < 0)
8136                         die("failure: create");
8137
8138                 /* We're in_group_p() and capable_wrt_inode_uidgid() so setgid
8139                  * bit needs to be set.
8140                  */
8141                 if (!is_setgid(open_tree_fd, FILE1, 0))
8142                         die("failure: is_setgid");
8143
8144                 /* create directory */
8145                 if (mkdirat(open_tree_fd, DIR1, 0000))
8146                         die("failure: create");
8147
8148                 /* Directories always inherit the setgid bit. */
8149                 if (!is_setgid(open_tree_fd, DIR1, 0))
8150                         die("failure: is_setgid");
8151
8152                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8153                         die("failure: check ownership");
8154
8155                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8156                         die("failure: check ownership");
8157
8158                 if (unlinkat(open_tree_fd, FILE1, 0))
8159                         die("failure: delete");
8160
8161                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8162                         die("failure: delete");
8163
8164                 exit(EXIT_SUCCESS);
8165         }
8166         if (wait_for_pid(pid))
8167                 goto out;
8168
8169         /*
8170          * Below we verify that setgid inheritance for a newly created file or
8171          * directory works correctly. As part of this we need to verify that
8172          * newly created files or directories inherit their gid from their
8173          * parent directory. So we change the parent directorie's gid to 1000
8174          * and create a file with fs{g,u}id 0 and verify that the newly created
8175          * file and directory inherit gid 1000, not 0.
8176          */
8177         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8178                 log_stderr("failure: fchownat");
8179                 goto out;
8180         }
8181
8182         pid = fork();
8183         if (pid < 0) {
8184                 log_stderr("failure: fork");
8185                 goto out;
8186         }
8187         if (pid == 0) {
8188                 if (!caps_supported()) {
8189                         log_debug("skip: capability library not installed");
8190                         exit(EXIT_SUCCESS);
8191                 }
8192
8193                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8194                         die("failure: switch_userns");
8195
8196                 /* create regular file via open() */
8197                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8198                 if (file1_fd < 0)
8199                         die("failure: create");
8200
8201                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8202                  * bit needs to be stripped.
8203                  */
8204                 if (is_setgid(open_tree_fd, FILE1, 0))
8205                         die("failure: is_setgid");
8206
8207                 /* create directory */
8208                 if (mkdirat(open_tree_fd, DIR1, 0000))
8209                         die("failure: create");
8210
8211                 if (xfs_irix_sgid_inherit_enabled()) {
8212                         /* We're not in_group_p(). */
8213                         if (is_setgid(open_tree_fd, DIR1, 0))
8214                                 die("failure: is_setgid");
8215                 } else {
8216                         /* Directories always inherit the setgid bit. */
8217                         if (!is_setgid(open_tree_fd, DIR1, 0))
8218                                 die("failure: is_setgid");
8219                 }
8220
8221                 /*
8222                  * In setgid directories newly created files always inherit the
8223                  * gid from the parent directory. Verify that the file is owned
8224                  * by gid 1000, not by gid 0.
8225                  */
8226                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8227                         die("failure: check ownership");
8228
8229                 /*
8230                  * In setgid directories newly created directories always
8231                  * inherit the gid from the parent directory. Verify that the
8232                  * directory is owned by gid 1000, not by gid 0.
8233                  */
8234                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 1000))
8235                         die("failure: check ownership");
8236
8237                 if (unlinkat(open_tree_fd, FILE1, 0))
8238                         die("failure: delete");
8239
8240                 if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR))
8241                         die("failure: delete");
8242
8243                 exit(EXIT_SUCCESS);
8244         }
8245         if (wait_for_pid(pid))
8246                 goto out;
8247
8248         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8249                 log_stderr("failure: fchownat");
8250                 goto out;
8251         }
8252
8253         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8254                 log_stderr("failure: fchownat");
8255                 goto out;
8256         }
8257
8258         pid = fork();
8259         if (pid < 0) {
8260                 log_stderr("failure: fork");
8261                 goto out;
8262         }
8263         if (pid == 0) {
8264                 if (!caps_supported()) {
8265                         log_debug("skip: capability library not installed");
8266                         exit(EXIT_SUCCESS);
8267                 }
8268
8269                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8270                         die("failure: switch_userns");
8271
8272                 /* create regular file via open() */
8273                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID);
8274                 if (file1_fd < 0)
8275                         die("failure: create");
8276
8277                 /* Neither in_group_p() nor capable_wrt_inode_uidgid() so setgid
8278                  * bit needs to be stripped.
8279                  */
8280                 if (is_setgid(open_tree_fd, FILE1, 0))
8281                         die("failure: is_setgid");
8282
8283                 /* create directory */
8284                 if (mkdirat(open_tree_fd, DIR1, 0000))
8285                         die("failure: create");
8286
8287                 /* Directories always inherit the setgid bit. */
8288                 if (xfs_irix_sgid_inherit_enabled()) {
8289                         /* We're not in_group_p(). */
8290                         if (is_setgid(open_tree_fd, DIR1, 0))
8291                                 die("failure: is_setgid");
8292                 } else {
8293                         /* Directories always inherit the setgid bit. */
8294                         if (!is_setgid(open_tree_fd, DIR1, 0))
8295                                 die("failure: is_setgid");
8296                 }
8297
8298                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8299                         die("failure: check ownership");
8300
8301                 if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0))
8302                         die("failure: check ownership");
8303
8304                 exit(EXIT_SUCCESS);
8305         }
8306         if (wait_for_pid(pid))
8307                 goto out;
8308
8309         fret = 0;
8310         log_debug("Ran test");
8311 out:
8312         safe_close(attr.userns_fd);
8313         safe_close(file1_fd);
8314         safe_close(open_tree_fd);
8315
8316         return fret;
8317 }
8318
8319 #define PTR_TO_INT(p) ((int)((intptr_t)(p)))
8320 #define INT_TO_PTR(u) ((void *)((intptr_t)(u)))
8321
8322 static void *idmapped_mount_create_cb(void *data)
8323 {
8324         int fret = EXIT_FAILURE, open_tree_fd = PTR_TO_INT(data);
8325         struct mount_attr attr = {
8326                 .attr_set = MOUNT_ATTR_IDMAP,
8327         };
8328
8329         /* Changing mount properties on a detached mount. */
8330         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8331         if (attr.userns_fd < 0) {
8332                 log_stderr("failure: get_userns_fd");
8333                 goto out;
8334         }
8335
8336         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8337                 log_stderr("failure: sys_mount_setattr");
8338                 goto out;
8339         }
8340
8341         fret = EXIT_SUCCESS;
8342
8343 out:
8344         safe_close(attr.userns_fd);
8345         pthread_exit(INT_TO_PTR(fret));
8346 }
8347
/* This tries to verify that we never see an inconsistent ownership on-disk and
 * can't write invalid ids to disk. To do this we create a race between
 * idmapping a mount and creating files on it.
 * Note, while it is perfectly fine to see overflowuid and overflowgid as owner
 * if we create files through the open_tree_fd before the mount is idmapped but
 * look at the files after the mount has been idmapped, in this test it can never
 * be the case that we see overflowuid and overflowgid when we access the file
 * through a non-idmapped mount (in the initial user namespace).
 */
8357 static void *idmapped_mount_operations_cb(void *data)
8358 {
8359         int file1_fd = -EBADF, file2_fd = -EBADF, dir1_fd = -EBADF,
8360             dir1_fd2 = -EBADF, fret = EXIT_FAILURE,
8361             open_tree_fd = PTR_TO_INT(data);
8362
8363         if (!switch_fsids(10000, 10000)) {
8364                 log_stderr("failure: switch fsids");
8365                 goto out;
8366         }
8367
8368         file1_fd = openat(open_tree_fd, FILE1,
8369                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8370         if (file1_fd < 0) {
8371                 log_stderr("failure: openat");
8372                 goto out;
8373         }
8374
8375         file2_fd = openat(open_tree_fd, FILE2,
8376                           O_CREAT | O_EXCL | O_CLOEXEC, 0644);
8377         if (file2_fd < 0) {
8378                 log_stderr("failure: openat");
8379                 goto out;
8380         }
8381
8382         if (mkdirat(open_tree_fd, DIR1, 0777)) {
8383                 log_stderr("failure: mkdirat");
8384                 goto out;
8385         }
8386
8387         dir1_fd = openat(open_tree_fd, DIR1,
8388                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8389         if (dir1_fd < 0) {
8390                 log_stderr("failure: openat");
8391                 goto out;
8392         }
8393
8394         if (!__expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0, false) &&
8395             !__expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000, false) &&
8396             !__expected_uid_gid(open_tree_fd, FILE1, 0, t_overflowuid, t_overflowgid, false)) {
8397                 log_stderr("failure: expected_uid_gid");
8398                 goto out;
8399         }
8400
8401         if (!__expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0, false) &&
8402             !__expected_uid_gid(open_tree_fd, FILE2, 0, 10000, 10000, false) &&
8403             !__expected_uid_gid(open_tree_fd, FILE2, 0, t_overflowuid, t_overflowgid, false)) {
8404                 log_stderr("failure: expected_uid_gid");
8405                 goto out;
8406         }
8407
8408         if (!__expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0, false) &&
8409             !__expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000, false) &&
8410             !__expected_uid_gid(open_tree_fd, DIR1, 0, t_overflowuid, t_overflowgid, false)) {
8411                 log_stderr("failure: expected_uid_gid");
8412                 goto out;
8413         }
8414
8415         if (!__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 0, 0, false) &&
8416             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, 10000, 10000, false) &&
8417             !__expected_uid_gid(dir1_fd, "", AT_EMPTY_PATH, t_overflowuid, t_overflowgid, false)) {
8418                 log_stderr("failure: expected_uid_gid");
8419                 goto out;
8420         }
8421
8422         dir1_fd2 = openat(t_dir1_fd, DIR1,
8423                          O_RDONLY | O_DIRECTORY | O_CLOEXEC);
8424         if (dir1_fd2 < 0) {
8425                 log_stderr("failure: openat");
8426                 goto out;
8427         }
8428
8429         if (!__expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0, false) &&
8430             !__expected_uid_gid(t_dir1_fd, FILE1, 0, 10000, 10000, false)) {
8431                 log_stderr("failure: expected_uid_gid");
8432                 goto out;
8433         }
8434
8435         if (!__expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0, false) &&
8436             !__expected_uid_gid(t_dir1_fd, FILE2, 0, 10000, 10000, false)) {
8437                 log_stderr("failure: expected_uid_gid");
8438                 goto out;
8439         }
8440
8441         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8442             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8443                 log_stderr("failure: expected_uid_gid");
8444                 goto out;
8445         }
8446
8447         if (!__expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0, false) &&
8448             !__expected_uid_gid(t_dir1_fd, DIR1, 0, 10000, 10000, false)) {
8449                 log_stderr("failure: expected_uid_gid");
8450                 goto out;
8451         }
8452
8453         if (!__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 0, 0, false) &&
8454             !__expected_uid_gid(dir1_fd2, "", AT_EMPTY_PATH, 10000, 10000, false)) {
8455                 log_stderr("failure: expected_uid_gid");
8456                 goto out;
8457         }
8458
8459         fret = EXIT_SUCCESS;
8460
8461 out:
8462         safe_close(file1_fd);
8463         safe_close(file2_fd);
8464         safe_close(dir1_fd);
8465         safe_close(dir1_fd2);
8466
8467         pthread_exit(INT_TO_PTR(fret));
8468 }
8469
8470 static int threaded_idmapped_mount_interactions(void)
8471 {
8472         int i;
8473         int fret = -1;
8474         pid_t pid;
8475         pthread_attr_t thread_attr;
8476         pthread_t threads[2];
8477
8478         pthread_attr_init(&thread_attr);
8479
8480         for (i = 0; i < 1000; i++) {
8481                 int ret1 = 0, ret2 = 0, tret1 = 0, tret2 = 0;
8482
8483                 pid = fork();
8484                 if (pid < 0) {
8485                         log_stderr("failure: fork");
8486                         goto out;
8487                 }
8488                 if (pid == 0) {
8489                         int open_tree_fd = -EBADF;
8490
8491                         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8492                                                      AT_EMPTY_PATH |
8493                                                      AT_NO_AUTOMOUNT |
8494                                                      AT_SYMLINK_NOFOLLOW |
8495                                                      OPEN_TREE_CLOEXEC |
8496                                                      OPEN_TREE_CLONE);
8497                         if (open_tree_fd < 0)
8498                                 die("failure: sys_open_tree");
8499
8500                         if (pthread_create(&threads[0], &thread_attr,
8501                                            idmapped_mount_create_cb,
8502                                            INT_TO_PTR(open_tree_fd)))
8503                                 die("failure: pthread_create");
8504
8505                         if (pthread_create(&threads[1], &thread_attr,
8506                                            idmapped_mount_operations_cb,
8507                                            INT_TO_PTR(open_tree_fd)))
8508                                 die("failure: pthread_create");
8509
8510                         ret1 = pthread_join(threads[0], INT_TO_PTR(tret1));
8511                         ret2 = pthread_join(threads[1], INT_TO_PTR(tret2));
8512
8513                         if (ret1) {
8514                                 errno = ret1;
8515                                 die("failure: pthread_join");
8516                         }
8517
8518                         if (ret2) {
8519                                 errno = ret2;
8520                                 die("failure: pthread_join");
8521                         }
8522
8523                         if (tret1 || tret2)
8524                                 exit(EXIT_FAILURE);
8525
8526                         exit(EXIT_SUCCESS);
8527
8528                 }
8529
8530                 if (wait_for_pid(pid)) {
8531                         log_stderr("failure: iteration %d", i);
8532                         goto out;
8533                 }
8534
8535                 rm_r(t_dir1_fd, ".");
8536
8537         }
8538
8539         fret = 0;
8540         log_debug("Ran test");
8541
8542 out:
8543         return fret;
8544 }
8545
8546 static int setattr_truncate(void)
8547 {
8548         int fret = -1;
8549         int file1_fd = -EBADF;
8550
8551         /* create regular file via open() */
8552         file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8553         if (file1_fd < 0) {
8554                 log_stderr("failure: create");
8555                 goto out;
8556         }
8557
8558         if (ftruncate(file1_fd, 10000)) {
8559                 log_stderr("failure: ftruncate");
8560                 goto out;
8561         }
8562
8563         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8564                 log_stderr("failure: check ownership");
8565                 goto out;
8566         }
8567
8568         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 10000)) {
8569                 log_stderr("failure: expected_file_size");
8570                 goto out;
8571         }
8572
8573         if (ftruncate(file1_fd, 0)) {
8574                 log_stderr("failure: ftruncate");
8575                 goto out;
8576         }
8577
8578         if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) {
8579                 log_stderr("failure: check ownership");
8580                 goto out;
8581         }
8582
8583         if (!expected_file_size(file1_fd, "", AT_EMPTY_PATH, 0)) {
8584                 log_stderr("failure: expected_file_size");
8585                 goto out;
8586         }
8587
8588         if (unlinkat(t_dir1_fd, FILE1, 0)) {
8589                 log_stderr("failure: remove");
8590                 goto out;
8591         }
8592
8593         fret = 0;
8594         log_debug("Ran test");
8595 out:
8596         safe_close(file1_fd);
8597
8598         return fret;
8599 }
8600
8601 static int setattr_truncate_idmapped(void)
8602 {
8603         int fret = -1;
8604         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8605         pid_t pid;
8606         struct mount_attr attr = {
8607                 .attr_set = MOUNT_ATTR_IDMAP,
8608         };
8609
8610         /* Changing mount properties on a detached mount. */
8611         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8612         if (attr.userns_fd < 0) {
8613                 log_stderr("failure: get_userns_fd");
8614                 goto out;
8615         }
8616
8617         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8618                                      AT_EMPTY_PATH |
8619                                      AT_NO_AUTOMOUNT |
8620                                      AT_SYMLINK_NOFOLLOW |
8621                                      OPEN_TREE_CLOEXEC |
8622                                      OPEN_TREE_CLONE);
8623         if (open_tree_fd < 0) {
8624                 log_stderr("failure: sys_open_tree");
8625                 goto out;
8626         }
8627
8628         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8629                 log_stderr("failure: sys_mount_setattr");
8630                 goto out;
8631         }
8632
8633         pid = fork();
8634         if (pid < 0) {
8635                 log_stderr("failure: fork");
8636                 goto out;
8637         }
8638         if (pid == 0) {
8639                 if (!switch_ids(10000, 10000))
8640                         die("failure: switch_ids");
8641
8642                 /* create regular file via open() */
8643                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8644                 if (file1_fd < 0)
8645                         die("failure: create");
8646
8647                 if (ftruncate(file1_fd, 10000))
8648                         die("failure: ftruncate");
8649
8650                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8651                         die("failure: check ownership");
8652
8653                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8654                         die("failure: expected_file_size");
8655
8656                 if (ftruncate(file1_fd, 0))
8657                         die("failure: ftruncate");
8658
8659                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8660                         die("failure: check ownership");
8661
8662                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8663                         die("failure: expected_file_size");
8664
8665                 exit(EXIT_SUCCESS);
8666         }
8667         if (wait_for_pid(pid))
8668                 goto out;
8669
8670         pid = fork();
8671         if (pid < 0) {
8672                 log_stderr("failure: fork");
8673                 goto out;
8674         }
8675         if (pid == 0) {
8676                 int file1_fd2 = -EBADF;
8677
8678                 /* create regular file via open() */
8679                 file1_fd2 = openat(open_tree_fd, FILE1, O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8680                 if (file1_fd2 < 0)
8681                         die("failure: create");
8682
8683                 if (ftruncate(file1_fd2, 10000))
8684                         die("failure: ftruncate");
8685
8686                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8687                         die("failure: check ownership");
8688
8689                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8690                         die("failure: expected_file_size");
8691
8692                 if (ftruncate(file1_fd2, 0))
8693                         die("failure: ftruncate");
8694
8695                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 10000, 10000))
8696                         die("failure: check ownership");
8697
8698                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8699                         die("failure: expected_file_size");
8700
8701                 exit(EXIT_SUCCESS);
8702         }
8703         if (wait_for_pid(pid))
8704                 goto out;
8705
8706         fret = 0;
8707         log_debug("Ran test");
8708 out:
8709         safe_close(file1_fd);
8710         safe_close(open_tree_fd);
8711
8712         return fret;
8713 }
8714
8715 static int setattr_truncate_idmapped_in_userns(void)
8716 {
8717         int fret = -1;
8718         int file1_fd = -EBADF, open_tree_fd = -EBADF;
8719         struct mount_attr attr = {
8720                 .attr_set = MOUNT_ATTR_IDMAP,
8721         };
8722         pid_t pid;
8723
8724         /* Changing mount properties on a detached mount. */
8725         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
8726         if (attr.userns_fd < 0) {
8727                 log_stderr("failure: get_userns_fd");
8728                 goto out;
8729         }
8730
8731         open_tree_fd = sys_open_tree(t_dir1_fd, "",
8732                                      AT_EMPTY_PATH |
8733                                      AT_NO_AUTOMOUNT |
8734                                      AT_SYMLINK_NOFOLLOW |
8735                                      OPEN_TREE_CLOEXEC |
8736                                      OPEN_TREE_CLONE);
8737         if (open_tree_fd < 0) {
8738                 log_stderr("failure: sys_open_tree");
8739                 goto out;
8740         }
8741
8742         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
8743                 log_stderr("failure: sys_mount_setattr");
8744                 goto out;
8745         }
8746
8747         pid = fork();
8748         if (pid < 0) {
8749                 log_stderr("failure: fork");
8750                 goto out;
8751         }
8752         if (pid == 0) {
8753                 if (!switch_userns(attr.userns_fd, 0, 0, false))
8754                         die("failure: switch_userns");
8755
8756                 /* create regular file via open() */
8757                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8758                 if (file1_fd < 0)
8759                         die("failure: create");
8760
8761                 if (ftruncate(file1_fd, 10000))
8762                         die("failure: ftruncate");
8763
8764                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8765                         die("failure: check ownership");
8766
8767                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8768                         die("failure: expected_file_size");
8769
8770                 if (ftruncate(file1_fd, 0))
8771                         die("failure: ftruncate");
8772
8773                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8774                         die("failure: check ownership");
8775
8776                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8777                         die("failure: expected_file_size");
8778
8779                 if (unlinkat(open_tree_fd, FILE1, 0))
8780                         die("failure: delete");
8781
8782                 exit(EXIT_SUCCESS);
8783         }
8784         if (wait_for_pid(pid))
8785                 goto out;
8786
8787         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8788                 log_stderr("failure: fchownat");
8789                 goto out;
8790         }
8791
8792         if (fchownat(t_dir1_fd, "", -1, 1000, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8793                 log_stderr("failure: fchownat");
8794                 goto out;
8795         }
8796
8797         pid = fork();
8798         if (pid < 0) {
8799                 log_stderr("failure: fork");
8800                 goto out;
8801         }
8802         if (pid == 0) {
8803                 if (!caps_supported()) {
8804                         log_debug("skip: capability library not installed");
8805                         exit(EXIT_SUCCESS);
8806                 }
8807
8808                 if (!switch_userns(attr.userns_fd, 0, 0, true))
8809                         die("failure: switch_userns");
8810
8811                 /* create regular file via open() */
8812                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8813                 if (file1_fd < 0)
8814                         die("failure: create");
8815
8816                 if (ftruncate(file1_fd, 10000))
8817                         die("failure: ftruncate");
8818
8819                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8820                         die("failure: check ownership");
8821
8822                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8823                         die("failure: expected_file_size");
8824
8825                 if (ftruncate(file1_fd, 0))
8826                         die("failure: ftruncate");
8827
8828                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0))
8829                         die("failure: check ownership");
8830
8831                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8832                         die("failure: expected_file_size");
8833
8834                 if (unlinkat(open_tree_fd, FILE1, 0))
8835                         die("failure: delete");
8836
8837                 exit(EXIT_SUCCESS);
8838         }
8839         if (wait_for_pid(pid))
8840                 goto out;
8841
8842         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8843                 log_stderr("failure: fchownat");
8844                 goto out;
8845         }
8846
8847         if (fchownat(t_dir1_fd, "", -1, 0, AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) {
8848                 log_stderr("failure: fchownat");
8849                 goto out;
8850         }
8851
8852         pid = fork();
8853         if (pid < 0) {
8854                 log_stderr("failure: fork");
8855                 goto out;
8856         }
8857         if (pid == 0) {
8858                 if (!caps_supported()) {
8859                         log_debug("skip: capability library not installed");
8860                         exit(EXIT_SUCCESS);
8861                 }
8862
8863                 if (!switch_userns(attr.userns_fd, 0, 1000, true))
8864                         die("failure: switch_userns");
8865
8866                 /* create regular file via open() */
8867                 file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_RDWR | O_CLOEXEC, S_IXGRP | S_ISGID);
8868                 if (file1_fd < 0)
8869                         die("failure: create");
8870
8871                 if (ftruncate(file1_fd, 10000))
8872                         die("failure: ftruncate");
8873
8874                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8875                         die("failure: check ownership");
8876
8877                 if (!expected_file_size(open_tree_fd, FILE1, 0, 10000))
8878                         die("failure: expected_file_size");
8879
8880                 if (ftruncate(file1_fd, 0))
8881                         die("failure: ftruncate");
8882
8883                 if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 1000))
8884                         die("failure: check ownership");
8885
8886                 if (!expected_file_size(open_tree_fd, FILE1, 0, 0))
8887                         die("failure: expected_file_size");
8888
8889                 if (unlinkat(open_tree_fd, FILE1, 0))
8890                         die("failure: delete");
8891
8892                 exit(EXIT_SUCCESS);
8893         }
8894         if (wait_for_pid(pid))
8895                 goto out;
8896
8897         fret = 0;
8898         log_debug("Ran test");
8899 out:
8900         safe_close(attr.userns_fd);
8901         safe_close(file1_fd);
8902         safe_close(open_tree_fd);
8903
8904         return fret;
8905 }
8906
8907 static int nested_userns(void)
8908 {
8909         int fret = -1;
8910         int ret;
8911         pid_t pid;
8912         unsigned int id;
8913         struct list *it, *next;
8914         struct userns_hierarchy hierarchy[] = {
8915                 { .level = 1, .fd_userns = -EBADF, },
8916                 { .level = 2, .fd_userns = -EBADF, },
8917                 { .level = 3, .fd_userns = -EBADF, },
8918                 { .level = 4, .fd_userns = -EBADF, },
8919                 /* Dummy entry that marks the end. */
8920                 { .level = MAX_USERNS_LEVEL, .fd_userns = -EBADF, },
8921         };
8922         struct mount_attr attr_level1 = {
8923                 .attr_set       = MOUNT_ATTR_IDMAP,
8924                 .userns_fd      = -EBADF,
8925         };
8926         struct mount_attr attr_level2 = {
8927                 .attr_set       = MOUNT_ATTR_IDMAP,
8928                 .userns_fd      = -EBADF,
8929         };
8930         struct mount_attr attr_level3 = {
8931                 .attr_set       = MOUNT_ATTR_IDMAP,
8932                 .userns_fd      = -EBADF,
8933         };
8934         struct mount_attr attr_level4 = {
8935                 .attr_set       = MOUNT_ATTR_IDMAP,
8936                 .userns_fd      = -EBADF,
8937         };
8938         int fd_dir1 = -EBADF,
8939             fd_open_tree_level1 = -EBADF,
8940             fd_open_tree_level2 = -EBADF,
8941             fd_open_tree_level3 = -EBADF,
8942             fd_open_tree_level4 = -EBADF;
8943         const unsigned int id_file_range = 10000;
8944
8945         list_init(&hierarchy[0].id_map);
8946         list_init(&hierarchy[1].id_map);
8947         list_init(&hierarchy[2].id_map);
8948         list_init(&hierarchy[3].id_map);
8949
8950         /*
8951          * Give a large map to the outermost user namespace so we can create
8952          * comfortable nested maps.
8953          */
8954         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_UID);
8955         if (ret) {
8956                 log_stderr("failure: adding uidmap for userns at level 1");
8957                 goto out;
8958         }
8959
8960         ret = add_map_entry(&hierarchy[0].id_map, 1000000, 0, 1000000000, ID_TYPE_GID);
8961         if (ret) {
8962                 log_stderr("failure: adding gidmap for userns at level 1");
8963                 goto out;
8964         }
8965
8966         /* This is uid:0->2000000:100000000 in init userns. */
8967         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_UID);
8968         if (ret) {
8969                 log_stderr("failure: adding uidmap for userns at level 2");
8970                 goto out;
8971         }
8972
8973         /* This is gid:0->2000000:100000000 in init userns. */
8974         ret = add_map_entry(&hierarchy[1].id_map, 1000000, 0, 100000000, ID_TYPE_GID);
8975         if (ret) {
8976                 log_stderr("failure: adding gidmap for userns at level 2");
8977                 goto out;
8978         }
8979
8980         /* This is uid:0->3000000:999 in init userns. */
8981         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_UID);
8982         if (ret) {
8983                 log_stderr("failure: adding uidmap for userns at level 3");
8984                 goto out;
8985         }
8986
8987         /* This is gid:0->3000000:999 in the init userns. */
8988         ret = add_map_entry(&hierarchy[2].id_map, 1000000, 0, 999, ID_TYPE_GID);
8989         if (ret) {
8990                 log_stderr("failure: adding gidmap for userns at level 3");
8991                 goto out;
8992         }
8993
8994         /* id 999 will remain unmapped. */
8995
8996         /* This is uid:1000->2001000:1 in init userns. */
8997         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_UID);
8998         if (ret) {
8999                 log_stderr("failure: adding uidmap for userns at level 3");
9000                 goto out;
9001         }
9002
9003         /* This is gid:1000->2001000:1 in init userns. */
9004         ret = add_map_entry(&hierarchy[2].id_map, 1000, 1000, 1, ID_TYPE_GID);
9005         if (ret) {
9006                 log_stderr("failure: adding gidmap for userns at level 3");
9007                 goto out;
9008         }
9009
9010         /* This is uid:1001->3001001:10000 in init userns. */
9011         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_UID);
9012         if (ret) {
9013                 log_stderr("failure: adding uidmap for userns at level 3");
9014                 goto out;
9015         }
9016
9017         /* This is gid:1001->3001001:10000 in init userns. */
9018         ret = add_map_entry(&hierarchy[2].id_map, 1001001, 1001, 10000000, ID_TYPE_GID);
9019         if (ret) {
9020                 log_stderr("failure: adding gidmap for userns at level 3");
9021                 goto out;
9022         }
9023
9024         /* Don't write a mapping in the 4th userns. */
9025         list_empty(&hierarchy[4].id_map);
9026
9027         /* Create the actual userns hierarchy. */
9028         ret = create_userns_hierarchy(hierarchy);
9029         if (ret) {
9030                 log_stderr("failure: create userns hierarchy");
9031                 goto out;
9032         }
9033
9034         attr_level1.userns_fd = hierarchy[0].fd_userns;
9035         attr_level2.userns_fd = hierarchy[1].fd_userns;
9036         attr_level3.userns_fd = hierarchy[2].fd_userns;
9037         attr_level4.userns_fd = hierarchy[3].fd_userns;
9038
9039         /*
9040          * Create one directory where we create files for each uid/gid within
9041          * the first userns.
9042          */
9043         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
9044                 log_stderr("failure: mkdirat");
9045                 goto out;
9046         }
9047
9048         fd_dir1 = openat(t_dir1_fd, DIR1, O_DIRECTORY | O_CLOEXEC);
9049         if (fd_dir1 < 0) {
9050                 log_stderr("failure: openat");
9051                 goto out;
9052         }
9053
9054         for (id = 0; id <= id_file_range; id++) {
9055                 char file[256];
9056
9057                 snprintf(file, sizeof(file), DIR1 "/" FILE1 "_%u", id);
9058
9059                 if (mknodat(t_dir1_fd, file, S_IFREG | 0644, 0)) {
9060                         log_stderr("failure: create %s", file);
9061                         goto out;
9062                 }
9063
9064                 if (fchownat(t_dir1_fd, file, id, id, AT_SYMLINK_NOFOLLOW)) {
9065                         log_stderr("failure: fchownat %s", file);
9066                         goto out;
9067                 }
9068
9069                 if (!expected_uid_gid(t_dir1_fd, file, 0, id, id)) {
9070                         log_stderr("failure: check ownership %s", file);
9071                         goto out;
9072                 }
9073         }
9074
9075         /* Create detached mounts for all the user namespaces. */
9076         fd_open_tree_level1 = sys_open_tree(t_dir1_fd, DIR1,
9077                                             AT_NO_AUTOMOUNT |
9078                                             AT_SYMLINK_NOFOLLOW |
9079                                             OPEN_TREE_CLOEXEC |
9080                                             OPEN_TREE_CLONE);
9081         if (fd_open_tree_level1 < 0) {
9082                 log_stderr("failure: sys_open_tree");
9083                 goto out;
9084         }
9085
9086         fd_open_tree_level2 = sys_open_tree(t_dir1_fd, DIR1,
9087                                             AT_NO_AUTOMOUNT |
9088                                             AT_SYMLINK_NOFOLLOW |
9089                                             OPEN_TREE_CLOEXEC |
9090                                             OPEN_TREE_CLONE);
9091         if (fd_open_tree_level2 < 0) {
9092                 log_stderr("failure: sys_open_tree");
9093                 goto out;
9094         }
9095
9096         fd_open_tree_level3 = sys_open_tree(t_dir1_fd, DIR1,
9097                                             AT_NO_AUTOMOUNT |
9098                                             AT_SYMLINK_NOFOLLOW |
9099                                             OPEN_TREE_CLOEXEC |
9100                                             OPEN_TREE_CLONE);
9101         if (fd_open_tree_level3 < 0) {
9102                 log_stderr("failure: sys_open_tree");
9103                 goto out;
9104         }
9105
9106         fd_open_tree_level4 = sys_open_tree(t_dir1_fd, DIR1,
9107                                             AT_NO_AUTOMOUNT |
9108                                             AT_SYMLINK_NOFOLLOW |
9109                                             OPEN_TREE_CLOEXEC |
9110                                             OPEN_TREE_CLONE);
9111         if (fd_open_tree_level4 < 0) {
9112                 log_stderr("failure: sys_open_tree");
9113                 goto out;
9114         }
9115
9116         /* Turn detached mounts into detached idmapped mounts. */
9117         if (sys_mount_setattr(fd_open_tree_level1, "", AT_EMPTY_PATH,
9118                               &attr_level1, sizeof(attr_level1))) {
9119                 log_stderr("failure: sys_mount_setattr");
9120                 goto out;
9121         }
9122
9123         if (sys_mount_setattr(fd_open_tree_level2, "", AT_EMPTY_PATH,
9124                               &attr_level2, sizeof(attr_level2))) {
9125                 log_stderr("failure: sys_mount_setattr");
9126                 goto out;
9127         }
9128
9129         if (sys_mount_setattr(fd_open_tree_level3, "", AT_EMPTY_PATH,
9130                               &attr_level3, sizeof(attr_level3))) {
9131                 log_stderr("failure: sys_mount_setattr");
9132                 goto out;
9133         }
9134
9135         if (sys_mount_setattr(fd_open_tree_level4, "", AT_EMPTY_PATH,
9136                               &attr_level4, sizeof(attr_level4))) {
9137                 log_stderr("failure: sys_mount_setattr");
9138                 goto out;
9139         }
9140
9141         /* Verify that ownership looks correct for callers in the init userns. */
9142         for (id = 0; id <= id_file_range; id++) {
9143                 bool bret;
9144                 unsigned int id_level1, id_level2, id_level3;
9145                 char file[256];
9146
9147                 snprintf(file, sizeof(file), FILE1 "_%u", id);
9148
9149                 id_level1 = id + 1000000;
9150                 if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1)) {
9151                         log_stderr("failure: check ownership %s", file);
9152                         goto out;
9153                 }
9154
9155                 id_level2 = id + 2000000;
9156                 if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2)) {
9157                         log_stderr("failure: check ownership %s", file);
9158                         goto out;
9159                 }
9160
9161                 if (id == 999) {
9162                         /* This id is unmapped. */
9163                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9164                 } else if (id == 1000) {
9165                         id_level3 = id + 2000000; /* We punched a hole in the map at 1000. */
9166                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9167                 } else {
9168                         id_level3 = id + 3000000; /* Rest is business as usual. */
9169                         bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9170                 }
9171                 if (!bret) {
9172                         log_stderr("failure: check ownership %s", file);
9173                         goto out;
9174                 }
9175
9176                 if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid)) {
9177                         log_stderr("failure: check ownership %s", file);
9178                         goto out;
9179                 }
9180         }
9181
9182         /* Verify that ownership looks correct for callers in the first userns. */
9183         pid = fork();
9184         if (pid < 0) {
9185                 log_stderr("failure: fork");
9186                 goto out;
9187         }
9188         if (pid == 0) {
9189                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9190                         die("failure: switch_userns");
9191
9192                 for (id = 0; id <= id_file_range; id++) {
9193                         bool bret;
9194                         unsigned int id_level1, id_level2, id_level3;
9195                         char file[256];
9196
9197                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9198
9199                         id_level1 = id;
9200                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9201                                 die("failure: check ownership %s", file);
9202
9203                         id_level2 = id + 1000000;
9204                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9205                                 die("failure: check ownership %s", file);
9206
9207                         if (id == 999) {
9208                                 /* This id is unmapped. */
9209                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9210                         } else if (id == 1000) {
9211                                 id_level3 = id + 1000000; /* We punched a hole in the map at 1000. */
9212                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9213                         } else {
9214                                 id_level3 = id + 2000000; /* Rest is business as usual. */
9215                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9216                         }
9217                         if (!bret)
9218                                 die("failure: check ownership %s", file);
9219
9220                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9221                                 die("failure: check ownership %s", file);
9222                 }
9223
9224                 exit(EXIT_SUCCESS);
9225         }
9226         if (wait_for_pid(pid))
9227                 goto out;
9228
9229         /* Verify that ownership looks correct for callers in the second userns. */
9230         pid = fork();
9231         if (pid < 0) {
9232                 log_stderr("failure: fork");
9233                 goto out;
9234         }
9235         if (pid == 0) {
9236                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9237                         die("failure: switch_userns");
9238
9239                 for (id = 0; id <= id_file_range; id++) {
9240                         bool bret;
9241                         unsigned int id_level2, id_level3;
9242                         char file[256];
9243
9244                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9245
9246                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9247                                 die("failure: check ownership %s", file);
9248
9249                         id_level2 = id;
9250                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9251                                 die("failure: check ownership %s", file);
9252
9253                         if (id == 999) {
9254                                 /* This id is unmapped. */
9255                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9256                         } else if (id == 1000) {
9257                                 id_level3 = id; /* We punched a hole in the map at 1000. */
9258                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9259                         } else {
9260                                 id_level3 = id + 1000000; /* Rest is business as usual. */
9261                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9262                         }
9263                         if (!bret)
9264                                 die("failure: check ownership %s", file);
9265
9266                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9267                                 die("failure: check ownership %s", file);
9268                 }
9269
9270                 exit(EXIT_SUCCESS);
9271         }
9272         if (wait_for_pid(pid))
9273                 goto out;
9274
9275         /* Verify that ownership looks correct for callers in the third userns. */
9276         pid = fork();
9277         if (pid < 0) {
9278                 log_stderr("failure: fork");
9279                 goto out;
9280         }
9281         if (pid == 0) {
9282                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9283                         die("failure: switch_userns");
9284
9285                 for (id = 0; id <= id_file_range; id++) {
9286                         bool bret;
9287                         unsigned int id_level2, id_level3;
9288                         char file[256];
9289
9290                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9291
9292                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9293                                 die("failure: check ownership %s", file);
9294
9295                         if (id == 1000) {
9296                                 /*
9297                                  * The idmapping of the third userns has a hole
9298                                  * at uid/gid 1000. That means:
9299                                  * - 1000->userns_0(2000000) // init userns
9300                                  * - 1000->userns_1(2000000) // level 1
9301                                  * - 1000->userns_2(1000000) // level 2
9302                                  * - 1000->userns_3(1000)    // level 3 (because level 3 has a hole)
9303                                  */
9304                                 id_level2 = id;
9305                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2);
9306                         } else {
9307                                 bret = expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid);
9308                         }
9309                         if (!bret)
9310                                 die("failure: check ownership %s", file);
9311
9312
9313                         if (id == 999) {
9314                                 /* This id is unmapped. */
9315                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9316                         } else {
9317                                 id_level3 = id; /* Rest is business as usual. */
9318                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9319                         }
9320                         if (!bret)
9321                                 die("failure: check ownership %s", file);
9322
9323                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9324                                 die("failure: check ownership %s", file);
9325                 }
9326
9327                 exit(EXIT_SUCCESS);
9328         }
9329         if (wait_for_pid(pid))
9330                 goto out;
9331
9332         /* Verify that ownership looks correct for callers in the fourth userns. */
9333         pid = fork();
9334         if (pid < 0) {
9335                 log_stderr("failure: fork");
9336                 goto out;
9337         }
9338         if (pid == 0) {
9339                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9340                         die("failure: switch_userns");
9341
9342                 for (id = 0; id <= id_file_range; id++) {
9343                         char file[256];
9344
9345                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9346
9347                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9348                                 die("failure: check ownership %s", file);
9349
9350                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9351                                 die("failure: check ownership %s", file);
9352
9353                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9354                                 die("failure: check ownership %s", file);
9355
9356                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9357                                 die("failure: check ownership %s", file);
9358                 }
9359
9360                 exit(EXIT_SUCCESS);
9361         }
9362         if (wait_for_pid(pid))
9363                 goto out;
9364
9365         /* Verify that chown works correctly for callers in the first userns. */
9366         pid = fork();
9367         if (pid < 0) {
9368                 log_stderr("failure: fork");
9369                 goto out;
9370         }
9371         if (pid == 0) {
9372                 if (!switch_userns(attr_level1.userns_fd, 0, 0, false))
9373                         die("failure: switch_userns");
9374
9375                 for (id = 0; id <= id_file_range; id++) {
9376                         bool bret;
9377                         unsigned int id_level1, id_level2, id_level3, id_new;
9378                         char file[256];
9379
9380                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9381
9382                         id_new = id + 1;
9383                         if (fchownat(fd_open_tree_level1, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9384                                 die("failure: fchownat %s", file);
9385
9386                         id_level1 = id_new;
9387                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, id_level1, id_level1))
9388                                 die("failure: check ownership %s", file);
9389
9390                         id_level2 = id_new + 1000000;
9391                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9392                                 die("failure: check ownership %s", file);
9393
9394                         if (id_new == 999) {
9395                                 /* This id is unmapped. */
9396                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9397                         } else if (id_new == 1000) {
9398                                 id_level3 = id_new + 1000000; /* We punched a hole in the map at 1000. */
9399                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9400                         } else {
9401                                 id_level3 = id_new + 2000000; /* Rest is business as usual. */
9402                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9403                         }
9404                         if (!bret)
9405                                 die("failure: check ownership %s", file);
9406
9407                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9408                                 die("failure: check ownership %s", file);
9409
9410                         /* Revert ownership. */
9411                         if (fchownat(fd_open_tree_level1, file, id, id, AT_SYMLINK_NOFOLLOW))
9412                                 die("failure: fchownat %s", file);
9413                 }
9414
9415                 exit(EXIT_SUCCESS);
9416         }
9417         if (wait_for_pid(pid))
9418                 goto out;
9419
9420         /* Verify that chown works correctly for callers in the second userns. */
9421         pid = fork();
9422         if (pid < 0) {
9423                 log_stderr("failure: fork");
9424                 goto out;
9425         }
9426         if (pid == 0) {
9427                 if (!switch_userns(attr_level2.userns_fd, 0, 0, false))
9428                         die("failure: switch_userns");
9429
9430                 for (id = 0; id <= id_file_range; id++) {
9431                         bool bret;
9432                         unsigned int id_level2, id_level3, id_new;
9433                         char file[256];
9434
9435                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9436
9437                         id_new = id + 1;
9438                         if (fchownat(fd_open_tree_level2, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9439                                 die("failure: fchownat %s", file);
9440
9441                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9442                                 die("failure: check ownership %s", file);
9443
9444                         id_level2 = id_new;
9445                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, id_level2, id_level2))
9446                                 die("failure: check ownership %s", file);
9447
9448                         if (id_new == 999) {
9449                                 /* This id is unmapped. */
9450                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid);
9451                         } else if (id_new == 1000) {
9452                                 id_level3 = id_new; /* We punched a hole in the map at 1000. */
9453                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9454                         } else {
9455                                 id_level3 = id_new + 1000000; /* Rest is business as usual. */
9456                                 bret = expected_uid_gid(fd_open_tree_level3, file, 0, id_level3, id_level3);
9457                         }
9458                         if (!bret)
9459                                 die("failure: check ownership %s", file);
9460
9461                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9462                                 die("failure: check ownership %s", file);
9463
9464                         /* Revert ownership. */
9465                         if (fchownat(fd_open_tree_level2, file, id, id, AT_SYMLINK_NOFOLLOW))
9466                                 die("failure: fchownat %s", file);
9467                 }
9468
9469                 exit(EXIT_SUCCESS);
9470         }
9471         if (wait_for_pid(pid))
9472                 goto out;
9473
9474         /* Verify that chown works correctly for callers in the third userns. */
9475         pid = fork();
9476         if (pid < 0) {
9477                 log_stderr("failure: fork");
9478                 goto out;
9479         }
9480         if (pid == 0) {
9481                 if (!switch_userns(attr_level3.userns_fd, 0, 0, false))
9482                         die("failure: switch_userns");
9483
9484                 for (id = 0; id <= id_file_range; id++) {
9485                         unsigned int id_new;
9486                         char file[256];
9487
9488                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9489
9490                         id_new = id + 1;
9491                         if (id_new == 999 || id_new == 1000) {
9492                                 /*
9493                                  * We can't change ownership as we can't
9494                                  * chown from or to an unmapped id.
9495                                  */
9496                                 if (!fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9497                                         die("failure: fchownat %s", file);
9498                         } else {
9499                                 if (fchownat(fd_open_tree_level3, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9500                                         die("failure: fchownat %s", file);
9501                         }
9502
9503                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9504                                 die("failure: check ownership %s", file);
9505
9506                         /* There's no id 1000 anymore as we changed ownership for id 1000 to 1001 above. */
9507                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9508                                 die("failure: check ownership %s", file);
9509
9510                         if (id_new == 999) {
9511                                 /*
9512                                  * We did not change ownership as we can't
9513                                  * chown to an unmapped id.
9514                                  */
9515                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id, id))
9516                                         die("failure: check ownership %s", file);
9517                         } else if (id_new == 1000) {
9518                                 /*
9519                                  * We did not change ownership as we can't
9520                                  * chown from an unmapped id.
9521                                  */
9522                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9523                                         die("failure: check ownership %s", file);
9524                         } else {
9525                                 if (!expected_uid_gid(fd_open_tree_level3, file, 0, id_new, id_new))
9526                                         die("failure: check ownership %s", file);
9527                         }
9528
9529                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9530                                 die("failure: check ownership %s", file);
9531
9532                         /* Revert ownership. */
9533                         if (id_new != 999 && id_new != 1000) {
9534                                 if (fchownat(fd_open_tree_level3, file, id, id, AT_SYMLINK_NOFOLLOW))
9535                                         die("failure: fchownat %s", file);
9536                         }
9537                 }
9538
9539                 exit(EXIT_SUCCESS);
9540         }
9541         if (wait_for_pid(pid))
9542                 goto out;
9543
9544         /* Verify that chown works correctly for callers in the fourth userns. */
9545         pid = fork();
9546         if (pid < 0) {
9547                 log_stderr("failure: fork");
9548                 goto out;
9549         }
9550         if (pid == 0) {
9551                 if (setns(attr_level4.userns_fd, CLONE_NEWUSER))
9552                         die("failure: switch_userns");
9553
9554                 for (id = 0; id <= id_file_range; id++) {
9555                         char file[256];
9556                         unsigned long id_new;
9557
9558                         snprintf(file, sizeof(file), FILE1 "_%u", id);
9559
9560                         id_new = id + 1;
9561                         if (!fchownat(fd_open_tree_level4, file, id_new, id_new, AT_SYMLINK_NOFOLLOW))
9562                                 die("failure: fchownat %s", file);
9563
9564                         if (!expected_uid_gid(fd_open_tree_level1, file, 0, t_overflowuid, t_overflowgid))
9565                                 die("failure: check ownership %s", file);
9566
9567                         if (!expected_uid_gid(fd_open_tree_level2, file, 0, t_overflowuid, t_overflowgid))
9568                                 die("failure: check ownership %s", file);
9569
9570                         if (!expected_uid_gid(fd_open_tree_level3, file, 0, t_overflowuid, t_overflowgid))
9571                                 die("failure: check ownership %s", file);
9572
9573                         if (!expected_uid_gid(fd_open_tree_level4, file, 0, t_overflowuid, t_overflowgid))
9574                                 die("failure: check ownership %s", file);
9575
9576                 }
9577
9578                 exit(EXIT_SUCCESS);
9579         }
9580         if (wait_for_pid(pid))
9581                 goto out;
9582
9583         fret = 0;
9584         log_debug("Ran test");
9585
9586 out:
9587         list_for_each_safe(it, &hierarchy[0].id_map, next) {
9588                 list_del(it);
9589                 free(it->elem);
9590                 free(it);
9591         }
9592
9593         list_for_each_safe(it, &hierarchy[1].id_map, next) {
9594                 list_del(it);
9595                 free(it->elem);
9596                 free(it);
9597         }
9598
9599         list_for_each_safe(it, &hierarchy[2].id_map, next) {
9600                 list_del(it);
9601                 free(it->elem);
9602                 free(it);
9603         }
9604
9605         safe_close(hierarchy[0].fd_userns);
9606         safe_close(hierarchy[1].fd_userns);
9607         safe_close(hierarchy[2].fd_userns);
9608         safe_close(fd_dir1);
9609         safe_close(fd_open_tree_level1);
9610         safe_close(fd_open_tree_level2);
9611         safe_close(fd_open_tree_level3);
9612         safe_close(fd_open_tree_level4);
9613         return fret;
9614 }
9615
/*
 * Local fallback for the v1 btrfs volume ioctl argument. Only compiled when
 * HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS is not defined (presumably by the build
 * system when the system headers already provide the struct).
 */
#if !defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS)

#if !defined(BTRFS_PATH_NAME_MAX)
#define BTRFS_PATH_NAME_MAX 4087
#endif

struct btrfs_ioctl_vol_args {
	__s64 fd;
	/* BTRFS_PATH_NAME_MAX bytes plus one extra byte for the terminator. */
	char name[BTRFS_PATH_NAME_MAX + 1];
};

#endif /* !HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS */
9627
/*
 * Local fallback for struct btrfs_qgroup_limit, used when
 * HAVE_STRUCT_BTRFS_QGROUP_LIMIT is not defined. Five consecutive 64-bit
 * fields; the member order must not be changed.
 */
#if !defined(HAVE_STRUCT_BTRFS_QGROUP_LIMIT)

struct btrfs_qgroup_limit {
	__u64 flags;
	__u64 max_rfer;
	__u64 max_excl;
	__u64 rsv_rfer;
	__u64 rsv_excl;
};

#endif /* !HAVE_STRUCT_BTRFS_QGROUP_LIMIT */
9637
9638 #ifndef HAVE_STRUCT_BTRFS_QGROUP_INHERIT
9639 struct btrfs_qgroup_inherit {
9640         __u64 flags;
9641         __u64 num_qgroups;
9642         __u64 num_ref_copies;
9643         __u64 num_excl_copies;
9644         struct btrfs_qgroup_limit lim;
9645         __u64 qgroups[0];
9646 };
9647 #endif
9648
/*
 * Local fallback for the v2 btrfs volume ioctl argument. Compiled unless both
 * HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2 and
 * HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID are defined.
 */
#if !(defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2) && defined(HAVE_STRUCT_BTRFS_IOCTL_VOL_ARGS_V2_SUBVOLID))

#if !defined(BTRFS_SUBVOL_NAME_MAX)
#define BTRFS_SUBVOL_NAME_MAX 4039
#endif

struct btrfs_ioctl_vol_args_v2 {
	__s64 fd;
	__u64 transid;
	__u64 flags;

	/* Four 64-bit words, optionally viewed as size + qgroup pointer. */
	union {
		struct {
			__u64 size;
			struct btrfs_qgroup_inherit *qgroup_inherit;
		};
		__u64 unused[4];
	};

	/* Target selector: a name, a device id or a subvolume id. */
	union {
		char name[BTRFS_SUBVOL_NAME_MAX + 1];
		__u64 devid;
		__u64 subvolid;
	};
};

#endif /* v2 struct missing or lacking the subvolid member */
9673
/*
 * Local fallback for the BTRFS_IOC_INO_LOOKUP argument, used when
 * HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS is not defined.
 */
#if !defined(HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS)

#if !defined(BTRFS_INO_LOOKUP_PATH_MAX)
#define BTRFS_INO_LOOKUP_PATH_MAX 4080
#endif

struct btrfs_ioctl_ino_lookup_args {
	__u64 treeid;
	__u64 objectid;
	char name[BTRFS_INO_LOOKUP_PATH_MAX];
};

#endif /* !HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_ARGS */
9685
/*
 * Local fallback for the BTRFS_IOC_INO_LOOKUP_USER argument, used when
 * HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS is not defined.
 */
#if !defined(HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS)

#if !defined(BTRFS_VOL_NAME_MAX)
#define BTRFS_VOL_NAME_MAX 255
#endif

/* The name and path buffers together occupy 4080 bytes. */
#if !defined(BTRFS_INO_LOOKUP_USER_PATH_MAX)
#define BTRFS_INO_LOOKUP_USER_PATH_MAX (4080 - BTRFS_VOL_NAME_MAX - 1)
#endif

struct btrfs_ioctl_ino_lookup_user_args {
	__u64 dirid;
	__u64 treeid;
	char name[BTRFS_VOL_NAME_MAX + 1];
	char path[BTRFS_INO_LOOKUP_USER_PATH_MAX];
};

#endif /* !HAVE_STRUCT_BTRFS_IOCTL_INO_LOOKUP_USER_ARGS */
9703
/*
 * Local fallback for the BTRFS_IOC_GET_SUBVOL_ROOTREF argument, used when
 * HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS is not defined.
 */
#if !defined(HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS)

#if !defined(BTRFS_MAX_ROOTREF_BUFFER_NUM)
#define BTRFS_MAX_ROOTREF_BUFFER_NUM 255
#endif

struct btrfs_ioctl_get_subvol_rootref_args {
	__u64 min_treeid;

	/* Fixed-size buffer of (treeid, dirid) pairs. */
	struct {
		__u64 treeid;
		__u64 dirid;
	} rootref[BTRFS_MAX_ROOTREF_BUFFER_NUM];

	__u8 num_items;
	__u8 align[7];	/* explicit padding after the one-byte counter */
};

#endif /* !HAVE_STRUCT_BTRFS_IOCTL_GET_SUBVOL_ROOTREF_ARGS */
9720
/*
 * Fallback definitions for btrfs ioctl numbers and flag constants. Each one
 * is only defined if an earlier header has not already defined it.
 */

/* ioctl type byte shared by all btrfs ioctls below. */
#if !defined(BTRFS_IOCTL_MAGIC)
#define BTRFS_IOCTL_MAGIC 0x94
#endif

/* Subvolume/snapshot creation and destruction. */
#if !defined(BTRFS_IOC_SNAP_DESTROY)
#define BTRFS_IOC_SNAP_DESTROY \
	_IOW(BTRFS_IOCTL_MAGIC, 15, struct btrfs_ioctl_vol_args)
#endif

#if !defined(BTRFS_IOC_SNAP_DESTROY_V2)
#define BTRFS_IOC_SNAP_DESTROY_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 63, struct btrfs_ioctl_vol_args_v2)
#endif

#if !defined(BTRFS_IOC_SNAP_CREATE_V2)
#define BTRFS_IOC_SNAP_CREATE_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 23, struct btrfs_ioctl_vol_args_v2)
#endif

#if !defined(BTRFS_IOC_SUBVOL_CREATE_V2)
#define BTRFS_IOC_SUBVOL_CREATE_V2 \
	_IOW(BTRFS_IOCTL_MAGIC, 24, struct btrfs_ioctl_vol_args_v2)
#endif

/* Reading and writing the 64-bit subvolume flags word. */
#if !defined(BTRFS_IOC_SUBVOL_GETFLAGS)
#define BTRFS_IOC_SUBVOL_GETFLAGS _IOR(BTRFS_IOCTL_MAGIC, 25, __u64)
#endif

#if !defined(BTRFS_IOC_SUBVOL_SETFLAGS)
#define BTRFS_IOC_SUBVOL_SETFLAGS _IOW(BTRFS_IOCTL_MAGIC, 26, __u64)
#endif

/* Inode/path lookup and subvolume root reference enumeration. */
#if !defined(BTRFS_IOC_INO_LOOKUP)
#define BTRFS_IOC_INO_LOOKUP \
	_IOWR(BTRFS_IOCTL_MAGIC, 18, struct btrfs_ioctl_ino_lookup_args)
#endif

#if !defined(BTRFS_IOC_INO_LOOKUP_USER)
#define BTRFS_IOC_INO_LOOKUP_USER \
	_IOWR(BTRFS_IOCTL_MAGIC, 62, struct btrfs_ioctl_ino_lookup_user_args)
#endif

#if !defined(BTRFS_IOC_GET_SUBVOL_ROOTREF)
#define BTRFS_IOC_GET_SUBVOL_ROOTREF \
	_IOWR(BTRFS_IOCTL_MAGIC, 61, struct btrfs_ioctl_get_subvol_rootref_args)
#endif

/* Flag bits used in the subvolume flags word / vol_args_v2.flags. */
#if !defined(BTRFS_SUBVOL_RDONLY)
#define BTRFS_SUBVOL_RDONLY (1ULL << 1)
#endif

#if !defined(BTRFS_SUBVOL_SPEC_BY_ID)
#define BTRFS_SUBVOL_SPEC_BY_ID (1ULL << 4)
#endif

/* Object id passed to BTRFS_IOC_INO_LOOKUP when resolving a subvolume id. */
#if !defined(BTRFS_FIRST_FREE_OBJECTID)
#define BTRFS_FIRST_FREE_OBJECTID 256ULL
#endif
9779
9780 static int btrfs_delete_subvolume(int parent_fd, const char *name)
9781 {
9782         struct btrfs_ioctl_vol_args args = {};
9783         size_t len;
9784         int ret;
9785
9786         len = strlen(name);
9787         if (len >= sizeof(args.name))
9788                 return -ENAMETOOLONG;
9789
9790         memcpy(args.name, name, len);
9791         args.name[len] = '\0';
9792
9793         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY, &args);
9794         if (ret < 0)
9795                 return -1;
9796
9797         return 0;
9798 }
9799
9800 static int btrfs_delete_subvolume_id(int parent_fd, uint64_t subvolid)
9801 {
9802         struct btrfs_ioctl_vol_args_v2 args = {};
9803         int ret;
9804
9805         args.flags = BTRFS_SUBVOL_SPEC_BY_ID;
9806         args.subvolid = subvolid;
9807
9808         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_DESTROY_V2, &args);
9809         if (ret < 0)
9810                 return -1;
9811
9812         return 0;
9813 }
9814
9815 static int btrfs_create_subvolume(int parent_fd, const char *name)
9816 {
9817         struct btrfs_ioctl_vol_args_v2 args = {};
9818         size_t len;
9819         int ret;
9820
9821         len = strlen(name);
9822         if (len >= sizeof(args.name))
9823                 return -ENAMETOOLONG;
9824
9825         memcpy(args.name, name, len);
9826         args.name[len] = '\0';
9827
9828         ret = ioctl(parent_fd, BTRFS_IOC_SUBVOL_CREATE_V2, &args);
9829         if (ret < 0)
9830                 return -1;
9831
9832         return 0;
9833 }
9834
9835 static int btrfs_create_snapshot(int fd, int parent_fd, const char *name,
9836                                  int flags)
9837 {
9838         struct btrfs_ioctl_vol_args_v2 args = {
9839                 .fd = fd,
9840         };
9841         size_t len;
9842         int ret;
9843
9844         if (flags & ~BTRFS_SUBVOL_RDONLY)
9845                 return -EINVAL;
9846
9847         len = strlen(name);
9848         if (len >= sizeof(args.name))
9849                 return -ENAMETOOLONG;
9850         memcpy(args.name, name, len);
9851         args.name[len] = '\0';
9852
9853         if (flags & BTRFS_SUBVOL_RDONLY)
9854                 args.flags |= BTRFS_SUBVOL_RDONLY;
9855         ret = ioctl(parent_fd, BTRFS_IOC_SNAP_CREATE_V2, &args);
9856         if (ret < 0)
9857                 return -1;
9858
9859         return 0;
9860 }
9861
9862 static int btrfs_get_subvolume_ro(int fd, bool *read_only_ret)
9863 {
9864         uint64_t flags;
9865         int ret;
9866
9867         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9868         if (ret < 0)
9869                 return -1;
9870
9871         *read_only_ret = flags & BTRFS_SUBVOL_RDONLY;
9872         return 0;
9873 }
9874
9875 static int btrfs_set_subvolume_ro(int fd, bool read_only)
9876 {
9877         uint64_t flags;
9878         int ret;
9879
9880         ret = ioctl(fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
9881         if (ret < 0)
9882                 return -1;
9883
9884         if (read_only)
9885                 flags |= BTRFS_SUBVOL_RDONLY;
9886         else
9887                 flags &= ~BTRFS_SUBVOL_RDONLY;
9888
9889         ret = ioctl(fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
9890         if (ret < 0)
9891                 return -1;
9892
9893         return 0;
9894 }
9895
9896 static int btrfs_get_subvolume_id(int fd, uint64_t *id_ret)
9897 {
9898         struct btrfs_ioctl_ino_lookup_args args = {
9899             .treeid = 0,
9900             .objectid = BTRFS_FIRST_FREE_OBJECTID,
9901         };
9902         int ret;
9903
9904         ret = ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args);
9905         if (ret < 0)
9906                 return -1;
9907
9908         *id_ret = args.treeid;
9909
9910         return 0;
9911 }
9912
9913 /*
9914  * The following helpers are adapted from the btrfsutils library. We can't use
9915  * the library directly since we need full control over how the subvolume
9916  * iteration happens. We need to be able to check whether unprivileged
9917  * subvolume iteration is possible, i.e. whether BTRFS_IOC_INO_LOOKUP_USER is
9918  * available and also ensure that it is actually used when looking up paths.
9919  */
9920 struct btrfs_stack {
9921         uint64_t tree_id;
9922         struct btrfs_ioctl_get_subvol_rootref_args rootref_args;
9923         size_t items_pos;
9924         size_t path_len;
9925 };
9926
/*
 * State of a subvolume iteration started with btrfs_iterator_start().
 */
struct btrfs_iter {
        /*
         * fd:     file descriptor the iteration was started on.
         * cur_fd: file descriptor of the subvolume on top of the stack; equal
         *         to fd until the iteration descends into a child.
         */
        int fd, cur_fd;

        /* Dynamically sized array of stack entries. */
        struct btrfs_stack *search_stack;
        /* Number of entries in use and number of entries allocated. */
        size_t stack_len, stack_capacity;

        /* Path of the current subvolume, relative to where iteration began. */
        char *cur_path;
        /* Number of bytes allocated for cur_path. */
        size_t cur_path_capacity;
};
9938
9939 static struct btrfs_stack *top_stack_entry(struct btrfs_iter *iter)
9940 {
9941         return &iter->search_stack[iter->stack_len - 1];
9942 }
9943
9944 static int pop_stack(struct btrfs_iter *iter)
9945 {
9946         struct btrfs_stack *top, *parent;
9947         int fd, parent_fd;
9948         size_t i;
9949
9950         if (iter->stack_len == 1) {
9951                 iter->stack_len--;
9952                 return 0;
9953         }
9954
9955         top = top_stack_entry(iter);
9956         iter->stack_len--;
9957         parent = top_stack_entry(iter);
9958
9959         fd = iter->cur_fd;
9960         for (i = parent->path_len; i < top->path_len; i++) {
9961                 if (i == 0 || iter->cur_path[i] == '/') {
9962                         parent_fd = openat(fd, "..", O_RDONLY);
9963                         if (fd != iter->cur_fd)
9964                                 close(fd);
9965                         if (parent_fd == -1)
9966                                 return -1;
9967                         fd = parent_fd;
9968                 }
9969         }
9970         if (iter->cur_fd != iter->fd)
9971                 close(iter->cur_fd);
9972         iter->cur_fd = fd;
9973
9974         return 0;
9975 }
9976
9977 static int append_stack(struct btrfs_iter *iter, uint64_t tree_id, size_t path_len)
9978 {
9979         struct btrfs_stack *entry;
9980
9981         if (iter->stack_len >= iter->stack_capacity) {
9982                 size_t new_capacity = iter->stack_capacity * 2;
9983                 struct btrfs_stack *new_search_stack;
9984 #ifdef HAVE_REALLOCARRAY
9985                 new_search_stack = reallocarray(iter->search_stack, new_capacity,
9986                                                 sizeof(*iter->search_stack));
9987 #else
9988                 new_search_stack = realloc(iter->search_stack, new_capacity * sizeof(*iter->search_stack));
9989 #endif
9990                 if (!new_search_stack)
9991                         return -ENOMEM;
9992
9993                 iter->stack_capacity = new_capacity;
9994                 iter->search_stack = new_search_stack;
9995         }
9996
9997         entry = &iter->search_stack[iter->stack_len];
9998
9999         memset(entry, 0, sizeof(*entry));
10000         entry->path_len = path_len;
10001         entry->tree_id = tree_id;
10002
10003         if (iter->stack_len) {
10004                 struct btrfs_stack *top;
10005                 char *path;
10006                 int fd;
10007
10008                 top = top_stack_entry(iter);
10009                 path = &iter->cur_path[top->path_len];
10010                 if (*path == '/')
10011                         path++;
10012                 fd = openat(iter->cur_fd, path, O_RDONLY);
10013                 if (fd == -1)
10014                         return -errno;
10015
10016                 close(iter->cur_fd);
10017                 iter->cur_fd = fd;
10018         }
10019
10020         iter->stack_len++;
10021
10022         return 0;
10023 }
10024
10025 static int btrfs_iterator_start(int fd, uint64_t top, struct btrfs_iter **ret)
10026 {
10027         struct btrfs_iter *iter;
10028         int err;
10029
10030         iter = malloc(sizeof(*iter));
10031         if (!iter)
10032                 return -ENOMEM;
10033
10034         iter->fd = fd;
10035         iter->cur_fd = fd;
10036
10037         iter->stack_len = 0;
10038         iter->stack_capacity = 4;
10039         iter->search_stack = malloc(sizeof(*iter->search_stack) *
10040                                     iter->stack_capacity);
10041         if (!iter->search_stack) {
10042                 err = -ENOMEM;
10043                 goto out_iter;
10044         }
10045
10046         iter->cur_path_capacity = 256;
10047         iter->cur_path = malloc(iter->cur_path_capacity);
10048         if (!iter->cur_path) {
10049                 err = -ENOMEM;
10050                 goto out_search_stack;
10051         }
10052
10053         err = append_stack(iter, top, 0);
10054         if (err)
10055                 goto out_cur_path;
10056
10057         *ret = iter;
10058
10059         return 0;
10060
10061 out_cur_path:
10062         free(iter->cur_path);
10063 out_search_stack:
10064         free(iter->search_stack);
10065 out_iter:
10066         free(iter);
10067         return err;
10068 }
10069
10070 static void btrfs_iterator_end(struct btrfs_iter *iter)
10071 {
10072         if (iter) {
10073                 free(iter->cur_path);
10074                 free(iter->search_stack);
10075                 if (iter->cur_fd != iter->fd)
10076                         close(iter->cur_fd);
10077                 close(iter->fd);
10078                 free(iter);
10079         }
10080 }
10081
10082 static int __append_path(struct btrfs_iter *iter, const char *name,
10083                          size_t name_len, const char *dir, size_t dir_len,
10084                          size_t *path_len_ret)
10085 {
10086         struct btrfs_stack *top = top_stack_entry(iter);
10087         size_t path_len;
10088         char *p;
10089
10090         path_len = top->path_len;
10091         /*
10092          * We need a joining slash if we have a current path and a subdirectory.
10093          */
10094         if (top->path_len && dir_len)
10095                 path_len++;
10096         path_len += dir_len;
10097         /*
10098          * We need another joining slash if we have a current path and a name,
10099          * but not if we have a subdirectory, because the lookup ioctl includes
10100          * a trailing slash.
10101          */
10102         if (top->path_len && !dir_len && name_len)
10103                 path_len++;
10104         path_len += name_len;
10105
10106         /* We need one extra character for the NUL terminator. */
10107         if (path_len + 1 > iter->cur_path_capacity) {
10108                 char *tmp = realloc(iter->cur_path, path_len + 1);
10109
10110                 if (!tmp)
10111                         return -ENOMEM;
10112                 iter->cur_path = tmp;
10113                 iter->cur_path_capacity = path_len + 1;
10114         }
10115
10116         p = iter->cur_path + top->path_len;
10117         if (top->path_len && dir_len)
10118                 *p++ = '/';
10119         memcpy(p, dir, dir_len);
10120         p += dir_len;
10121         if (top->path_len && !dir_len && name_len)
10122                 *p++ = '/';
10123         memcpy(p, name, name_len);
10124         p += name_len;
10125         *p = '\0';
10126
10127         *path_len_ret = path_len;
10128
10129         return 0;
10130 }
10131
10132 static int get_subvolume_path(struct btrfs_iter *iter, uint64_t treeid,
10133                               uint64_t dirid, size_t *path_len_ret)
10134 {
10135         struct btrfs_ioctl_ino_lookup_user_args args = {
10136                 .treeid = treeid,
10137                 .dirid = dirid,
10138         };
10139         int ret;
10140
10141         ret = ioctl(iter->cur_fd, BTRFS_IOC_INO_LOOKUP_USER, &args);
10142         if (ret == -1)
10143                 return -1;
10144
10145         return __append_path(iter, args.name, strlen(args.name), args.path,
10146                              strlen(args.path), path_len_ret);
10147 }
10148
10149 static int btrfs_iterator_next(struct btrfs_iter *iter, char **path_ret,
10150                                uint64_t *id_ret)
10151 {
10152         struct btrfs_stack *top;
10153         uint64_t treeid, dirid;
10154         size_t path_len;
10155         int ret, err;
10156
10157         for (;;) {
10158                 for (;;) {
10159                         if (iter->stack_len == 0)
10160                                 return 1;
10161
10162                         top = top_stack_entry(iter);
10163                         if (top->items_pos < top->rootref_args.num_items) {
10164                                 break;
10165                         } else {
10166                                 ret = ioctl(iter->cur_fd,
10167                                             BTRFS_IOC_GET_SUBVOL_ROOTREF,
10168                                             &top->rootref_args);
10169                                 if (ret == -1 && errno != EOVERFLOW)
10170                                         return -1;
10171                                 top->items_pos = 0;
10172
10173                                 if (top->rootref_args.num_items == 0) {
10174                                         err = pop_stack(iter);
10175                                         if (err)
10176                                                 return err;
10177                                 }
10178                         }
10179                 }
10180
10181                 treeid = top->rootref_args.rootref[top->items_pos].treeid;
10182                 dirid = top->rootref_args.rootref[top->items_pos].dirid;
10183                 top->items_pos++;
10184                 err = get_subvolume_path(iter, treeid, dirid, &path_len);
10185                 if (err) {
10186                         /* Skip the subvolume if we can't access it. */
10187                         if (errno == EACCES)
10188                                 continue;
10189                         return err;
10190                 }
10191
10192                 err = append_stack(iter, treeid, path_len);
10193                 if (err) {
10194                         /*
10195                          * Skip the subvolume if it does not exist (which can
10196                          * happen if there is another filesystem mounted over a
10197                          * parent directory) or we don't have permission to
10198                          * access it.
10199                          */
10200                         if (errno == ENOENT || errno == EACCES)
10201                                 continue;
10202                         return err;
10203                 }
10204
10205                 top = top_stack_entry(iter);
10206                 goto out;
10207         }
10208
10209 out:
10210         if (path_ret) {
10211                 *path_ret = malloc(top->path_len + 1);
10212                 if (!*path_ret)
10213                         return -ENOMEM;
10214                 memcpy(*path_ret, iter->cur_path, top->path_len);
10215                 (*path_ret)[top->path_len] = '\0';
10216         }
10217         if (id_ret)
10218                 *id_ret = top->tree_id;
10219         return 0;
10220 }
10221
/* Names of the subvolumes and snapshots used by the btrfs tests. */
#define BTRFS_SUBVOLUME1                "subvol1"
#define BTRFS_SUBVOLUME1_SNAPSHOT1      "subvol1_snapshot1"
#define BTRFS_SUBVOLUME1_SNAPSHOT1_RO   "subvol1_snapshot1_ro"
#define BTRFS_SUBVOLUME1_RENAME         "subvol1_rename"
#define BTRFS_SUBVOLUME2                "subvol2"
10227
10228 static int btrfs_subvolumes_fsids_mapped(void)
10229 {
10230         int fret = -1;
10231         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10232         struct mount_attr attr = {
10233                 .attr_set = MOUNT_ATTR_IDMAP,
10234         };
10235         pid_t pid;
10236
10237         if (!caps_supported())
10238                 return 0;
10239
10240         /* Changing mount properties on a detached mount. */
10241         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10242         if (attr.userns_fd < 0) {
10243                 log_stderr("failure: get_userns_fd");
10244                 goto out;
10245         }
10246
10247         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10248                                      AT_EMPTY_PATH |
10249                                      AT_NO_AUTOMOUNT |
10250                                      AT_SYMLINK_NOFOLLOW |
10251                                      OPEN_TREE_CLOEXEC |
10252                                      OPEN_TREE_CLONE);
10253         if (open_tree_fd < 0) {
10254                 log_stderr("failure: sys_open_tree");
10255                 goto out;
10256         }
10257
10258         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10259                 log_stderr("failure: sys_mount_setattr");
10260                 goto out;
10261         }
10262
10263         /*
10264          * The open_tree() syscall returns an O_PATH file descriptor which we
10265          * can't use with ioctl(). So let's reopen it as a proper file
10266          * descriptor.
10267          */
10268         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10269         if (tree_fd < 0) {
10270                 log_stderr("failure: openat");
10271                 goto out;
10272         }
10273
10274         pid = fork();
10275         if (pid < 0) {
10276                 log_stderr("failure: fork");
10277                 goto out;
10278         }
10279         if (pid == 0) {
10280                 if (!switch_fsids(10000, 10000))
10281                         die("failure: switch fsids");
10282
10283                 if (!caps_up())
10284                         die("failure: raise caps");
10285
10286                 /*
10287                  * The caller's fsids now have mappings in the idmapped mount so
10288                  * any file creation must succeed.
10289                  */
10290
10291                 /* create subvolume */
10292                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10293                         die("failure: btrfs_create_subvolume");
10294
10295                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10296                         die("failure: check ownership");
10297
10298                 /* remove subvolume */
10299                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10300                         die("failure: btrfs_delete_subvolume");
10301
10302                 /* create subvolume */
10303                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10304                         die("failure: btrfs_create_subvolume");
10305
10306                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10307                         die("failure: check ownership");
10308
10309                 if (!caps_down())
10310                         die("failure: lower caps");
10311
10312                 /*
10313                  * The filesystem is not mounted with user_subvol_rm_allowed so
10314                  * subvolume deletion must fail.
10315                  */
10316                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10317                         die("failure: btrfs_delete_subvolume");
10318                 if (errno != EPERM)
10319                         die("failure: errno");
10320
10321                 exit(EXIT_SUCCESS);
10322         }
10323         if (wait_for_pid(pid))
10324                 goto out;
10325
10326         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10327                 die("failure: check ownership");
10328
10329         /* remove subvolume */
10330         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10331                 log_stderr("failure: btrfs_delete_subvolume");
10332                 goto out;
10333         }
10334
10335         fret = 0;
10336         log_debug("Ran test");
10337 out:
10338         safe_close(attr.userns_fd);
10339         safe_close(open_tree_fd);
10340         safe_close(tree_fd);
10341
10342         return fret;
10343 }
10344
10345 static int btrfs_subvolumes_fsids_mapped_userns(void)
10346 {
10347         int fret = -1;
10348         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10349         struct mount_attr attr = {
10350                 .attr_set = MOUNT_ATTR_IDMAP,
10351         };
10352         pid_t pid;
10353
10354         if (!caps_supported())
10355                 return 0;
10356
10357         /* Changing mount properties on a detached mount. */
10358         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10359         if (attr.userns_fd < 0) {
10360                 log_stderr("failure: get_userns_fd");
10361                 goto out;
10362         }
10363
10364         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10365                                      AT_EMPTY_PATH |
10366                                      AT_NO_AUTOMOUNT |
10367                                      AT_SYMLINK_NOFOLLOW |
10368                                      OPEN_TREE_CLOEXEC |
10369                                      OPEN_TREE_CLONE);
10370         if (open_tree_fd < 0) {
10371                 log_stderr("failure: sys_open_tree");
10372                 goto out;
10373         }
10374
10375         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10376                 log_stderr("failure: sys_mount_setattr");
10377                 goto out;
10378         }
10379
10380         /*
10381          * The open_tree() syscall returns an O_PATH file descriptor which we
10382          * can't use with ioctl(). So let's reopen it as a proper file
10383          * descriptor.
10384          */
10385         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10386         if (tree_fd < 0) {
10387                 log_stderr("failure: openat");
10388                 goto out;
10389         }
10390
10391         pid = fork();
10392         if (pid < 0) {
10393                 log_stderr("failure: fork");
10394                 goto out;
10395         }
10396         if (pid == 0) {
10397                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10398                         die("failure: switch_userns");
10399
10400                 /* The caller's fsids now have mappings in the idmapped mount so
10401                  * any file creation must fail.
10402                  */
10403
10404                 /* create subvolume */
10405                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10406                         die("failure: btrfs_create_subvolume");
10407
10408                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10409                         die("failure: check ownership");
10410
10411                 /* remove subvolume */
10412                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10413                         die("failure: btrfs_delete_subvolume");
10414
10415                 exit(EXIT_SUCCESS);
10416         }
10417         if (wait_for_pid(pid))
10418                 goto out;
10419
10420         /* remove subvolume */
10421         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10422                 log_stderr("failure: btrfs_delete_subvolume");
10423                 goto out;
10424         }
10425
10426         fret = 0;
10427         log_debug("Ran test");
10428 out:
10429         safe_close(attr.userns_fd);
10430         safe_close(open_tree_fd);
10431         safe_close(tree_fd);
10432
10433         return fret;
10434 }
10435
10436 static int btrfs_subvolumes_fsids_unmapped(void)
10437 {
10438         int fret = -1;
10439         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10440         struct mount_attr attr = {
10441                 .attr_set = MOUNT_ATTR_IDMAP,
10442         };
10443
10444         /* create directory for rename test */
10445         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10446                 log_stderr("failure: btrfs_create_subvolume");
10447                 goto out;
10448         }
10449
10450         /* change ownership of all files to uid 0 */
10451         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10452                 log_stderr("failure: fchownat");
10453                 goto out;
10454         }
10455
10456         /* Changing mount properties on a detached mount. */
10457         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10458         if (attr.userns_fd < 0) {
10459                 log_stderr("failure: get_userns_fd");
10460                 goto out;
10461         }
10462
10463         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10464                                      AT_EMPTY_PATH |
10465                                      AT_NO_AUTOMOUNT |
10466                                      AT_SYMLINK_NOFOLLOW |
10467                                      OPEN_TREE_CLOEXEC |
10468                                      OPEN_TREE_CLONE);
10469         if (open_tree_fd < 0) {
10470                 log_stderr("failure: sys_open_tree");
10471                 goto out;
10472         }
10473
10474         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10475                 log_stderr("failure: sys_mount_setattr");
10476                 goto out;
10477         }
10478
10479         if (!switch_fsids(0, 0)) {
10480                 log_stderr("failure: switch_fsids");
10481                 goto out;
10482         }
10483
10484         /*
10485          * The caller's fsids don't have a mappings in the idmapped mount so
10486          * any file creation must fail.
10487          */
10488
10489         /*
10490          * The open_tree() syscall returns an O_PATH file descriptor which we
10491          * can't use with ioctl(). So let's reopen it as a proper file
10492          * descriptor.
10493          */
10494         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10495         if (tree_fd < 0) {
10496                 log_stderr("failure: openat");
10497                 goto out;
10498         }
10499
10500         /* create subvolume */
10501         if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2)) {
10502                 log_stderr("failure: btrfs_create_subvolume");
10503                 goto out;
10504         }
10505         if (errno != EOVERFLOW) {
10506                 log_stderr("failure: errno");
10507                 goto out;
10508         }
10509
10510         /* try to rename a subvolume */
10511         if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10512                        BTRFS_SUBVOLUME1_RENAME)) {
10513                 log_stderr("failure: renameat");
10514                 goto out;
10515         }
10516         if (errno != EOVERFLOW) {
10517                 log_stderr("failure: errno");
10518                 goto out;
10519         }
10520
10521         /* The caller is privileged over the inode so file deletion must work. */
10522
10523         /* remove subvolume */
10524         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10525                 log_stderr("failure: btrfs_delete_subvolume");
10526                 goto out;
10527         }
10528
10529         fret = 0;
10530         log_debug("Ran test");
10531 out:
10532         safe_close(attr.userns_fd);
10533         safe_close(open_tree_fd);
10534         safe_close(tree_fd);
10535
10536         return fret;
10537 }
10538
10539 static int btrfs_subvolumes_fsids_unmapped_userns(void)
10540 {
10541         int fret = -1;
10542         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
10543         struct mount_attr attr = {
10544                 .attr_set = MOUNT_ATTR_IDMAP,
10545         };
10546         pid_t pid;
10547
10548         /* create directory for rename test */
10549         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10550                 log_stderr("failure: btrfs_create_subvolume");
10551                 goto out;
10552         }
10553
10554         /* change ownership of all files to uid 0 */
10555         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10556                 log_stderr("failure: fchownat");
10557                 goto out;
10558         }
10559
10560         /* Changing mount properties on a detached mount. */
10561         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10562         if (attr.userns_fd < 0) {
10563                 log_stderr("failure: get_userns_fd");
10564                 goto out;
10565         }
10566
10567         /* Changing mount properties on a detached mount. */
10568         userns_fd = get_userns_fd(0, 30000, 10000);
10569         if (userns_fd < 0) {
10570                 log_stderr("failure: get_userns_fd");
10571                 goto out;
10572         }
10573
10574         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10575                                      AT_EMPTY_PATH |
10576                                      AT_NO_AUTOMOUNT |
10577                                      AT_SYMLINK_NOFOLLOW |
10578                                      OPEN_TREE_CLOEXEC |
10579                                      OPEN_TREE_CLONE);
10580         if (open_tree_fd < 0) {
10581                 log_stderr("failure: sys_open_tree");
10582                 goto out;
10583         }
10584
10585         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10586                 log_stderr("failure: sys_mount_setattr");
10587                 goto out;
10588         }
10589
10590         /*
10591          * The open_tree() syscall returns an O_PATH file descriptor which we
10592          * can't use with ioctl(). So let's reopen it as a proper file
10593          * descriptor.
10594          */
10595         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10596         if (tree_fd < 0) {
10597                 log_stderr("failure: openat");
10598                 goto out;
10599         }
10600
10601         pid = fork();
10602         if (pid < 0) {
10603                 log_stderr("failure: fork");
10604                 goto out;
10605         }
10606         if (pid == 0) {
10607                 if (!switch_userns(userns_fd, 0, 0, false))
10608                         die("failure: switch_userns");
10609
10610                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
10611                                       t_overflowuid, t_overflowgid))
10612                         die("failure: expected_uid_gid");
10613
10614                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
10615                                       t_overflowuid, t_overflowgid))
10616                         die("failure: expected_uid_gid");
10617
10618                 /*
10619                  * The caller's fsids don't have a mappings in the idmapped mount so
10620                  * any file creation must fail.
10621                  */
10622
10623                 /* create subvolume */
10624                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
10625                         die("failure: btrfs_create_subvolume");
10626                 if (errno != EOVERFLOW)
10627                         die("failure: errno");
10628
10629                 /* try to rename a subvolume */
10630                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
10631                                         BTRFS_SUBVOLUME1_RENAME))
10632                         die("failure: renameat");
10633                 if (errno != EOVERFLOW)
10634                         die("failure: errno");
10635
10636                 /*
10637                  * The caller is not privileged over the inode so subvolume
10638                  * deletion must fail.
10639                  */
10640
10641                 /* remove subvolume */
10642                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10643                         die("failure: btrfs_delete_subvolume");
10644
10645                 exit(EXIT_SUCCESS);
10646         }
10647         if (wait_for_pid(pid))
10648                 goto out;
10649
10650         /* remove subvolume */
10651         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10652                 log_stderr("failure: btrfs_delete_subvolume");
10653                 goto out;
10654         }
10655
10656         fret = 0;
10657         log_debug("Ran test");
10658 out:
10659         safe_close(attr.userns_fd);
10660         safe_close(open_tree_fd);
10661         safe_close(tree_fd);
10662         safe_close(userns_fd);
10663
10664         return fret;
10665 }
10666
10667 static int btrfs_snapshots_fsids_mapped(void)
10668 {
10669         int fret = -1;
10670         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10671         struct mount_attr attr = {
10672                 .attr_set = MOUNT_ATTR_IDMAP,
10673         };
10674         pid_t pid;
10675
10676         if (!caps_supported())
10677                 return 0;
10678
10679         /* Changing mount properties on a detached mount. */
10680         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10681         if (attr.userns_fd < 0) {
10682                 log_stderr("failure: get_userns_fd");
10683                 goto out;
10684         }
10685
10686         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10687                                      AT_EMPTY_PATH |
10688                                      AT_NO_AUTOMOUNT |
10689                                      AT_SYMLINK_NOFOLLOW |
10690                                      OPEN_TREE_CLOEXEC |
10691                                      OPEN_TREE_CLONE);
10692         if (open_tree_fd < 0) {
10693                 log_stderr("failure: sys_open_tree");
10694                 goto out;
10695         }
10696
10697         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10698                 log_stderr("failure: sys_mount_setattr");
10699                 goto out;
10700         }
10701
10702         /*
10703          * The open_tree() syscall returns an O_PATH file descriptor which we
10704          * can't use with ioctl(). So let's reopen it as a proper file
10705          * descriptor.
10706          */
10707         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10708         if (tree_fd < 0) {
10709                 log_stderr("failure: openat");
10710                 goto out;
10711         }
10712
10713         pid = fork();
10714         if (pid < 0) {
10715                 log_stderr("failure: fork");
10716                 goto out;
10717         }
10718         if (pid == 0) {
10719                 int subvolume_fd = -EBADF;
10720
10721                 if (!switch_fsids(10000, 10000))
10722                         die("failure: switch fsids");
10723
10724                 if (!caps_up())
10725                         die("failure: raise caps");
10726
10727                 /* The caller's fsids now have mappings in the idmapped mount so
10728                  * any file creation must fail.
10729                  */
10730
10731                 /* create subvolume */
10732                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10733                         die("failure: btrfs_create_subvolume");
10734
10735                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10736                         die("failure: expected_uid_gid");
10737
10738                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10739                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10740                 if (subvolume_fd < 0)
10741                         die("failure: openat");
10742
10743                 /* create read-write snapshot */
10744                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10745                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10746                         die("failure: btrfs_create_snapshot");
10747
10748                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10749                         die("failure: expected_uid_gid");
10750
10751                 /* create read-only snapshot */
10752                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10753                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10754                                           BTRFS_SUBVOL_RDONLY))
10755                         die("failure: btrfs_create_snapshot");
10756
10757                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10758                         die("failure: expected_uid_gid");
10759
10760                 safe_close(subvolume_fd);
10761
10762                 /* remove subvolume */
10763                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10764                         die("failure: btrfs_delete_subvolume");
10765
10766                 /* remove read-write snapshot */
10767                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
10768                         die("failure: btrfs_delete_subvolume");
10769
10770                 /* remove read-only snapshot */
10771                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
10772                         die("failure: btrfs_delete_subvolume");
10773
10774                 /* create directory */
10775                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10776                         die("failure: btrfs_create_subvolume");
10777
10778                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10779                         die("failure: expected_uid_gid");
10780
10781                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10782                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10783                 if (subvolume_fd < 0)
10784                         die("failure: openat");
10785
10786                 /* create read-write snapshot */
10787                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10788                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10789                         die("failure: btrfs_create_snapshot");
10790
10791                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10792                         die("failure: expected_uid_gid");
10793
10794                 /* create read-only snapshot */
10795                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10796                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10797                                           BTRFS_SUBVOL_RDONLY))
10798                         die("failure: btrfs_create_snapshot");
10799
10800                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10801                         die("failure: expected_uid_gid");
10802
10803                 safe_close(subvolume_fd);
10804
10805                 exit(EXIT_SUCCESS);
10806         }
10807         if (wait_for_pid(pid))
10808                 goto out;
10809
10810         /* remove directory */
10811         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10812                 log_stderr("failure: btrfs_delete_subvolume");
10813                 goto out;
10814         }
10815
10816         /* remove read-write snapshot */
10817         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10818                 log_stderr("failure: btrfs_delete_subvolume");
10819                 goto out;
10820         }
10821
10822         /* remove read-only snapshot */
10823         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10824                 log_stderr("failure: btrfs_delete_subvolume");
10825                 goto out;
10826         }
10827
10828         fret = 0;
10829         log_debug("Ran test");
10830 out:
10831         safe_close(attr.userns_fd);
10832         safe_close(open_tree_fd);
10833         safe_close(tree_fd);
10834
10835         return fret;
10836 }
10837
10838 static int btrfs_snapshots_fsids_mapped_userns(void)
10839 {
10840         int fret = -1;
10841         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10842         struct mount_attr attr = {
10843                 .attr_set = MOUNT_ATTR_IDMAP,
10844         };
10845         pid_t pid;
10846
10847         if (!caps_supported())
10848                 return 0;
10849
10850         /* Changing mount properties on a detached mount. */
10851         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10852         if (attr.userns_fd < 0) {
10853                 log_stderr("failure: get_userns_fd");
10854                 goto out;
10855         }
10856
10857         open_tree_fd = sys_open_tree(t_dir1_fd, "",
10858                                      AT_EMPTY_PATH |
10859                                      AT_NO_AUTOMOUNT |
10860                                      AT_SYMLINK_NOFOLLOW |
10861                                      OPEN_TREE_CLOEXEC |
10862                                      OPEN_TREE_CLONE);
10863         if (open_tree_fd < 0) {
10864                 log_stderr("failure: sys_open_tree");
10865                 goto out;
10866         }
10867
10868         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
10869                 log_stderr("failure: sys_mount_setattr");
10870                 goto out;
10871         }
10872
10873         /*
10874          * The open_tree() syscall returns an O_PATH file descriptor which we
10875          * can't use with ioctl(). So let's reopen it as a proper file
10876          * descriptor.
10877          */
10878         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10879         if (tree_fd < 0) {
10880                 log_stderr("failure: openat");
10881                 goto out;
10882         }
10883
10884         pid = fork();
10885         if (pid < 0) {
10886                 log_stderr("failure: fork");
10887                 goto out;
10888         }
10889         if (pid == 0) {
10890                 int subvolume_fd = -EBADF;
10891
10892                 if (!switch_userns(attr.userns_fd, 0, 0, false))
10893                         die("failure: switch_userns");
10894
10895                 /* create subvolume */
10896                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
10897                         die("failure: btrfs_create_subvolume");
10898
10899                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
10900                         die("failure: expected_uid_gid");
10901
10902                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
10903                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
10904                 if (subvolume_fd < 0)
10905                         die("failure: openat");
10906
10907                 /* create read-write snapshot */
10908                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10909                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
10910                         die("failure: btrfs_create_snapshot");
10911
10912                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
10913                         die("failure: expected_uid_gid");
10914
10915                 /* create read-only snapshot */
10916                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
10917                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
10918                                           BTRFS_SUBVOL_RDONLY))
10919                         die("failure: btrfs_create_snapshot");
10920
10921                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
10922                         die("failure: expected_uid_gid");
10923
10924                 safe_close(subvolume_fd);
10925
10926                 exit(EXIT_SUCCESS);
10927         }
10928         if (wait_for_pid(pid))
10929                 goto out;
10930
10931         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
10932                 die("failure: expected_uid_gid");
10933
10934         /* remove directory */
10935         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
10936                 log_stderr("failure: btrfs_delete_subvolume");
10937                 goto out;
10938         }
10939
10940         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
10941                 die("failure: expected_uid_gid");
10942
10943         /* remove read-write snapshot */
10944         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
10945                 log_stderr("failure: btrfs_delete_subvolume");
10946                 goto out;
10947         }
10948
10949         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
10950                 die("failure: expected_uid_gid");
10951
10952         /* remove read-only snapshot */
10953         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO)) {
10954                 log_stderr("failure: btrfs_delete_subvolume");
10955                 goto out;
10956         }
10957
10958         fret = 0;
10959         log_debug("Ran test");
10960 out:
10961         safe_close(attr.userns_fd);
10962         safe_close(open_tree_fd);
10963         safe_close(tree_fd);
10964
10965         return fret;
10966 }
10967
10968 static int btrfs_snapshots_fsids_unmapped(void)
10969 {
10970         int fret = -1;
10971         int open_tree_fd = -EBADF, tree_fd = -EBADF;
10972         struct mount_attr attr = {
10973                 .attr_set = MOUNT_ATTR_IDMAP,
10974         };
10975         pid_t pid;
10976
10977         if (!caps_supported())
10978                 return 0;
10979
10980         /* create directory for rename test */
10981         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
10982                 log_stderr("failure: btrfs_create_subvolume");
10983                 goto out;
10984         }
10985
10986         /* change ownership of all files to uid 0 */
10987         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
10988                 log_stderr("failure: fchownat");
10989                 goto out;
10990         }
10991
10992         /* Changing mount properties on a detached mount. */
10993         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
10994         if (attr.userns_fd < 0) {
10995                 log_stderr("failure: get_userns_fd");
10996                 goto out;
10997         }
10998
10999         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11000                                      AT_EMPTY_PATH |
11001                                      AT_NO_AUTOMOUNT |
11002                                      AT_SYMLINK_NOFOLLOW |
11003                                      OPEN_TREE_CLOEXEC |
11004                                      OPEN_TREE_CLONE);
11005         if (open_tree_fd < 0) {
11006                 log_stderr("failure: sys_open_tree");
11007                 goto out;
11008         }
11009
11010         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
11011                               sizeof(attr))) {
11012                 log_stderr("failure: sys_mount_setattr");
11013                 goto out;
11014         }
11015
11016         pid = fork();
11017         if (pid < 0) {
11018                 log_stderr("failure: fork");
11019                 goto out;
11020         }
11021         if (pid == 0) {
11022                 int subvolume_fd = -EBADF;
11023
11024                 if (!switch_fsids(0, 0)) {
11025                         log_stderr("failure: switch_fsids");
11026                         goto out;
11027                 }
11028
11029                 /*
11030                  * The caller's fsids don't have a mappings in the idmapped
11031                  * mount so any file creation must fail.
11032                  */
11033
11034                 /*
11035                  * The open_tree() syscall returns an O_PATH file descriptor
11036                  * which we can't use with ioctl(). So let's reopen it as a
11037                  * proper file descriptor.
11038                  */
11039                 tree_fd = openat(open_tree_fd, ".",
11040                                  O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11041                 if (tree_fd < 0)
11042                         die("failure: openat");
11043
11044                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11045                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11046                 if (subvolume_fd < 0)
11047                         die("failure: openat");
11048
11049                 /* create directory */
11050                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
11051                         die("failure: btrfs_create_subvolume");
11052                 if (errno != EOVERFLOW)
11053                         die("failure: errno");
11054
11055                 /* create read-write snapshot */
11056                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11057                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11058                         die("failure: btrfs_create_snapshot");
11059                 if (errno != EOVERFLOW)
11060                         die("failure: errno");
11061
11062                 /* create read-only snapshot */
11063                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11064                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11065                                            BTRFS_SUBVOL_RDONLY))
11066                         die("failure: btrfs_create_snapshot");
11067                 if (errno != EOVERFLOW)
11068                         die("failure: errno");
11069
11070                 /* try to rename a directory */
11071                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11072                                BTRFS_SUBVOLUME1_RENAME))
11073                         die("failure: renameat");
11074                 if (errno != EOVERFLOW)
11075                         die("failure: errno");
11076
11077                 if (!caps_down())
11078                         die("failure: caps_down");
11079
11080                 /* create read-write snapshot */
11081                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11082                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11083                         die("failure: btrfs_create_snapshot");
11084                 if (errno != EPERM)
11085                         die("failure: errno");
11086
11087                 /* create read-only snapshot */
11088                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11089                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11090                                            BTRFS_SUBVOL_RDONLY))
11091                         die("failure: btrfs_create_snapshot");
11092                 if (errno != EPERM)
11093                         die("failure: errno");
11094
11095                 /*
11096                  * The caller is not privileged over the inode so subvolume
11097                  * deletion must fail.
11098                  */
11099
11100                 /* remove directory */
11101                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11102                         die("failure: btrfs_delete_subvolume");
11103                 if (errno != EPERM)
11104                         die("failure: errno");
11105
11106                 if (!caps_up())
11107                         die("failure: caps_down");
11108
11109                 /*
11110                  * The caller is privileged over the inode so subvolume
11111                  * deletion must work.
11112                  */
11113
11114                 /* remove directory */
11115                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11116                         die("failure: btrfs_delete_subvolume");
11117
11118                 exit(EXIT_SUCCESS);
11119         }
11120         if (wait_for_pid(pid))
11121                 goto out;
11122
11123         fret = 0;
11124         log_debug("Ran test");
11125 out:
11126         safe_close(attr.userns_fd);
11127         safe_close(open_tree_fd);
11128         safe_close(tree_fd);
11129
11130         return fret;
11131 }
11132
11133 static int btrfs_snapshots_fsids_unmapped_userns(void)
11134 {
11135         int fret = -1;
11136         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
11137             userns_fd = -EBADF;
11138         struct mount_attr attr = {
11139                 .attr_set = MOUNT_ATTR_IDMAP,
11140         };
11141         pid_t pid;
11142
11143         if (!caps_supported())
11144                 return 0;
11145
11146         /* create directory for rename test */
11147         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
11148                 log_stderr("failure: btrfs_create_subvolume");
11149                 goto out;
11150         }
11151
11152         /* change ownership of all files to uid 0 */
11153         if (fchownat(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
11154                 log_stderr("failure: fchownat");
11155                 goto out;
11156         }
11157
11158         /* Changing mount properties on a detached mount. */
11159         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11160         if (attr.userns_fd < 0) {
11161                 log_stderr("failure: get_userns_fd");
11162                 goto out;
11163         }
11164
11165         /* Changing mount properties on a detached mount. */
11166         userns_fd = get_userns_fd(0, 30000, 10000);
11167         if (userns_fd < 0) {
11168                 log_stderr("failure: get_userns_fd");
11169                 goto out;
11170         }
11171
11172         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11173                                      AT_EMPTY_PATH |
11174                                      AT_NO_AUTOMOUNT |
11175                                      AT_SYMLINK_NOFOLLOW |
11176                                      OPEN_TREE_CLOEXEC |
11177                                      OPEN_TREE_CLONE);
11178         if (open_tree_fd < 0) {
11179                 log_stderr("failure: sys_open_tree");
11180                 goto out;
11181         }
11182
11183         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
11184                               sizeof(attr))) {
11185                 log_stderr("failure: sys_mount_setattr");
11186                 goto out;
11187         }
11188
11189         /*
11190          * The open_tree() syscall returns an O_PATH file descriptor
11191          * which we can't use with ioctl(). So let's reopen it as a
11192          * proper file descriptor.
11193          */
11194         tree_fd = openat(open_tree_fd, ".",
11195                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11196         if (tree_fd < 0) {
11197                 log_stderr("failure: openat");
11198                 goto out;
11199         }
11200
11201         subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11202                         O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11203         if (subvolume_fd < 0) {
11204                 log_stderr("failure: openat");
11205                 goto out;
11206         }
11207
11208         pid = fork();
11209         if (pid < 0) {
11210                 log_stderr("failure: fork");
11211                 goto out;
11212         }
11213         if (pid == 0) {
11214                 if (!switch_userns(userns_fd, 0, 0, false))
11215                         die("failure: switch_userns");
11216
11217                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
11218                                       t_overflowuid, t_overflowgid))
11219                         die("failure: expected_uid_gid");
11220
11221                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
11222                                       t_overflowuid, t_overflowgid))
11223                         die("failure: expected_uid_gid");
11224
11225                 /*
11226                  * The caller's fsids don't have a mappings in the idmapped
11227                  * mount so any file creation must fail.
11228                  */
11229
11230                 /* create directory */
11231                 if (!btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME2))
11232                         die("failure: btrfs_create_subvolume");
11233                 if (errno != EOVERFLOW)
11234                         die("failure: errno");
11235
11236                 /* create read-write snapshot */
11237                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11238                                            BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11239                         die("failure: btrfs_create_snapshot");
11240                 if (errno != EPERM)
11241                         die("failure: errno");
11242
11243                 /* create read-only snapshot */
11244                 if (!btrfs_create_snapshot(subvolume_fd, tree_fd,
11245                                            BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11246                                            BTRFS_SUBVOL_RDONLY))
11247                         die("failure: btrfs_create_snapshot");
11248                 if (errno != EPERM)
11249                         die("failure: errno");
11250
11251                 /* try to rename a directory */
11252                 if (!renameat(open_tree_fd, BTRFS_SUBVOLUME1, open_tree_fd,
11253                                BTRFS_SUBVOLUME1_RENAME))
11254                         die("failure: renameat");
11255                 if (errno != EOVERFLOW)
11256                         die("failure: errno");
11257
11258                 /*
11259                  * The caller is not privileged over the inode so subvolume
11260                  * deletion must fail.
11261                  */
11262
11263                 /* remove directory */
11264                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11265                         die("failure: btrfs_delete_subvolume");
11266                 if (errno != EPERM)
11267                         die("failure: errno");
11268
11269                 exit(EXIT_SUCCESS);
11270         }
11271         if (wait_for_pid(pid))
11272                 goto out;
11273
11274         /* remove directory */
11275         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11276                 die("failure: btrfs_delete_subvolume");
11277
11278         fret = 0;
11279         log_debug("Ran test");
11280 out:
11281         safe_close(attr.userns_fd);
11282         safe_close(open_tree_fd);
11283         safe_close(subvolume_fd);
11284         safe_close(tree_fd);
11285
11286         return fret;
11287 }
11288
11289 static int btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed(void)
11290 {
11291         int fret = -1;
11292         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11293         struct mount_attr attr = {
11294                 .attr_set = MOUNT_ATTR_IDMAP,
11295         };
11296         pid_t pid;
11297
11298         if (!caps_supported())
11299                 return 0;
11300
11301         /* Changing mount properties on a detached mount. */
11302         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11303         if (attr.userns_fd < 0) {
11304                 log_stderr("failure: get_userns_fd");
11305                 goto out;
11306         }
11307
11308         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11309                                      AT_EMPTY_PATH |
11310                                      AT_NO_AUTOMOUNT |
11311                                      AT_SYMLINK_NOFOLLOW |
11312                                      OPEN_TREE_CLOEXEC |
11313                                      OPEN_TREE_CLONE);
11314         if (open_tree_fd < 0) {
11315                 log_stderr("failure: sys_open_tree");
11316                 goto out;
11317         }
11318
11319         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11320                 log_stderr("failure: sys_mount_setattr");
11321                 goto out;
11322         }
11323
11324         /*
11325          * The open_tree() syscall returns an O_PATH file descriptor which we
11326          * can't use with ioctl(). So let's reopen it as a proper file
11327          * descriptor.
11328          */
11329         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11330         if (tree_fd < 0) {
11331                 log_stderr("failure: openat");
11332                 goto out;
11333         }
11334
11335         pid = fork();
11336         if (pid < 0) {
11337                 log_stderr("failure: fork");
11338                 goto out;
11339         }
11340         if (pid == 0) {
11341                 if (!switch_fsids(10000, 10000))
11342                         die("failure: switch fsids");
11343
11344                 if (!caps_down())
11345                         die("failure: raise caps");
11346
11347                 /*
11348                  * The caller's fsids now have mappings in the idmapped mount so
11349                  * any file creation must succedd.
11350                  */
11351
11352                 /* create subvolume */
11353                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11354                         die("failure: btrfs_create_subvolume");
11355
11356                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11357                         die("failure: check ownership");
11358
11359                 /*
11360                  * The scratch device is mounted with user_subvol_rm_allowed so
11361                  * subvolume deletion must succeed.
11362                  */
11363                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11364                         die("failure: btrfs_delete_subvolume");
11365
11366                 exit(EXIT_SUCCESS);
11367         }
11368         if (wait_for_pid(pid))
11369                 goto out;
11370
11371         fret = 0;
11372         log_debug("Ran test");
11373 out:
11374         safe_close(attr.userns_fd);
11375         safe_close(open_tree_fd);
11376         safe_close(tree_fd);
11377
11378         return fret;
11379 }
11380
11381 static int btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed(void)
11382 {
11383         int fret = -1;
11384         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11385         struct mount_attr attr = {
11386                 .attr_set = MOUNT_ATTR_IDMAP,
11387         };
11388         pid_t pid;
11389
11390         if (!caps_supported())
11391                 return 0;
11392
11393         /* Changing mount properties on a detached mount. */
11394         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11395         if (attr.userns_fd < 0) {
11396                 log_stderr("failure: get_userns_fd");
11397                 goto out;
11398         }
11399
11400         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11401                                      AT_EMPTY_PATH |
11402                                      AT_NO_AUTOMOUNT |
11403                                      AT_SYMLINK_NOFOLLOW |
11404                                      OPEN_TREE_CLOEXEC |
11405                                      OPEN_TREE_CLONE);
11406         if (open_tree_fd < 0) {
11407                 log_stderr("failure: sys_open_tree");
11408                 goto out;
11409         }
11410
11411         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11412                 log_stderr("failure: sys_mount_setattr");
11413                 goto out;
11414         }
11415
11416         /*
11417          * The open_tree() syscall returns an O_PATH file descriptor which we
11418          * can't use with ioctl(). So let's reopen it as a proper file
11419          * descriptor.
11420          */
11421         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11422         if (tree_fd < 0) {
11423                 log_stderr("failure: openat");
11424                 goto out;
11425         }
11426
11427         pid = fork();
11428         if (pid < 0) {
11429                 log_stderr("failure: fork");
11430                 goto out;
11431         }
11432         if (pid == 0) {
11433                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11434                         die("failure: switch_userns");
11435
11436                 /* The caller's fsids now have mappings in the idmapped mount so
11437                  * any file creation must fail.
11438                  */
11439
11440                 /* create subvolume */
11441                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11442                         die("failure: btrfs_create_subvolume");
11443
11444                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11445                         die("failure: check ownership");
11446
11447                 /*
11448                  * The scratch device is mounted with user_subvol_rm_allowed so
11449                  * subvolume deletion must succeed.
11450                  */
11451                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11452                         die("failure: btrfs_delete_subvolume");
11453
11454                 exit(EXIT_SUCCESS);
11455         }
11456         if (wait_for_pid(pid))
11457                 goto out;
11458
11459         fret = 0;
11460         log_debug("Ran test");
11461 out:
11462         safe_close(attr.userns_fd);
11463         safe_close(open_tree_fd);
11464         safe_close(tree_fd);
11465
11466         return fret;
11467 }
11468
11469 static int btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed(void)
11470 {
11471         int fret = -1;
11472         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11473         struct mount_attr attr = {
11474                 .attr_set = MOUNT_ATTR_IDMAP,
11475         };
11476         pid_t pid;
11477
11478         if (!caps_supported())
11479                 return 0;
11480
11481         /* Changing mount properties on a detached mount. */
11482         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11483         if (attr.userns_fd < 0) {
11484                 log_stderr("failure: get_userns_fd");
11485                 goto out;
11486         }
11487
11488         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11489                                      AT_EMPTY_PATH |
11490                                      AT_NO_AUTOMOUNT |
11491                                      AT_SYMLINK_NOFOLLOW |
11492                                      OPEN_TREE_CLOEXEC |
11493                                      OPEN_TREE_CLONE);
11494         if (open_tree_fd < 0) {
11495                 log_stderr("failure: sys_open_tree");
11496                 goto out;
11497         }
11498
11499         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11500                 log_stderr("failure: sys_mount_setattr");
11501                 goto out;
11502         }
11503
11504         /*
11505          * The open_tree() syscall returns an O_PATH file descriptor which we
11506          * can't use with ioctl(). So let's reopen it as a proper file
11507          * descriptor.
11508          */
11509         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11510         if (tree_fd < 0) {
11511                 log_stderr("failure: openat");
11512                 goto out;
11513         }
11514
11515         pid = fork();
11516         if (pid < 0) {
11517                 log_stderr("failure: fork");
11518                 goto out;
11519         }
11520         if (pid == 0) {
11521                 int subvolume_fd = -EBADF;
11522
11523                 if (!switch_fsids(10000, 10000))
11524                         die("failure: switch fsids");
11525
11526                 if (!caps_down())
11527                         die("failure: raise caps");
11528
11529                 /*
11530                  * The caller's fsids now have mappings in the idmapped mount so
11531                  * any file creation must succeed.
11532                  */
11533
11534                 /* create subvolume */
11535                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11536                         die("failure: btrfs_create_subvolume");
11537
11538                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11539                         die("failure: expected_uid_gid");
11540
11541                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11542                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11543                 if (subvolume_fd < 0)
11544                         die("failure: openat");
11545
11546                 /* create read-write snapshot */
11547                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11548                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11549                         die("failure: btrfs_create_snapshot");
11550
11551                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
11552                         die("failure: expected_uid_gid");
11553
11554                 /* create read-only snapshot */
11555                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11556                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11557                                           BTRFS_SUBVOL_RDONLY))
11558                         die("failure: btrfs_create_snapshot");
11559
11560                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 10000, 10000))
11561                         die("failure: expected_uid_gid");
11562
11563                 safe_close(subvolume_fd);
11564
11565                 /* remove subvolume */
11566                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11567                         die("failure: btrfs_delete_subvolume");
11568
11569                 /* remove read-write snapshot */
11570                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11571                         die("failure: btrfs_delete_subvolume");
11572
11573                 /* remove read-only snapshot */
11574                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11575                         die("failure: btrfs_delete_subvolume");
11576
11577                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11578                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11579                 if (subvolume_fd < 0)
11580                         die("failure: openat");
11581
11582                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11583                         die("failure: btrfs_set_subvolume_ro");
11584
11585                 safe_close(subvolume_fd);
11586
11587                 /* remove read-only snapshot */
11588                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11589                         die("failure: btrfs_delete_subvolume");
11590
11591                 exit(EXIT_SUCCESS);
11592         }
11593         if (wait_for_pid(pid))
11594                 goto out;
11595
11596         fret = 0;
11597         log_debug("Ran test");
11598 out:
11599         safe_close(attr.userns_fd);
11600         safe_close(open_tree_fd);
11601         safe_close(tree_fd);
11602
11603         return fret;
11604 }
11605
11606 static int btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed(void)
11607 {
11608         int fret = -1;
11609         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11610         struct mount_attr attr = {
11611                 .attr_set = MOUNT_ATTR_IDMAP,
11612         };
11613         pid_t pid;
11614
11615         if (!caps_supported())
11616                 return 0;
11617
11618         /* Changing mount properties on a detached mount. */
11619         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11620         if (attr.userns_fd < 0) {
11621                 log_stderr("failure: get_userns_fd");
11622                 goto out;
11623         }
11624
11625         open_tree_fd = sys_open_tree(t_mnt_scratch_fd, "",
11626                                      AT_EMPTY_PATH |
11627                                      AT_NO_AUTOMOUNT |
11628                                      AT_SYMLINK_NOFOLLOW |
11629                                      OPEN_TREE_CLOEXEC |
11630                                      OPEN_TREE_CLONE);
11631         if (open_tree_fd < 0) {
11632                 log_stderr("failure: sys_open_tree");
11633                 goto out;
11634         }
11635
11636         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11637                 log_stderr("failure: sys_mount_setattr");
11638                 goto out;
11639         }
11640
11641         /*
11642          * The open_tree() syscall returns an O_PATH file descriptor which we
11643          * can't use with ioctl(). So let's reopen it as a proper file
11644          * descriptor.
11645          */
11646         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11647         if (tree_fd < 0) {
11648                 log_stderr("failure: openat");
11649                 goto out;
11650         }
11651
11652         pid = fork();
11653         if (pid < 0) {
11654                 log_stderr("failure: fork");
11655                 goto out;
11656         }
11657         if (pid == 0) {
11658                 int subvolume_fd = -EBADF;
11659
11660                 if (!switch_userns(attr.userns_fd, 0, 0, false))
11661                         die("failure: switch_userns");
11662
11663                 /* create subvolume */
11664                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11665                         die("failure: btrfs_create_subvolume");
11666
11667                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
11668                         die("failure: expected_uid_gid");
11669
11670                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11671                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11672                 if (subvolume_fd < 0)
11673                         die("failure: openat");
11674
11675                 /* create read-write snapshot */
11676                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11677                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
11678                         die("failure: btrfs_create_snapshot");
11679
11680                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11681                         die("failure: expected_uid_gid");
11682
11683                 /* create read-only snapshot */
11684                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
11685                                           BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11686                                           BTRFS_SUBVOL_RDONLY))
11687                         die("failure: btrfs_create_snapshot");
11688
11689                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11690                         die("failure: expected_uid_gid");
11691
11692                 /* remove directory */
11693                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11694                         die("failure: btrfs_delete_subvolume");
11695
11696                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
11697                         die("failure: expected_uid_gid");
11698
11699                 /* remove read-write snapshot */
11700                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1))
11701                         die("failure: btrfs_delete_subvolume");
11702
11703                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO, 0, 0, 0))
11704                         die("failure: expected_uid_gid");
11705
11706                 /* remove read-only snapshot */
11707                 if (!btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11708                         die("failure: btrfs_delete_subvolume");
11709
11710                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO,
11711                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11712                 if (subvolume_fd < 0)
11713                         die("failure: openat");
11714
11715                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11716                         die("failure: btrfs_set_subvolume_ro");
11717
11718                 safe_close(subvolume_fd);
11719
11720                 /* remove read-only snapshot */
11721                 if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1_RO))
11722                         die("failure: btrfs_delete_subvolume");
11723
11724                 exit(EXIT_SUCCESS);
11725         }
11726         if (wait_for_pid(pid))
11727                 goto out;
11728
11729         fret = 0;
11730         log_debug("Ran test");
11731 out:
11732         safe_close(attr.userns_fd);
11733         safe_close(open_tree_fd);
11734         safe_close(tree_fd);
11735
11736         return fret;
11737 }
11738
11739 static int btrfs_delete_by_spec_id(void)
11740 {
11741         int fret = -1;
11742         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
11743         uint64_t subvolume_id1 = -EINVAL, subvolume_id2 = -EINVAL;
11744         struct mount_attr attr = {
11745                 .attr_set = MOUNT_ATTR_IDMAP,
11746         };
11747         pid_t pid;
11748
11749         /* Changing mount properties on a detached mount. */
11750         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11751         if (attr.userns_fd < 0) {
11752                 log_stderr("failure: get_userns_fd");
11753                 goto out;
11754         }
11755
11756         /* create subvolume */
11757         if (btrfs_create_subvolume(t_mnt_scratch_fd, "A")) {
11758                 log_stderr("failure: btrfs_create_subvolume");
11759                 goto out;
11760         }
11761
11762         /* create subvolume */
11763         if (btrfs_create_subvolume(t_mnt_scratch_fd, "B")) {
11764                 log_stderr("failure: btrfs_create_subvolume");
11765                 goto out;
11766         }
11767
11768         subvolume_fd = openat(t_mnt_scratch_fd, "B", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11769         if (subvolume_fd < 0) {
11770                 log_stderr("failure: openat");
11771                 goto out;
11772         }
11773
11774         /* create subvolume */
11775         if (btrfs_create_subvolume(subvolume_fd, "C")) {
11776                 log_stderr("failure: btrfs_create_subvolume");
11777                 goto out;
11778         }
11779
11780         safe_close(subvolume_fd);
11781
11782         subvolume_fd = openat(t_mnt_scratch_fd, "A", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11783         if (subvolume_fd < 0) {
11784                 log_stderr("failure: openat");
11785                 goto out;
11786         }
11787
11788         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id1)) {
11789                 log_stderr("failure: btrfs_get_subvolume_id");
11790                 goto out;
11791         }
11792
11793         subvolume_fd = openat(t_mnt_scratch_fd, "B/C", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11794         if (subvolume_fd < 0) {
11795                 log_stderr("failure: openat");
11796                 goto out;
11797         }
11798
11799         if (btrfs_get_subvolume_id(subvolume_fd, &subvolume_id2)) {
11800                 log_stderr("failure: btrfs_get_subvolume_id");
11801                 goto out;
11802         }
11803
11804         if (sys_mount(t_device_scratch, t_mountpoint, "btrfs", 0, "subvol=B/C")) {
11805                 log_stderr("failure: mount");
11806                 goto out;
11807         }
11808
11809         open_tree_fd = sys_open_tree(-EBADF, t_mountpoint,
11810                                      AT_NO_AUTOMOUNT |
11811                                      AT_SYMLINK_NOFOLLOW |
11812                                      OPEN_TREE_CLOEXEC |
11813                                      OPEN_TREE_CLONE);
11814         if (open_tree_fd < 0) {
11815                 log_stderr("failure: sys_open_tree");
11816                 goto out;
11817         }
11818
11819         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11820                 log_stderr("failure: sys_mount_setattr");
11821                 goto out;
11822         }
11823
11824         /*
11825          * The open_tree() syscall returns an O_PATH file descriptor which we
11826          * can't use with ioctl(). So let's reopen it as a proper file
11827          * descriptor.
11828          */
11829         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11830         if (tree_fd < 0) {
11831                 log_stderr("failure: openat");
11832                 goto out;
11833         }
11834
11835         pid = fork();
11836         if (pid < 0) {
11837                 log_stderr("failure: fork");
11838                 goto out;
11839         }
11840         if (pid == 0) {
11841                 /*
11842                  * The subvolume isn't exposed in the idmapped mount so
11843                  * delation via spec id must fail.
11844                  */
11845                 if (!btrfs_delete_subvolume_id(tree_fd, subvolume_id1))
11846                         die("failure: btrfs_delete_subvolume_id");
11847                 if (errno != EOPNOTSUPP)
11848                         die("failure: errno");
11849
11850                 if (btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id1))
11851                         die("failure: btrfs_delete_subvolume_id");
11852
11853                 exit(EXIT_SUCCESS);
11854         }
11855         if (wait_for_pid(pid))
11856                 goto out;
11857
11858         fret = 0;
11859         log_debug("Ran test");
11860 out:
11861         safe_close(attr.userns_fd);
11862         safe_close(open_tree_fd);
11863         safe_close(tree_fd);
11864         sys_umount2(t_mountpoint, MNT_DETACH);
11865         btrfs_delete_subvolume_id(t_mnt_scratch_fd, subvolume_id2);
11866         btrfs_delete_subvolume(t_mnt_scratch_fd, "B");
11867
11868         return fret;
11869 }
11870
11871 static int btrfs_subvolumes_setflags_fsids_mapped(void)
11872 {
11873         int fret = -1;
11874         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11875         struct mount_attr attr = {
11876                 .attr_set = MOUNT_ATTR_IDMAP,
11877         };
11878         pid_t pid;
11879
11880         if (!caps_supported())
11881                 return 0;
11882
11883         /* Changing mount properties on a detached mount. */
11884         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
11885         if (attr.userns_fd < 0) {
11886                 log_stderr("failure: get_userns_fd");
11887                 goto out;
11888         }
11889
11890         open_tree_fd = sys_open_tree(t_dir1_fd, "",
11891                                      AT_EMPTY_PATH |
11892                                      AT_NO_AUTOMOUNT |
11893                                      AT_SYMLINK_NOFOLLOW |
11894                                      OPEN_TREE_CLOEXEC |
11895                                      OPEN_TREE_CLONE);
11896         if (open_tree_fd < 0) {
11897                 log_stderr("failure: sys_open_tree");
11898                 goto out;
11899         }
11900
11901         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
11902                 log_stderr("failure: sys_mount_setattr");
11903                 goto out;
11904         }
11905
11906         /*
11907          * The open_tree() syscall returns an O_PATH file descriptor which we
11908          * can't use with ioctl(). So let's reopen it as a proper file
11909          * descriptor.
11910          */
11911         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11912         if (tree_fd < 0) {
11913                 log_stderr("failure: openat");
11914                 goto out;
11915         }
11916
11917         pid = fork();
11918         if (pid < 0) {
11919                 log_stderr("failure: fork");
11920                 goto out;
11921         }
11922         if (pid == 0) {
11923                 int subvolume_fd = -EBADF;
11924                 bool read_only = false;
11925
11926                 if (!switch_fsids(10000, 10000))
11927                         die("failure: switch fsids");
11928
11929                 if (!caps_down())
11930                         die("failure: raise caps");
11931
11932                 /* The caller's fsids now have mappings in the idmapped mount so
11933                  * any file creation must fail.
11934                  */
11935
11936                 /* create subvolume */
11937                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
11938                         die("failure: btrfs_create_subvolume");
11939
11940                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
11941                         die("failure: expected_uid_gid");
11942
11943                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
11944                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
11945                 if (subvolume_fd < 0)
11946                         die("failure: openat");
11947
11948                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11949                         die("failure: btrfs_get_subvolume_ro");
11950
11951                 if (read_only)
11952                         die("failure: read_only");
11953
11954                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
11955                         die("failure: btrfs_set_subvolume_ro");
11956
11957                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11958                         die("failure: btrfs_get_subvolume_ro");
11959
11960                 if (!read_only)
11961                         die("failure: not read_only");
11962
11963                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
11964                         die("failure: btrfs_set_subvolume_ro");
11965
11966                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
11967                         die("failure: btrfs_get_subvolume_ro");
11968
11969                 if (read_only)
11970                         die("failure: read_only");
11971
11972                 safe_close(subvolume_fd);
11973
11974                 exit(EXIT_SUCCESS);
11975         }
11976         if (wait_for_pid(pid))
11977                 goto out;
11978
11979         /* remove directory */
11980         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
11981                 log_stderr("failure: btrfs_delete_subvolume");
11982                 goto out;
11983         }
11984
11985         fret = 0;
11986         log_debug("Ran test");
11987 out:
11988         safe_close(attr.userns_fd);
11989         safe_close(open_tree_fd);
11990         safe_close(tree_fd);
11991
11992         return fret;
11993 }
11994
11995 static int btrfs_subvolumes_setflags_fsids_mapped_userns(void)
11996 {
11997         int fret = -1;
11998         int open_tree_fd = -EBADF, tree_fd = -EBADF;
11999         struct mount_attr attr = {
12000                 .attr_set = MOUNT_ATTR_IDMAP,
12001         };
12002         pid_t pid;
12003
12004         if (!caps_supported())
12005                 return 0;
12006
12007         /* Changing mount properties on a detached mount. */
12008         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12009         if (attr.userns_fd < 0) {
12010                 log_stderr("failure: get_userns_fd");
12011                 goto out;
12012         }
12013
12014         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12015                                      AT_EMPTY_PATH |
12016                                      AT_NO_AUTOMOUNT |
12017                                      AT_SYMLINK_NOFOLLOW |
12018                                      OPEN_TREE_CLOEXEC |
12019                                      OPEN_TREE_CLONE);
12020         if (open_tree_fd < 0) {
12021                 log_stderr("failure: sys_open_tree");
12022                 goto out;
12023         }
12024
12025         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12026                 log_stderr("failure: sys_mount_setattr");
12027                 goto out;
12028         }
12029
12030         /*
12031          * The open_tree() syscall returns an O_PATH file descriptor which we
12032          * can't use with ioctl(). So let's reopen it as a proper file
12033          * descriptor.
12034          */
12035         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12036         if (tree_fd < 0) {
12037                 log_stderr("failure: openat");
12038                 goto out;
12039         }
12040
12041         pid = fork();
12042         if (pid < 0) {
12043                 log_stderr("failure: fork");
12044                 goto out;
12045         }
12046         if (pid == 0) {
12047                 int subvolume_fd = -EBADF;
12048                 bool read_only = false;
12049
12050                 if (!switch_userns(attr.userns_fd, 0, 0, false))
12051                         die("failure: switch_userns");
12052
12053                 /* The caller's fsids now have mappings in the idmapped mount so
12054                  * any file creation must fail.
12055                  */
12056
12057                 /* create subvolume */
12058                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12059                         die("failure: btrfs_create_subvolume");
12060
12061                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12062                         die("failure: expected_uid_gid");
12063
12064                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12065                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12066                 if (subvolume_fd < 0)
12067                         die("failure: openat");
12068
12069                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12070                         die("failure: btrfs_get_subvolume_ro");
12071
12072                 if (read_only)
12073                         die("failure: read_only");
12074
12075                 if (btrfs_set_subvolume_ro(subvolume_fd, true))
12076                         die("failure: btrfs_set_subvolume_ro");
12077
12078                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12079                         die("failure: btrfs_get_subvolume_ro");
12080
12081                 if (!read_only)
12082                         die("failure: not read_only");
12083
12084                 if (btrfs_set_subvolume_ro(subvolume_fd, false))
12085                         die("failure: btrfs_set_subvolume_ro");
12086
12087                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12088                         die("failure: btrfs_get_subvolume_ro");
12089
12090                 if (read_only)
12091                         die("failure: read_only");
12092
12093                 safe_close(subvolume_fd);
12094
12095                 exit(EXIT_SUCCESS);
12096         }
12097         if (wait_for_pid(pid))
12098                 goto out;
12099
12100         /* remove directory */
12101         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12102                 log_stderr("failure: btrfs_delete_subvolume");
12103                 goto out;
12104         }
12105
12106         fret = 0;
12107         log_debug("Ran test");
12108 out:
12109         safe_close(attr.userns_fd);
12110         safe_close(open_tree_fd);
12111         safe_close(tree_fd);
12112
12113         return fret;
12114 }
12115
12116 static int btrfs_subvolumes_setflags_fsids_unmapped(void)
12117 {
12118         int fret = -1;
12119         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12120         struct mount_attr attr = {
12121                 .attr_set = MOUNT_ATTR_IDMAP,
12122         };
12123         pid_t pid;
12124
12125         if (!caps_supported())
12126                 return 0;
12127
12128         /* Changing mount properties on a detached mount. */
12129         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12130         if (attr.userns_fd < 0) {
12131                 log_stderr("failure: get_userns_fd");
12132                 goto out;
12133         }
12134
12135         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12136                                      AT_EMPTY_PATH |
12137                                      AT_NO_AUTOMOUNT |
12138                                      AT_SYMLINK_NOFOLLOW |
12139                                      OPEN_TREE_CLOEXEC |
12140                                      OPEN_TREE_CLONE);
12141         if (open_tree_fd < 0) {
12142                 log_stderr("failure: sys_open_tree");
12143                 goto out;
12144         }
12145
12146         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12147                 log_stderr("failure: sys_mount_setattr");
12148                 goto out;
12149         }
12150
12151         /*
12152          * The open_tree() syscall returns an O_PATH file descriptor which we
12153          * can't use with ioctl(). So let's reopen it as a proper file
12154          * descriptor.
12155          */
12156         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12157         if (tree_fd < 0) {
12158                 log_stderr("failure: openat");
12159                 goto out;
12160         }
12161
12162         /* create subvolume */
12163         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12164                 log_stderr("failure: btrfs_create_subvolume");
12165                 goto out;
12166         }
12167
12168         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12169                 log_stderr("failure: expected_uid_gid");
12170                 goto out;
12171         }
12172
12173         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12174                 log_stderr("failure: expected_uid_gid");
12175                 goto out;
12176         }
12177
12178         pid = fork();
12179         if (pid < 0) {
12180                 log_stderr("failure: fork");
12181                 goto out;
12182         }
12183         if (pid == 0) {
12184                 int subvolume_fd = -EBADF;
12185                 bool read_only = false;
12186
12187                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12188                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12189                 if (subvolume_fd < 0)
12190                         die("failure: openat");
12191
12192                 if (!switch_fsids(0, 0))
12193                         die("failure: switch fsids");
12194
12195                 if (!caps_down())
12196                         die("failure: raise caps");
12197
12198                 /*
12199                  * The caller's fsids don't have mappings in the idmapped mount
12200                  * so any file creation must fail.
12201                  */
12202
12203                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12204                         die("failure: btrfs_get_subvolume_ro");
12205
12206                 if (read_only)
12207                         die("failure: read_only");
12208
12209                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12210                         die("failure: btrfs_set_subvolume_ro");
12211                 if (errno != EPERM)
12212                         die("failure: errno");
12213
12214                 safe_close(subvolume_fd);
12215
12216                 exit(EXIT_SUCCESS);
12217         }
12218         if (wait_for_pid(pid))
12219                 goto out;
12220
12221         /* remove directory */
12222         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12223                 log_stderr("failure: btrfs_delete_subvolume");
12224                 goto out;
12225         }
12226
12227         fret = 0;
12228         log_debug("Ran test");
12229 out:
12230         safe_close(attr.userns_fd);
12231         safe_close(open_tree_fd);
12232         safe_close(tree_fd);
12233
12234         return fret;
12235 }
12236
12237 static int btrfs_subvolumes_setflags_fsids_unmapped_userns(void)
12238 {
12239         int fret = -1;
12240         int open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
12241         struct mount_attr attr = {
12242                 .attr_set = MOUNT_ATTR_IDMAP,
12243         };
12244         pid_t pid;
12245
12246         if (!caps_supported())
12247                 return 0;
12248
12249         /* Changing mount properties on a detached mount. */
12250         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12251         if (attr.userns_fd < 0) {
12252                 log_stderr("failure: get_userns_fd");
12253                 goto out;
12254         }
12255
12256         /* Changing mount properties on a detached mount. */
12257         userns_fd = get_userns_fd(0, 30000, 10000);
12258         if (userns_fd < 0) {
12259                 log_stderr("failure: get_userns_fd");
12260                 goto out;
12261         }
12262
12263         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12264                                      AT_EMPTY_PATH |
12265                                      AT_NO_AUTOMOUNT |
12266                                      AT_SYMLINK_NOFOLLOW |
12267                                      OPEN_TREE_CLOEXEC |
12268                                      OPEN_TREE_CLONE);
12269         if (open_tree_fd < 0) {
12270                 log_stderr("failure: sys_open_tree");
12271                 goto out;
12272         }
12273
12274         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12275                 log_stderr("failure: sys_mount_setattr");
12276                 goto out;
12277         }
12278
12279         /*
12280          * The open_tree() syscall returns an O_PATH file descriptor which we
12281          * can't use with ioctl(). So let's reopen it as a proper file
12282          * descriptor.
12283          */
12284         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12285         if (tree_fd < 0) {
12286                 log_stderr("failure: openat");
12287                 goto out;
12288         }
12289
12290         /* create subvolume */
12291         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12292                 log_stderr("failure: btrfs_create_subvolume");
12293                 goto out;
12294         }
12295
12296         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12297                 log_stderr("failure: expected_uid_gid");
12298                 goto out;
12299         }
12300
12301         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12302                 log_stderr("failure: expected_uid_gid");
12303                 goto out;
12304         }
12305
12306         pid = fork();
12307         if (pid < 0) {
12308                 log_stderr("failure: fork");
12309                 goto out;
12310         }
12311         if (pid == 0) {
12312                 int subvolume_fd = -EBADF;
12313                 bool read_only = false;
12314
12315                 /*
12316                  * The caller's fsids don't have mappings in the idmapped mount
12317                  * so any file creation must fail.
12318                  */
12319
12320                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12321                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12322                 if (subvolume_fd < 0)
12323                         die("failure: openat");
12324
12325                 if (!switch_userns(userns_fd, 0, 0, false))
12326                         die("failure: switch_userns");
12327
12328                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12329                                       t_overflowuid, t_overflowgid))
12330                         die("failure: expected_uid_gid");
12331
12332                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12333                                       t_overflowuid, t_overflowgid))
12334                         die("failure: expected_uid_gid");
12335
12336                 if (btrfs_get_subvolume_ro(subvolume_fd, &read_only))
12337                         die("failure: btrfs_get_subvolume_ro");
12338
12339                 if (read_only)
12340                         die("failure: read_only");
12341
12342                 if (!btrfs_set_subvolume_ro(subvolume_fd, true))
12343                         die("failure: btrfs_set_subvolume_ro");
12344                 if (errno != EPERM)
12345                         die("failure: errno");
12346
12347                 safe_close(subvolume_fd);
12348
12349                 exit(EXIT_SUCCESS);
12350         }
12351         if (wait_for_pid(pid))
12352                 goto out;
12353
12354         /* remove directory */
12355         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12356                 log_stderr("failure: btrfs_delete_subvolume");
12357                 goto out;
12358         }
12359
12360         fret = 0;
12361         log_debug("Ran test");
12362 out:
12363         safe_close(attr.userns_fd);
12364         safe_close(open_tree_fd);
12365         safe_close(tree_fd);
12366         safe_close(userns_fd);
12367
12368         return fret;
12369 }
12370
12371 static int btrfs_snapshots_setflags_fsids_mapped(void)
12372 {
12373         int fret = -1;
12374         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12375         struct mount_attr attr = {
12376                 .attr_set = MOUNT_ATTR_IDMAP,
12377         };
12378         pid_t pid;
12379
12380         if (!caps_supported())
12381                 return 0;
12382
12383         /* Changing mount properties on a detached mount. */
12384         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12385         if (attr.userns_fd < 0) {
12386                 log_stderr("failure: get_userns_fd");
12387                 goto out;
12388         }
12389
12390         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12391                                      AT_EMPTY_PATH |
12392                                      AT_NO_AUTOMOUNT |
12393                                      AT_SYMLINK_NOFOLLOW |
12394                                      OPEN_TREE_CLOEXEC |
12395                                      OPEN_TREE_CLONE);
12396         if (open_tree_fd < 0) {
12397                 log_stderr("failure: sys_open_tree");
12398                 goto out;
12399         }
12400
12401         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12402                 log_stderr("failure: sys_mount_setattr");
12403                 goto out;
12404         }
12405
12406         /*
12407          * The open_tree() syscall returns an O_PATH file descriptor which we
12408          * can't use with ioctl(). So let's reopen it as a proper file
12409          * descriptor.
12410          */
12411         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12412         if (tree_fd < 0) {
12413                 log_stderr("failure: openat");
12414                 goto out;
12415         }
12416
12417         pid = fork();
12418         if (pid < 0) {
12419                 log_stderr("failure: fork");
12420                 goto out;
12421         }
12422         if (pid == 0) {
12423                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12424                 bool read_only = false;
12425
12426                 if (!switch_fsids(10000, 10000))
12427                         die("failure: switch fsids");
12428
12429                 if (!caps_down())
12430                         die("failure: raise caps");
12431
12432                 /*
12433                  * The caller's fsids now have mappings in the idmapped mount
12434                  * so any file creation must succeed.
12435                  */
12436
12437                 /* create subvolume */
12438                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12439                         die("failure: btrfs_create_subvolume");
12440
12441                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000))
12442                         die("failure: expected_uid_gid");
12443
12444                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12445                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12446                 if (subvolume_fd < 0)
12447                         die("failure: openat");
12448
12449                 /* create read-write snapshot */
12450                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12451                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12452                         die("failure: btrfs_create_snapshot");
12453
12454                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000))
12455                         die("failure: expected_uid_gid");
12456
12457                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12458                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12459                 if (snapshot_fd < 0)
12460                         die("failure: openat");
12461
12462                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12463                         die("failure: btrfs_get_subvolume_ro");
12464
12465                 if (read_only)
12466                         die("failure: read_only");
12467
12468                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12469                         die("failure: btrfs_set_subvolume_ro");
12470
12471                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12472                         die("failure: btrfs_get_subvolume_ro");
12473
12474                 if (!read_only)
12475                         die("failure: not read_only");
12476
12477                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12478                         die("failure: btrfs_set_subvolume_ro");
12479
12480                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12481                         die("failure: btrfs_get_subvolume_ro");
12482
12483                 if (read_only)
12484                         die("failure: read_only");
12485
12486                 safe_close(snapshot_fd);
12487                 safe_close(subvolume_fd);
12488
12489                 exit(EXIT_SUCCESS);
12490         }
12491         if (wait_for_pid(pid))
12492                 goto out;
12493
12494         /* remove directory */
12495         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12496                 log_stderr("failure: btrfs_delete_subvolume");
12497                 goto out;
12498         }
12499
12500         /* remove directory */
12501         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12502                 log_stderr("failure: btrfs_delete_subvolume");
12503                 goto out;
12504         }
12505
12506         fret = 0;
12507         log_debug("Ran test");
12508 out:
12509         safe_close(attr.userns_fd);
12510         safe_close(open_tree_fd);
12511         safe_close(tree_fd);
12512
12513         return fret;
12514 }
12515
12516 static int btrfs_snapshots_setflags_fsids_mapped_userns(void)
12517 {
12518         int fret = -1;
12519         int open_tree_fd = -EBADF, tree_fd = -EBADF;
12520         struct mount_attr attr = {
12521                 .attr_set = MOUNT_ATTR_IDMAP,
12522         };
12523         pid_t pid;
12524
12525         if (!caps_supported())
12526                 return 0;
12527
12528         /* Changing mount properties on a detached mount. */
12529         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12530         if (attr.userns_fd < 0) {
12531                 log_stderr("failure: get_userns_fd");
12532                 goto out;
12533         }
12534
12535         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12536                                      AT_EMPTY_PATH |
12537                                      AT_NO_AUTOMOUNT |
12538                                      AT_SYMLINK_NOFOLLOW |
12539                                      OPEN_TREE_CLOEXEC |
12540                                      OPEN_TREE_CLONE);
12541         if (open_tree_fd < 0) {
12542                 log_stderr("failure: sys_open_tree");
12543                 goto out;
12544         }
12545
12546         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12547                 log_stderr("failure: sys_mount_setattr");
12548                 goto out;
12549         }
12550
12551         /*
12552          * The open_tree() syscall returns an O_PATH file descriptor which we
12553          * can't use with ioctl(). So let's reopen it as a proper file
12554          * descriptor.
12555          */
12556         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12557         if (tree_fd < 0) {
12558                 log_stderr("failure: openat");
12559                 goto out;
12560         }
12561
12562         pid = fork();
12563         if (pid < 0) {
12564                 log_stderr("failure: fork");
12565                 goto out;
12566         }
12567         if (pid == 0) {
12568                 int snapshot_fd = -EBADF, subvolume_fd = -EBADF;
12569                 bool read_only = false;
12570
12571                 if (!switch_userns(attr.userns_fd, 0, 0, false))
12572                         die("failure: switch_userns");
12573
12574                 /*
12575                  * The caller's fsids now have mappings in the idmapped mount so
12576                  * any file creation must succeed.
12577                  */
12578
12579                 /* create subvolume */
12580                 if (btrfs_create_subvolume(tree_fd, BTRFS_SUBVOLUME1))
12581                         die("failure: btrfs_create_subvolume");
12582
12583                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 0, 0))
12584                         die("failure: expected_uid_gid");
12585
12586                 subvolume_fd = openat(tree_fd, BTRFS_SUBVOLUME1,
12587                                       O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12588                 if (subvolume_fd < 0)
12589                         die("failure: openat");
12590
12591                 /* create read-write snapshot */
12592                 if (btrfs_create_snapshot(subvolume_fd, tree_fd,
12593                                           BTRFS_SUBVOLUME1_SNAPSHOT1, 0))
12594                         die("failure: btrfs_create_snapshot");
12595
12596                 if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0))
12597                         die("failure: expected_uid_gid");
12598
12599                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12600                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12601                 if (snapshot_fd < 0)
12602                         die("failure: openat");
12603
12604                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12605                         die("failure: btrfs_get_subvolume_ro");
12606
12607                 if (read_only)
12608                         die("failure: read_only");
12609
12610                 if (btrfs_set_subvolume_ro(snapshot_fd, true))
12611                         die("failure: btrfs_set_subvolume_ro");
12612
12613                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12614                         die("failure: btrfs_get_subvolume_ro");
12615
12616                 if (!read_only)
12617                         die("failure: not read_only");
12618
12619                 if (btrfs_set_subvolume_ro(snapshot_fd, false))
12620                         die("failure: btrfs_set_subvolume_ro");
12621
12622                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12623                         die("failure: btrfs_get_subvolume_ro");
12624
12625                 if (read_only)
12626                         die("failure: read_only");
12627
12628                 safe_close(snapshot_fd);
12629                 safe_close(subvolume_fd);
12630
12631                 exit(EXIT_SUCCESS);
12632         }
12633         if (wait_for_pid(pid))
12634                 goto out;
12635
12636         /* remove directory */
12637         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12638                 log_stderr("failure: btrfs_delete_subvolume");
12639                 goto out;
12640         }
12641
12642         /* remove directory */
12643         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12644                 log_stderr("failure: btrfs_delete_subvolume");
12645                 goto out;
12646         }
12647
12648         fret = 0;
12649         log_debug("Ran test");
12650 out:
12651         safe_close(attr.userns_fd);
12652         safe_close(open_tree_fd);
12653         safe_close(tree_fd);
12654
12655         return fret;
12656 }
12657
12658 static int btrfs_snapshots_setflags_fsids_unmapped(void)
12659 {
12660         int fret = -1;
12661         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF;
12662         struct mount_attr attr = {
12663                 .attr_set = MOUNT_ATTR_IDMAP,
12664         };
12665         pid_t pid;
12666
12667         if (!caps_supported())
12668                 return 0;
12669
12670         /* Changing mount properties on a detached mount. */
12671         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12672         if (attr.userns_fd < 0) {
12673                 log_stderr("failure: get_userns_fd");
12674                 goto out;
12675         }
12676
12677         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12678                                      AT_EMPTY_PATH |
12679                                      AT_NO_AUTOMOUNT |
12680                                      AT_SYMLINK_NOFOLLOW |
12681                                      OPEN_TREE_CLOEXEC |
12682                                      OPEN_TREE_CLONE);
12683         if (open_tree_fd < 0) {
12684                 log_stderr("failure: sys_open_tree");
12685                 goto out;
12686         }
12687
12688         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12689                 log_stderr("failure: sys_mount_setattr");
12690                 goto out;
12691         }
12692
12693         /*
12694          * The open_tree() syscall returns an O_PATH file descriptor which we
12695          * can't use with ioctl(). So let's reopen it as a proper file
12696          * descriptor.
12697          */
12698         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12699         if (tree_fd < 0) {
12700                 log_stderr("failure: openat");
12701                 goto out;
12702         }
12703
12704         /* create subvolume */
12705         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12706                 log_stderr("failure: btrfs_create_subvolume");
12707                 goto out;
12708         }
12709
12710         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12711                 log_stderr("failure: expected_uid_gid");
12712                 goto out;
12713         }
12714
12715         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12716                 log_stderr("failure: expected_uid_gid");
12717                 goto out;
12718         }
12719
12720         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12721                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12722         if (subvolume_fd < 0) {
12723                 log_stderr("failure: openat");
12724                 goto out;
12725         }
12726
12727         /* create read-write snapshot */
12728         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12729                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12730                 log_stderr("failure: btrfs_create_snapshot");
12731                 goto out;
12732         }
12733
12734         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12735                 log_stderr("failure: expected_uid_gid");
12736                 goto out;
12737         }
12738
12739         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12740                 log_stderr("failure: expected_uid_gid");
12741                 goto out;
12742         }
12743
12744         pid = fork();
12745         if (pid < 0) {
12746                 log_stderr("failure: fork");
12747                 goto out;
12748         }
12749         if (pid == 0) {
12750                 int snapshot_fd = -EBADF;
12751                 bool read_only = false;
12752
12753                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12754                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12755                 if (snapshot_fd < 0)
12756                         die("failure: openat");
12757
12758                 if (!switch_fsids(0, 0))
12759                         die("failure: switch fsids");
12760
12761                 if (!caps_down())
12762                         die("failure: raise caps");
12763
12764                 /*
12765                  * The caller's fsids don't have mappings in the idmapped mount
12766                  * so any file creation must fail.
12767                  */
12768
12769                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12770                         die("failure: btrfs_get_subvolume_ro");
12771
12772                 if (read_only)
12773                         die("failure: read_only");
12774
12775                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12776                         die("failure: btrfs_set_subvolume_ro");
12777                 if (errno != EPERM)
12778                         die("failure: errno");
12779
12780                 safe_close(snapshot_fd);
12781
12782                 exit(EXIT_SUCCESS);
12783         }
12784         if (wait_for_pid(pid))
12785                 goto out;
12786
12787         /* remove directory */
12788         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12789                 log_stderr("failure: btrfs_delete_subvolume");
12790                 goto out;
12791         }
12792
12793         /* remove directory */
12794         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12795                 log_stderr("failure: btrfs_delete_subvolume");
12796                 goto out;
12797         }
12798
12799         fret = 0;
12800         log_debug("Ran test");
12801 out:
12802         safe_close(attr.userns_fd);
12803         safe_close(open_tree_fd);
12804         safe_close(subvolume_fd);
12805         safe_close(tree_fd);
12806
12807         return fret;
12808 }
12809
12810 static int btrfs_snapshots_setflags_fsids_unmapped_userns(void)
12811 {
12812         int fret = -1;
12813         int open_tree_fd = -EBADF, subvolume_fd = -EBADF, tree_fd = -EBADF,
12814             userns_fd = -EBADF;
12815         struct mount_attr attr = {
12816                 .attr_set = MOUNT_ATTR_IDMAP,
12817         };
12818         pid_t pid;
12819
12820         if (!caps_supported())
12821                 return 0;
12822
12823         /* Changing mount properties on a detached mount. */
12824         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
12825         if (attr.userns_fd < 0) {
12826                 log_stderr("failure: get_userns_fd");
12827                 goto out;
12828         }
12829
12830         /* Changing mount properties on a detached mount. */
12831         userns_fd = get_userns_fd(0, 30000, 10000);
12832         if (userns_fd < 0) {
12833                 log_stderr("failure: get_userns_fd");
12834                 goto out;
12835         }
12836
12837         open_tree_fd = sys_open_tree(t_dir1_fd, "",
12838                                      AT_EMPTY_PATH |
12839                                      AT_NO_AUTOMOUNT |
12840                                      AT_SYMLINK_NOFOLLOW |
12841                                      OPEN_TREE_CLOEXEC |
12842                                      OPEN_TREE_CLONE);
12843         if (open_tree_fd < 0) {
12844                 log_stderr("failure: sys_open_tree");
12845                 goto out;
12846         }
12847
12848         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
12849                 log_stderr("failure: sys_mount_setattr");
12850                 goto out;
12851         }
12852
12853         /*
12854          * The open_tree() syscall returns an O_PATH file descriptor which we
12855          * can't use with ioctl(). So let's reopen it as a proper file
12856          * descriptor.
12857          */
12858         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12859         if (tree_fd < 0) {
12860                 log_stderr("failure: openat");
12861                 goto out;
12862         }
12863
12864         /* create subvolume */
12865         if (btrfs_create_subvolume(t_dir1_fd, BTRFS_SUBVOLUME1)) {
12866                 log_stderr("failure: btrfs_create_subvolume");
12867                 goto out;
12868         }
12869
12870         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0, 0, 0)) {
12871                 log_stderr("failure: expected_uid_gid");
12872                 goto out;
12873         }
12874
12875         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1, 0, 10000, 10000)) {
12876                 log_stderr("failure: expected_uid_gid");
12877                 goto out;
12878         }
12879
12880         subvolume_fd = openat(t_dir1_fd, BTRFS_SUBVOLUME1,
12881                               O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12882         if (subvolume_fd < 0) {
12883                 log_stderr("failure: openat");
12884                 goto out;
12885         }
12886
12887         /* create read-write snapshot */
12888         if (btrfs_create_snapshot(subvolume_fd, t_dir1_fd,
12889                                   BTRFS_SUBVOLUME1_SNAPSHOT1, 0)) {
12890                 log_stderr("failure: btrfs_create_snapshot");
12891                 goto out;
12892         }
12893
12894         if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 0, 0)) {
12895                 log_stderr("failure: expected_uid_gid");
12896                 goto out;
12897         }
12898
12899         if (!expected_uid_gid(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1, 0, 10000, 10000)) {
12900                 log_stderr("failure: expected_uid_gid");
12901                 goto out;
12902         }
12903
12904         pid = fork();
12905         if (pid < 0) {
12906                 log_stderr("failure: fork");
12907                 goto out;
12908         }
12909         if (pid == 0) {
12910                 int snapshot_fd = -EBADF;
12911                 bool read_only = false;
12912
12913                 /*
12914                  * The caller's fsids don't have mappings in the idmapped mount
12915                  * so any file creation must fail.
12916                  */
12917
12918                 snapshot_fd = openat(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1,
12919                                      O_RDONLY | O_CLOEXEC | O_DIRECTORY);
12920                 if (snapshot_fd < 0)
12921                         die("failure: openat");
12922
12923
12924                 if (!switch_userns(userns_fd, 0, 0, false))
12925                         die("failure: switch_userns");
12926
12927                 if (!expected_uid_gid(t_dir1_fd, BTRFS_SUBVOLUME1, 0,
12928                                       t_overflowuid, t_overflowgid))
12929                         die("failure: expected_uid_gid");
12930
12931                 if (!expected_uid_gid(open_tree_fd, BTRFS_SUBVOLUME1, 0,
12932                                       t_overflowuid, t_overflowgid))
12933                         die("failure: expected_uid_gid");
12934
12935                 /*
12936                  * The caller's fsids don't have mappings in the idmapped mount
12937                  * so any file creation must fail.
12938                  */
12939
12940                 if (btrfs_get_subvolume_ro(snapshot_fd, &read_only))
12941                         die("failure: btrfs_get_subvolume_ro");
12942
12943                 if (read_only)
12944                         die("failure: read_only");
12945
12946                 if (!btrfs_set_subvolume_ro(snapshot_fd, true))
12947                         die("failure: btrfs_set_subvolume_ro");
12948                 if (errno != EPERM)
12949                         die("failure: errno");
12950
12951                 safe_close(snapshot_fd);
12952
12953                 exit(EXIT_SUCCESS);
12954         }
12955         if (wait_for_pid(pid))
12956                 goto out;
12957
12958         /* remove directory */
12959         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1)) {
12960                 log_stderr("failure: btrfs_delete_subvolume");
12961                 goto out;
12962         }
12963
12964         /* remove directory */
12965         if (btrfs_delete_subvolume(tree_fd, BTRFS_SUBVOLUME1_SNAPSHOT1)) {
12966                 log_stderr("failure: btrfs_delete_subvolume");
12967                 goto out;
12968         }
12969
12970         fret = 0;
12971         log_debug("Ran test");
12972 out:
12973         safe_close(attr.userns_fd);
12974         safe_close(open_tree_fd);
12975         safe_close(subvolume_fd);
12976         safe_close(tree_fd);
12977         safe_close(userns_fd);
12978
12979         return fret;
12980 }
12981
/* Names of the subvolumes used by the subvolume lookup test. */
#define BTRFS_SUBVOLUME_SUBVOL1 "subvol1"
#define BTRFS_SUBVOLUME_SUBVOL2 "subvol2"
#define BTRFS_SUBVOLUME_SUBVOL3 "subvol3"
#define BTRFS_SUBVOLUME_SUBVOL4 "subvol4"

/* Index of each subvolume in the per-subvolume fd and id arrays. */
#define BTRFS_SUBVOLUME_SUBVOL1_ID 0
#define BTRFS_SUBVOLUME_SUBVOL2_ID 1
#define BTRFS_SUBVOLUME_SUBVOL3_ID 2
#define BTRFS_SUBVOLUME_SUBVOL4_ID 3

/* Plain directories nested below subvol1. */
#define BTRFS_SUBVOLUME_DIR1 "dir1"
#define BTRFS_SUBVOLUME_DIR2 "dir2"

/* Directory that subvol1 gets mounted on. */
#define BTRFS_SUBVOLUME_MNT "mnt_subvolume1"

/*
 * Paths relative to the filesystem root, assembled from the component names
 * above through string literal concatenation.
 */
#define BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3 \
	BTRFS_SUBVOLUME_SUBVOL1 "/" BTRFS_SUBVOLUME_SUBVOL3
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 \
	BTRFS_SUBVOLUME_SUBVOL1 "/" BTRFS_SUBVOLUME_DIR1 "/" BTRFS_SUBVOLUME_DIR2
#define BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4 \
	BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2 "/" BTRFS_SUBVOLUME_SUBVOL4
13000
13001 /*
13002  * We create the following mount layout to test lookup:
13003  *
13004  * |-/mnt/test                    /dev/loop0                   btrfs       rw,relatime,space_cache,subvolid=5,subvol=/
13005  * | |-/mnt/test/mnt1             /dev/loop1[/subvol1]         btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=268,subvol=/subvol1
13006  * '-/mnt/scratch                 /dev/loop1                   btrfs       rw,relatime,space_cache,user_subvol_rm_allowed,subvolid=5,subvol=/
13007  */
13008 static int btrfs_subvolume_lookup_user(void)
13009 {
13010         int fret = -1, i;
13011         int dir1_fd = -EBADF, dir2_fd = -EBADF, mnt_fd = -EBADF,
13012             open_tree_fd = -EBADF, tree_fd = -EBADF, userns_fd = -EBADF;
13013         int subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
13014         uint64_t subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID + 1];
13015         uint64_t subvolid = -EINVAL;
13016         struct mount_attr attr = {
13017                 .attr_set = MOUNT_ATTR_IDMAP,
13018         };
13019         pid_t pid;
13020         struct btrfs_iter *iter;
13021
13022         if (!caps_supported())
13023                 return 0;
13024
13025         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
13026                 subvolume_fds[i] = -EBADF;
13027
13028         for (i = 0; i < ARRAY_SIZE(subvolume_ids); i++)
13029                 subvolume_ids[i] = -EINVAL;
13030
13031         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL1)) {
13032                 log_stderr("failure: btrfs_create_subvolume");
13033                 goto out;
13034         }
13035
13036         if (btrfs_create_subvolume(t_mnt_scratch_fd, BTRFS_SUBVOLUME_SUBVOL2)) {
13037                 log_stderr("failure: btrfs_create_subvolume");
13038                 goto out;
13039         }
13040
13041         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] = openat(t_mnt_scratch_fd,
13042                                                            BTRFS_SUBVOLUME_SUBVOL1,
13043                                                            O_CLOEXEC | O_DIRECTORY);
13044         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID] < 0) {
13045                 log_stderr("failure: openat");
13046                 goto out;
13047         }
13048
13049         /* create subvolume */
13050         if (btrfs_create_subvolume(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_SUBVOL3)) {
13051                 log_stderr("failure: btrfs_create_subvolume");
13052                 goto out;
13053         }
13054
13055         if (mkdirat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1, 0777)) {
13056                 log_stderr("failure: mkdirat");
13057                 goto out;
13058         }
13059
13060         dir1_fd = openat(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID], BTRFS_SUBVOLUME_DIR1,
13061                          O_CLOEXEC | O_DIRECTORY);
13062         if (dir1_fd < 0) {
13063                 log_stderr("failure: openat");
13064                 goto out;
13065         }
13066
13067         if (mkdirat(dir1_fd, BTRFS_SUBVOLUME_DIR2, 0777)) {
13068                 log_stderr("failure: mkdirat");
13069                 goto out;
13070         }
13071
13072         dir2_fd = openat(dir1_fd, BTRFS_SUBVOLUME_DIR2, O_CLOEXEC | O_DIRECTORY);
13073         if (dir2_fd < 0) {
13074                 log_stderr("failure: openat");
13075                 goto out;
13076         }
13077
13078         if (btrfs_create_subvolume(dir2_fd, BTRFS_SUBVOLUME_SUBVOL4)) {
13079                 log_stderr("failure: btrfs_create_subvolume");
13080                 goto out;
13081         }
13082
13083         if (mkdirat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, 0777)) {
13084                 log_stderr("failure: mkdirat");
13085                 goto out;
13086         }
13087
13088         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13089         if (sys_mount(t_device_scratch, t_buf, "btrfs", 0,
13090                       "subvol=" BTRFS_SUBVOLUME_SUBVOL1)) {
13091                 log_stderr("failure: mount");
13092                 goto out;
13093         }
13094
13095         mnt_fd = openat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, O_CLOEXEC | O_DIRECTORY);
13096         if (mnt_fd < 0) {
13097                 log_stderr("failure: openat");
13098                 goto out;
13099         }
13100
13101         if (chown_r(t_mnt_scratch_fd, ".", 1000, 1000)) {
13102                 log_stderr("failure: chown_r");
13103                 goto out;
13104         }
13105
13106         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] = openat(t_mnt_scratch_fd,
13107                                                            BTRFS_SUBVOLUME_SUBVOL2,
13108                                                            O_CLOEXEC | O_DIRECTORY);
13109         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID] < 0) {
13110                 log_stderr("failure: openat");
13111                 goto out;
13112         }
13113
13114         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL1_ID],
13115                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL1_ID])) {
13116                 log_stderr("failure: btrfs_get_subvolume_id");
13117                 goto out;
13118         }
13119
13120         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL2_ID],
13121                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL2_ID])) {
13122                 log_stderr("failure: btrfs_get_subvolume_id");
13123                 goto out;
13124         }
13125
13126         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] = openat(t_mnt_scratch_fd,
13127                                                            BTRFS_SUBVOLUME_SUBVOL1xSUBVOL3,
13128                                                            O_CLOEXEC | O_DIRECTORY);
13129         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID] < 0) {
13130                 log_stderr("failure: openat");
13131                 goto out;
13132         }
13133
13134         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID],
13135                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])) {
13136                 log_stderr("failure: btrfs_get_subvolume_id");
13137                 goto out;
13138         }
13139
13140         subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] = openat(t_mnt_scratch_fd,
13141                                                            BTRFS_SUBVOLUME_SUBVOL1xDIR1xDIR2xSUBVOL4,
13142                                                            O_CLOEXEC | O_DIRECTORY);
13143         if (subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID] < 0) {
13144                 log_stderr("failure: openat");
13145                 goto out;
13146         }
13147
13148         if (btrfs_get_subvolume_id(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID],
13149                                    &subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])) {
13150                 log_stderr("failure: btrfs_get_subvolume_id");
13151                 goto out;
13152         }
13153
13154
13155         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL3_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13156                 log_stderr("failure: fchmod");
13157                 goto out;
13158         }
13159
13160         if (fchmod(subvolume_fds[BTRFS_SUBVOLUME_SUBVOL4_ID], S_IRUSR | S_IWUSR | S_IXUSR), 0) {
13161                 log_stderr("failure: fchmod");
13162                 goto out;
13163         }
13164
13165         attr.userns_fd  = get_userns_fd(0, 10000, 10000);
13166         if (attr.userns_fd < 0) {
13167                 log_stderr("failure: get_userns_fd");
13168                 goto out;
13169         }
13170
13171         open_tree_fd = sys_open_tree(mnt_fd, "",
13172                                      AT_EMPTY_PATH |
13173                                      AT_NO_AUTOMOUNT |
13174                                      AT_SYMLINK_NOFOLLOW |
13175                                      OPEN_TREE_CLOEXEC |
13176                                      OPEN_TREE_CLONE);
13177         if (open_tree_fd < 0) {
13178                 log_stderr("failure: sys_open_tree");
13179                 goto out;
13180         }
13181
13182         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13183                 log_stderr("failure: sys_mount_setattr");
13184                 goto out;
13185         }
13186
13187         /*
13188          * The open_tree() syscall returns an O_PATH file descriptor which we
13189          * can't use with ioctl(). So let's reopen it as a proper file
13190          * descriptor.
13191          */
13192         tree_fd = openat(open_tree_fd, ".", O_RDONLY | O_CLOEXEC | O_DIRECTORY);
13193         if (tree_fd < 0) {
13194                 log_stderr("failure: openat");
13195                 goto out;
13196         }
13197
13198         pid = fork();
13199         if (pid < 0) {
13200                 log_stderr("failure: fork");
13201                 goto out;
13202         }
13203         if (pid == 0) {
13204                 bool subvolume3_found = false, subvolume4_found = false;
13205
13206                 if (!switch_fsids(11000, 11000))
13207                         die("failure: switch fsids");
13208
13209                 if (!caps_down())
13210                         die("failure: lower caps");
13211
13212                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13213                         die("failure: btrfs_iterator_start");
13214
13215                 for (;;) {
13216                         char *subvol_path = NULL;
13217                         int ret;
13218
13219                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13220                         if (ret == 1)
13221                                 break;
13222                         else if (ret)
13223                                 die("failure: btrfs_iterator_next");
13224
13225                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13226                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13227                                 die("failure: subvolume id %llu->%s",
13228                                     (long long unsigned)subvolid, subvol_path);
13229
13230                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13231                                 subvolume3_found = true;
13232
13233                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13234                                 subvolume4_found = true;
13235
13236                         free(subvol_path);
13237                 }
13238                 btrfs_iterator_end(iter);
13239
13240                 if (!subvolume3_found || !subvolume4_found)
13241                         die("failure: subvolume id");
13242
13243                 exit(EXIT_SUCCESS);
13244         }
13245         if (wait_for_pid(pid))
13246                 goto out;
13247
13248         pid = fork();
13249         if (pid < 0) {
13250                 log_stderr("failure: fork");
13251                 goto out;
13252         }
13253         if (pid == 0) {
13254                 bool subvolume3_found = false, subvolume4_found = false;
13255
13256                 if (!switch_userns(attr.userns_fd, 0, 0, false))
13257                         die("failure: switch_userns");
13258
13259                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13260                         die("failure: btrfs_iterator_start");
13261
13262                 for (;;) {
13263                         char *subvol_path = NULL;
13264                         int ret;
13265
13266                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13267                         if (ret == 1)
13268                                 break;
13269                         else if (ret)
13270                                 die("failure: btrfs_iterator_next");
13271
13272                         if (subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID] &&
13273                             subvolid != subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13274                                 die("failure: subvolume id %llu->%s",
13275                                     (long long unsigned)subvolid, subvol_path);
13276
13277                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL3_ID])
13278                                 subvolume3_found = true;
13279
13280                         if (subvolid == subvolume_ids[BTRFS_SUBVOLUME_SUBVOL4_ID])
13281                                 subvolume4_found = true;
13282
13283                         free(subvol_path);
13284                 }
13285                 btrfs_iterator_end(iter);
13286
13287                 if (!subvolume3_found || !subvolume4_found)
13288                         die("failure: subvolume id");
13289
13290                 exit(EXIT_SUCCESS);
13291         }
13292         if (wait_for_pid(pid))
13293                 goto out;
13294
13295         pid = fork();
13296         if (pid < 0) {
13297                 log_stderr("failure: fork");
13298                 goto out;
13299         }
13300         if (pid == 0) {
13301                 bool subvolume_found = false;
13302
13303                 if (!switch_fsids(0, 0))
13304                         die("failure: switch fsids");
13305
13306                 if (!caps_down())
13307                         die("failure: lower caps");
13308
13309                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13310                         die("failure: btrfs_iterator_start");
13311
13312                 for (;;) {
13313                         char *subvol_path = NULL;
13314                         int ret;
13315
13316                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13317                         if (ret == 1)
13318                                 break;
13319                         else if (ret)
13320                                 die("failure: btrfs_iterator_next");
13321
13322                         free(subvol_path);
13323
13324                         subvolume_found = true;
13325                         break;
13326                 }
13327                 btrfs_iterator_end(iter);
13328
13329                 if (subvolume_found)
13330                         die("failure: subvolume id");
13331
13332                 exit(EXIT_SUCCESS);
13333         }
13334         if (wait_for_pid(pid))
13335                 goto out;
13336
13337         userns_fd = get_userns_fd(0, 30000, 10000);
13338         if (userns_fd < 0) {
13339                 log_stderr("failure: get_userns_fd");
13340                 goto out;
13341         }
13342
13343         pid = fork();
13344         if (pid < 0) {
13345                 log_stderr("failure: fork");
13346                 goto out;
13347         }
13348         if (pid == 0) {
13349                 bool subvolume_found = false;
13350
13351                 if (!switch_userns(userns_fd, 0, 0, true))
13352                         die("failure: switch_userns");
13353
13354                 if (btrfs_iterator_start(tree_fd, 0, &iter))
13355                         die("failure: btrfs_iterator_start");
13356
13357                 for (;;) {
13358                         char *subvol_path = NULL;
13359                         int ret;
13360
13361                         ret = btrfs_iterator_next(iter, &subvol_path, &subvolid);
13362                         if (ret == 1)
13363                                 break;
13364                         else if (ret)
13365                                 die("failure: btrfs_iterator_next");
13366
13367                         free(subvol_path);
13368
13369                         subvolume_found = true;
13370                         break;
13371                 }
13372                 btrfs_iterator_end(iter);
13373
13374                 if (subvolume_found)
13375                         die("failure: subvolume id");
13376
13377                 exit(EXIT_SUCCESS);
13378         }
13379         if (wait_for_pid(pid))
13380                 goto out;
13381
13382         fret = 0;
13383         log_debug("Ran test");
13384 out:
13385         safe_close(dir1_fd);
13386         safe_close(dir2_fd);
13387         safe_close(open_tree_fd);
13388         safe_close(tree_fd);
13389         safe_close(userns_fd);
13390         for (i = 0; i < ARRAY_SIZE(subvolume_fds); i++)
13391                 safe_close(subvolume_fds[i]);
13392         snprintf(t_buf, sizeof(t_buf), "%s/%s", t_mountpoint, BTRFS_SUBVOLUME_MNT);
13393         sys_umount2(t_buf, MNT_DETACH);
13394         unlinkat(t_mnt_fd, BTRFS_SUBVOLUME_MNT, AT_REMOVEDIR);
13395
13396         return fret;
13397 }
13398
/* Names of the two users whose uid/gid setattr_fix_968219708108() looks up. */
#define USER1 "fsgqa"
#define USER2 "fsgqa2"

/**
 * lookup_ids - lookup uid and gid for a username
 * @name: [in]  name of the user
 * @uid:  [out] pointer to the user-ID
 * @gid:  [out] pointer to the group-ID
 *
 * Lookup the uid and gid of a user through getpwnam_r().
 *
 * The scratch buffer starts at the size suggested by
 * sysconf(_SC_GETPW_R_SIZE_MAX), or at 1024 bytes if no usable suggestion is
 * available. Whenever getpwnam_r() reports ERANGE (entry does not fit) the
 * buffer is doubled and the lookup is retried, so a long passwd entry is not
 * mistaken for a missing user.
 *
 * @uid and @gid are only written on success.
 *
 * Return: On success, true is returned.
 *         On error, false is returned.
 */
static bool lookup_ids(const char *name, uid_t *uid, gid_t *gid)
{
        /* Stop growing at 1 MiB so a broken backend can't make us loop forever. */
        enum { LOOKUP_IDS_BUFSIZE_MAX = 1 << 20 };
        bool bret = false;
        struct passwd *pwentp = NULL;
        struct passwd pwent;
        char *buf = NULL;
        size_t bufsize;
        long size_hint;
        int ret;

        size_hint = sysconf(_SC_GETPW_R_SIZE_MAX);
        bufsize = size_hint > 0 ? (size_t)size_hint : 1024;

        for (;;) {
                /* Keep the old pointer until realloc() succeeded so it can still be freed. */
                char *tmp = realloc(buf, bufsize);

                if (!tmp)
                        goto out;
                buf = tmp;

                ret = getpwnam_r(name, &pwent, buf, bufsize, &pwentp);
                if (ret != ERANGE)
                        break;

                if (bufsize >= LOOKUP_IDS_BUFSIZE_MAX)
                        goto out;
                bufsize *= 2;
        }

        /* A zero return with a NULL result pointer means "no such user". */
        if (!ret && pwentp) {
                *uid = pwent.pw_uid;
                *gid = pwent.pw_gid;
                bret = true;
        }

out:
        free(buf);
        return bret;
}
13440
13441 /**
13442  * setattr_fix_968219708108 - test for commit 968219708108 ("fs: handle circular mappings correctly")
13443  *
13444  * Test that ->setattr() works correctly for idmapped mounts with circular
13445  * idmappings such as:
13446  *
13447  * b:1000:1001:1
13448  * b:1001:1000:1
13449  *
13450  * Assume a directory /source with two files:
13451  *
13452  * /source/file1 | 1000:1000
13453  * /source/file2 | 1001:1001
13454  *
13455  * and we create an idmapped mount of /source at /target with an idmapped of:
13456  *
13457  * mnt_userns:        1000:1001:1
13458  *                    1001:1000:1
13459  *
13460  * In the idmapped mount file1 will be owned by uid 1001 and file2 by uid 1000:
13461  *
13462  * /target/file1 | 1001:1001
13463  * /target/file2 | 1000:1000
13464  *
13465  * Because in essence the idmapped mount switches ownership for {g,u}id 1000
13466  * and {g,u}id 1001.
13467  *
13468  * 1. A user with fs{g,u}id 1000 must be allowed to setattr /target/file2 from
13469  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13470  * 2. A user with fs{g,u}id 1001 must be allowed to setattr /target/file1 from
13471  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1001.
13472  * 3. A user with fs{g,u}id 1000 must fail to setattr /target/file1 from
13473  *    {g,u}id 1001 in the idmapped mount to {g,u}id 1000.
13474  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13475  *    {g,u}id of the file.
13476  * 4. A user with fs{g,u}id 1001 must fail to setattr /target/file2 from
13477  *    {g,u}id 1000 in the idmapped mount to {g,u}id 1000.
13478  *    This must fail with EPERM. The caller's fs{g,u}id doesn't match the
13479  *    {g,u}id of the file.
13480  * 5. Both, a user with fs{g,u}id 1000 and a user with fs{g,u}id 1001, must
13481  *    fail to setattr /target/file1 owned by {g,u}id 1001 in the idmapped mount
13482  *    and /target/file2 owned by {g,u}id 1000 in the idmapped mount to any
13483  *    {g,u}id apart from {g,u}id 1000 or 1001 with EINVAL.
13484  *    Only {g,u}id 1000 and 1001 have a mapping in the idmapped mount. Other
13485  *    {g,u}id are unmapped.
13486  */
13487 static int setattr_fix_968219708108(void)
13488 {
13489         int fret = -1;
13490         int open_tree_fd = -EBADF;
13491         struct mount_attr attr = {
13492                 .attr_set       = MOUNT_ATTR_IDMAP,
13493                 .userns_fd      = -EBADF,
13494         };
13495         int ret;
13496         uid_t user1_uid, user2_uid;
13497         gid_t user1_gid, user2_gid;
13498         pid_t pid;
13499         struct list idmap;
13500         struct list *it_cur, *it_next;
13501
13502         if (!caps_supported())
13503                 return 0;
13504
13505         list_init(&idmap);
13506
13507         if (!lookup_ids(USER1, &user1_uid, &user1_gid)) {
13508                 log_stderr("failure: lookup_user");
13509                 goto out;
13510         }
13511
13512         if (!lookup_ids(USER2, &user2_uid, &user2_gid)) {
13513                 log_stderr("failure: lookup_user");
13514                 goto out;
13515         }
13516
13517         log_debug("Found " USER1 " with uid(%d) and gid(%d) and " USER2 " with uid(%d) and gid(%d)",
13518                   user1_uid, user1_gid, user2_uid, user2_gid);
13519
13520         if (mkdirat(t_dir1_fd, DIR1, 0777)) {
13521                 log_stderr("failure: mkdirat");
13522                 goto out;
13523         }
13524
13525         if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | 0644, 0)) {
13526                 log_stderr("failure: mknodat");
13527                 goto out;
13528         }
13529
13530         if (chown_r(t_mnt_fd, T_DIR1, user1_uid, user1_gid)) {
13531                 log_stderr("failure: chown_r");
13532                 goto out;
13533         }
13534
13535         if (mknodat(t_dir1_fd, DIR1 "/" FILE2, S_IFREG | 0644, 0)) {
13536                 log_stderr("failure: mknodat");
13537                 goto out;
13538         }
13539
13540         if (fchownat(t_dir1_fd, DIR1 "/" FILE2, user2_uid, user2_gid, AT_SYMLINK_NOFOLLOW)) {
13541                 log_stderr("failure: fchownat");
13542                 goto out;
13543         }
13544
13545         print_r(t_mnt_fd, T_DIR1);
13546
13547         /* u:1000:1001:1 */
13548         ret = add_map_entry(&idmap, user1_uid, user2_uid, 1, ID_TYPE_UID);
13549         if (ret) {
13550                 log_stderr("failure: add_map_entry");
13551                 goto out;
13552         }
13553
13554         /* u:1001:1000:1 */
13555         ret = add_map_entry(&idmap, user2_uid, user1_uid, 1, ID_TYPE_UID);
13556         if (ret) {
13557                 log_stderr("failure: add_map_entry");
13558                 goto out;
13559         }
13560
13561         /* g:1000:1001:1 */
13562         ret = add_map_entry(&idmap, user1_gid, user2_gid, 1, ID_TYPE_GID);
13563         if (ret) {
13564                 log_stderr("failure: add_map_entry");
13565                 goto out;
13566         }
13567
13568         /* g:1001:1000:1 */
13569         ret = add_map_entry(&idmap, user2_gid, user1_gid, 1, ID_TYPE_GID);
13570         if (ret) {
13571                 log_stderr("failure: add_map_entry");
13572                 goto out;
13573         }
13574
13575         attr.userns_fd = get_userns_fd_from_idmap(&idmap);
13576         if (attr.userns_fd < 0) {
13577                 log_stderr("failure: get_userns_fd");
13578                 goto out;
13579         }
13580
13581         open_tree_fd = sys_open_tree(t_dir1_fd, DIR1,
13582                                      AT_NO_AUTOMOUNT |
13583                                      AT_SYMLINK_NOFOLLOW |
13584                                      OPEN_TREE_CLOEXEC |
13585                                      OPEN_TREE_CLONE |
13586                                      AT_RECURSIVE);
13587         if (open_tree_fd < 0) {
13588                 log_stderr("failure: sys_open_tree");
13589                 goto out;
13590         }
13591
13592         if (sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr))) {
13593                 log_stderr("failure: sys_mount_setattr");
13594                 goto out;
13595         }
13596
13597         print_r(open_tree_fd, "");
13598
13599         pid = fork();
13600         if (pid < 0) {
13601                 log_stderr("failure: fork");
13602                 goto out;
13603         }
13604         if (pid == 0) {
13605                 /* switch to {g,u}id 1001 */
13606                 if (!switch_resids(user2_uid, user2_gid))
13607                         die("failure: switch_resids");
13608
13609                 /* drop all capabilities */
13610                 if (!caps_down())
13611                         die("failure: caps_down");
13612
13613                 /*
13614                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13615                  * needs to fail with EINVAL.
13616                  */
13617                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13618                         die("failure: change ownership");
13619                 if (errno != EINVAL)
13620                         die("failure: errno");
13621
13622                 /*
13623                  * A user with fs{g,u}id 1001 must be allowed to change
13624                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13625                  * idmapped mount to {g,u}id 1001.
13626                  */
13627                 if (fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13628                              AT_SYMLINK_NOFOLLOW))
13629                         die("failure: change ownership");
13630
13631                 /* Verify that the ownership is still {g,u}id 1001. */
13632                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13633                                       user2_uid, user2_gid))
13634                         die("failure: check ownership");
13635
13636                 /*
13637                  * A user with fs{g,u}id 1001 must not be allowed to change
13638                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13639                  * idmapped mount to {g,u}id 1000.
13640                  */
13641                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13642                               AT_SYMLINK_NOFOLLOW))
13643                         die("failure: change ownership");
13644                 if (errno != EPERM)
13645                         die("failure: errno");
13646
13647                 /* Verify that the ownership is still {g,u}id 1001. */
13648                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13649                                       user2_uid, user2_gid))
13650                         die("failure: check ownership");
13651
13652                 /*
13653                  * A user with fs{g,u}id 1001 must not be allowed to change
13654                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13655                  * idmapped mount to {g,u}id 1000.
13656                  */
13657                 if (!fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13658                               AT_SYMLINK_NOFOLLOW))
13659                         die("failure: change ownership");
13660                 if (errno != EPERM)
13661                         die("failure: errno");
13662
13663                 /* Verify that the ownership is still {g,u}id 1000. */
13664                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13665                                       user1_uid, user1_gid))
13666                         die("failure: check ownership");
13667
13668                 /*
13669                  * A user with fs{g,u}id 1001 must not be allowed to change
13670                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13671                  * idmapped mount to {g,u}id 1001.
13672                  */
13673                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13674                               AT_SYMLINK_NOFOLLOW))
13675                         die("failure: change ownership");
13676                 if (errno != EPERM)
13677                         die("failure: errno");
13678
13679                 /* Verify that the ownership is still {g,u}id 1000. */
13680                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13681                                       user1_uid, user1_gid))
13682                         die("failure: check ownership");
13683
13684                 exit(EXIT_SUCCESS);
13685         }
13686         if (wait_for_pid(pid))
13687                 goto out;
13688
13689         pid = fork();
13690         if (pid < 0) {
13691                 log_stderr("failure: fork");
13692                 goto out;
13693         }
13694         if (pid == 0) {
13695                 /* switch to {g,u}id 1000 */
13696                 if (!switch_resids(user1_uid, user1_gid))
13697                         die("failure: switch_resids");
13698
13699                 /* drop all capabilities */
13700                 if (!caps_down())
13701                         die("failure: caps_down");
13702
13703                 /*
13704                  * The {g,u}id 0 is not mapped in this idmapped mount so this
13705                  * needs to fail with EINVAL.
13706                  */
13707                 if (!fchownat(open_tree_fd, FILE1, 0, 0, AT_SYMLINK_NOFOLLOW))
13708                         die("failure: change ownership");
13709                 if (errno != EINVAL)
13710                         die("failure: errno");
13711
13712                 /*
13713                  * A user with fs{g,u}id 1000 must be allowed to change
13714                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13715                  * idmapped mount to {g,u}id 1000.
13716                  */
13717                 if (fchownat(open_tree_fd, FILE2, user1_uid, user1_gid,
13718                              AT_SYMLINK_NOFOLLOW))
13719                         die("failure: change ownership");
13720
13721                 /* Verify that the ownership is still {g,u}id 1000. */
13722                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13723                                       user1_uid, user1_gid))
13724                         die("failure: check ownership");
13725
13726                 /*
13727                  * A user with fs{g,u}id 1000 must not be allowed to change
13728                  * ownership of /target/file2 owned by {g,u}id 1000 in this
13729                  * idmapped mount to {g,u}id 1001.
13730                  */
13731                 if (!fchownat(open_tree_fd, FILE2, user2_uid, user2_gid,
13732                               AT_SYMLINK_NOFOLLOW))
13733                         die("failure: change ownership");
13734                 if (errno != EPERM)
13735                         die("failure: errno");
13736
13737                 /* Verify that the ownership is still {g,u}id 1000. */
13738                 if (!expected_uid_gid(open_tree_fd, FILE2, AT_SYMLINK_NOFOLLOW,
13739                                       user1_uid, user1_gid))
13740                         die("failure: check ownership");
13741
13742                 /*
13743                  * A user with fs{g,u}id 1000 must not be allowed to change
13744                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13745                  * idmapped mount to {g,u}id 1000.
13746                  */
13747                 if (!fchownat(open_tree_fd, FILE1, user1_uid, user1_gid,
13748                              AT_SYMLINK_NOFOLLOW))
13749                         die("failure: change ownership");
13750                 if (errno != EPERM)
13751                         die("failure: errno");
13752
13753                 /* Verify that the ownership is still {g,u}id 1001. */
13754                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13755                                       user2_uid, user2_gid))
13756                         die("failure: check ownership");
13757
13758                 /*
13759                  * A user with fs{g,u}id 1000 must not be allowed to change
13760                  * ownership of /target/file1 owned by {g,u}id 1001 in this
13761                  * idmapped mount to {g,u}id 1001.
13762                  */
13763                 if (!fchownat(open_tree_fd, FILE1, user2_uid, user2_gid,
13764                               AT_SYMLINK_NOFOLLOW))
13765                         die("failure: change ownership");
13766                 if (errno != EPERM)
13767                         die("failure: errno");
13768
13769                 /* Verify that the ownership is still {g,u}id 1001. */
13770                 if (!expected_uid_gid(open_tree_fd, FILE1, AT_SYMLINK_NOFOLLOW,
13771                                       user2_uid, user2_gid))
13772                         die("failure: check ownership");
13773
13774                 exit(EXIT_SUCCESS);
13775         }
13776         if (wait_for_pid(pid))
13777                 goto out;
13778
13779         fret = 0;
13780         log_debug("Ran test");
13781 out:
13782         safe_close(attr.userns_fd);
13783         safe_close(open_tree_fd);
13784
13785         list_for_each_safe(it_cur, &idmap, it_next) {
13786                 list_del(it_cur);
13787                 free(it_cur->elem);
13788                 free(it_cur);
13789         }
13790
13791         return fret;
13792 }
13793
/*
 * usage - print the command line help and terminate
 *
 * Writes the description and the list of supported long options to stderr
 * and then leaves the process through _exit() with EXIT_SUCCESS, so this
 * function does not return to its caller.
 */
static void usage(void)
{
        static const char help_text[] =
                "Description:\n"
                "    Run idmapped mount tests\n\n"
                "Arguments:\n"
                "--device                            Device used in the tests\n"
                "--fstype                            Filesystem type used in the tests\n"
                "--help                              Print help\n"
                "--mountpoint                        Mountpoint of device\n"
                "--supported                         Test whether idmapped mounts are supported on this filesystem\n"
                "--scratch-mountpoint                Mountpoint of scratch device used in the tests\n"
                "--scratch-device                    Scratch device used in the tests\n"
                "--test-core                         Run core idmapped mount testsuite\n"
                "--test-fscaps-regression            Run fscap regression tests\n"
                "--test-nested-userns                Run nested userns idmapped mount testsuite\n"
                "--test-btrfs                        Run btrfs specific idmapped mount testsuite\n"
                "--test-setattr-fix-968219708108     Run setattr regression tests\n";

        /* The text contains no conversion specifications, so it is emitted verbatim. */
        fputs(help_text, stderr);

        _exit(EXIT_SUCCESS);
}
13815
/*
 * Long command line options in the struct option layout from <getopt.h>.
 *
 * Every entry leaves .flag NULL and carries a distinct character in .val;
 * the options taking a value are marked required_argument. The table is
 * closed by an all-zero sentinel entry.
 */
static const struct option longopts[] = {
        { .name = "device",                        .has_arg = required_argument, .flag = NULL, .val = 'd' },
        { .name = "fstype",                        .has_arg = required_argument, .flag = NULL, .val = 'f' },
        { .name = "mountpoint",                    .has_arg = required_argument, .flag = NULL, .val = 'm' },
        { .name = "scratch-mountpoint",            .has_arg = required_argument, .flag = NULL, .val = 'a' },
        { .name = "scratch-device",                .has_arg = required_argument, .flag = NULL, .val = 'e' },
        { .name = "supported",                     .has_arg = no_argument,       .flag = NULL, .val = 's' },
        { .name = "help",                          .has_arg = no_argument,       .flag = NULL, .val = 'h' },
        { .name = "test-core",                     .has_arg = no_argument,       .flag = NULL, .val = 'c' },
        { .name = "test-fscaps-regression",        .has_arg = no_argument,       .flag = NULL, .val = 'g' },
        { .name = "test-nested-userns",            .has_arg = no_argument,       .flag = NULL, .val = 'n' },
        { .name = "test-btrfs",                    .has_arg = no_argument,       .flag = NULL, .val = 'b' },
        { .name = "test-setattr-fix-968219708108", .has_arg = no_argument,       .flag = NULL, .val = 'i' },
        { .name = NULL,                            .has_arg = 0,                 .flag = NULL, .val = 0   },
};

/*
 * Flags describing which functionality a test requires; they are stored in
 * the support_flags member of struct t_idmapped_mounts.
 */
#define T_REQUIRE_IDMAPPED_MOUNTS (1U << 0)
13834
13835 struct t_idmapped_mounts {
13836         int (*test)(void);
13837         unsigned int support_flags;
13838         const char *description;
13839 } basic_suite[] = {
13840         { acls,                                                         T_REQUIRE_IDMAPPED_MOUNTS,      "posix acls on regular mounts",                                                                 },
13841         { create_in_userns,                                             T_REQUIRE_IDMAPPED_MOUNTS,      "create operations in user namespace",                                                          },
13842         { device_node_in_userns,                                        T_REQUIRE_IDMAPPED_MOUNTS,      "device node in user namespace",                                                                },
13843         { expected_uid_gid_idmapped_mounts,                             T_REQUIRE_IDMAPPED_MOUNTS,      "expected ownership on idmapped mounts",                                                        },
13844         { fscaps,                                                       0,                              "fscaps on regular mounts",                                                                     },
13845         { fscaps_idmapped_mounts,                                       T_REQUIRE_IDMAPPED_MOUNTS,      "fscaps on idmapped mounts",                                                                    },
13846         { fscaps_idmapped_mounts_in_userns,                             T_REQUIRE_IDMAPPED_MOUNTS,      "fscaps on idmapped mounts in user namespace",                                                  },
13847         { fscaps_idmapped_mounts_in_userns_separate_userns,             T_REQUIRE_IDMAPPED_MOUNTS,      "fscaps on idmapped mounts in user namespace with different id mappings",                       },
13848         { fsids_mapped,                                                 T_REQUIRE_IDMAPPED_MOUNTS,      "mapped fsids",                                                                                 },
13849         { fsids_unmapped,                                               T_REQUIRE_IDMAPPED_MOUNTS,      "unmapped fsids",                                                                               },
13850         { hardlink_crossing_mounts,                                     0,                              "cross mount hardlink",                                                                         },
13851         { hardlink_crossing_idmapped_mounts,                            T_REQUIRE_IDMAPPED_MOUNTS,      "cross idmapped mount hardlink",                                                                },
13852         { hardlink_from_idmapped_mount,                                 T_REQUIRE_IDMAPPED_MOUNTS,      "hardlinks from idmapped mounts",                                                               },
13853         { hardlink_from_idmapped_mount_in_userns,                       T_REQUIRE_IDMAPPED_MOUNTS,      "hardlinks from idmapped mounts in user namespace",                                             },
13854 #ifdef HAVE_LIBURING_H
13855         { io_uring,                                                     0,                              "io_uring",                                                                                     },
13856         { io_uring_userns,                                              0,                              "io_uring in user namespace",                                                                   },
13857         { io_uring_idmapped,                                            T_REQUIRE_IDMAPPED_MOUNTS,      "io_uring from idmapped mounts",                                                                },
13858         { io_uring_idmapped_userns,                                     T_REQUIRE_IDMAPPED_MOUNTS,      "io_uring from idmapped mounts in user namespace",                                              },
13859         { io_uring_idmapped_unmapped,                                   T_REQUIRE_IDMAPPED_MOUNTS,      "io_uring from idmapped mounts with unmapped ids",                                              },
13860         { io_uring_idmapped_unmapped_userns,                            T_REQUIRE_IDMAPPED_MOUNTS,      "io_uring from idmapped mounts with unmapped ids in user namespace",                            },
13861 #endif
13862         { protected_symlinks,                                           0,                              "following protected symlinks on regular mounts",                                               },
13863         { protected_symlinks_idmapped_mounts,                           T_REQUIRE_IDMAPPED_MOUNTS,      "following protected symlinks on idmapped mounts",                                              },
13864         { protected_symlinks_idmapped_mounts_in_userns,                 T_REQUIRE_IDMAPPED_MOUNTS,      "following protected symlinks on idmapped mounts in user namespace",                            },
13865         { rename_crossing_mounts,                                       0,                              "cross mount rename",                                                                           },
13866         { rename_crossing_idmapped_mounts,                              T_REQUIRE_IDMAPPED_MOUNTS,      "cross idmapped mount rename",                                                                  },
13867         { rename_from_idmapped_mount,                                   T_REQUIRE_IDMAPPED_MOUNTS,      "rename from idmapped mounts",                                                                  },
13868         { rename_from_idmapped_mount_in_userns,                         T_REQUIRE_IDMAPPED_MOUNTS,      "rename from idmapped mounts in user namespace",                                                },
13869         { setattr_truncate,                                             0,                              "setattr truncate",                                                                             },
13870         { setattr_truncate_idmapped,                                    T_REQUIRE_IDMAPPED_MOUNTS,      "setattr truncate on idmapped mounts",                                                          },
13871         { setattr_truncate_idmapped_in_userns,                          T_REQUIRE_IDMAPPED_MOUNTS,      "setattr truncate on idmapped mounts in user namespace",                                        },
13872         { setgid_create,                                                0,                              "create operations in directories with setgid bit set",                                         },
13873         { setgid_create_idmapped,                                       T_REQUIRE_IDMAPPED_MOUNTS,      "create operations in directories with setgid bit set on idmapped mounts",                      },
13874         { setgid_create_idmapped_in_userns,                             T_REQUIRE_IDMAPPED_MOUNTS,      "create operations in directories with setgid bit set on idmapped mounts in user namespace",    },
13875         { setid_binaries,                                               0,                              "setid binaries on regular mounts",                                                             },
13876         { setid_binaries_idmapped_mounts,                               T_REQUIRE_IDMAPPED_MOUNTS,      "setid binaries on idmapped mounts",                                                            },
13877         { setid_binaries_idmapped_mounts_in_userns,                     T_REQUIRE_IDMAPPED_MOUNTS,      "setid binaries on idmapped mounts in user namespace",                                          },
13878         { setid_binaries_idmapped_mounts_in_userns_separate_userns,     T_REQUIRE_IDMAPPED_MOUNTS,      "setid binaries on idmapped mounts in user namespace with different id mappings",               },
13879         { sticky_bit_unlink,                                            0,                              "sticky bit unlink operations on regular mounts",                                               },
13880         { sticky_bit_unlink_idmapped_mounts,                            T_REQUIRE_IDMAPPED_MOUNTS,      "sticky bit unlink operations on idmapped mounts",                                              },
13881         { sticky_bit_unlink_idmapped_mounts_in_userns,                  T_REQUIRE_IDMAPPED_MOUNTS,      "sticky bit unlink operations on idmapped mounts in user namespace",                            },
13882         { sticky_bit_rename,                                            0,                              "sticky bit rename operations on regular mounts",                                               },
13883         { sticky_bit_rename_idmapped_mounts,                            T_REQUIRE_IDMAPPED_MOUNTS,      "sticky bit rename operations on idmapped mounts",                                              },
13884         { sticky_bit_rename_idmapped_mounts_in_userns,                  T_REQUIRE_IDMAPPED_MOUNTS,      "sticky bit rename operations on idmapped mounts in user namespace",                            },
13885         { symlink_regular_mounts,                                       0,                              "symlink from regular mounts",                                                                  },
13886         { symlink_idmapped_mounts,                                      T_REQUIRE_IDMAPPED_MOUNTS,      "symlink from idmapped mounts",                                                                 },
13887         { symlink_idmapped_mounts_in_userns,                            T_REQUIRE_IDMAPPED_MOUNTS,      "symlink from idmapped mounts in user namespace",                                               },
13888         { threaded_idmapped_mount_interactions,                         T_REQUIRE_IDMAPPED_MOUNTS,      "threaded operations on idmapped mounts",                                                       },
13889 };
13890
13891 struct t_idmapped_mounts fscaps_in_ancestor_userns[] = {
13892         { fscaps_idmapped_mounts_in_userns_valid_in_ancestor_userns,    true,   "fscaps on idmapped mounts in user namespace writing fscap valid in ancestor userns",           },
13893 };
13894
13895 struct t_idmapped_mounts t_nested_userns[] = {
13896         { nested_userns,                                                true,   "test that nested user namespaces behave correctly when attached to idmapped mounts",           },
13897 };
13898
13899 struct t_idmapped_mounts t_btrfs[] = {
13900         { btrfs_subvolumes_fsids_mapped,                                true,   "test subvolumes with mapped fsids",                                                            },
13901         { btrfs_subvolumes_fsids_mapped_userns,                         true,   "test subvolumes with mapped fsids inside user namespace",                                      },
13902         { btrfs_subvolumes_fsids_mapped_user_subvol_rm_allowed,         true,   "test subvolume deletion with user_subvol_rm_allowed mount option",                             },
13903         { btrfs_subvolumes_fsids_mapped_userns_user_subvol_rm_allowed,  true,   "test subvolume deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13904         { btrfs_subvolumes_fsids_unmapped,                              true,   "test subvolumes with unmapped fsids",                                                          },
13905         { btrfs_subvolumes_fsids_unmapped_userns,                       true,   "test subvolumes with unmapped fsids inside user namespace",                                    },
13906         { btrfs_snapshots_fsids_mapped,                                 true,   "test snapshots with mapped fsids",                                                             },
13907         { btrfs_snapshots_fsids_mapped_userns,                          true,   "test snapshots with mapped fsids inside user namespace",                                       },
13908         { btrfs_snapshots_fsids_mapped_user_subvol_rm_allowed,          true,   "test snapshots deletion with user_subvol_rm_allowed mount option",                             },
13909         { btrfs_snapshots_fsids_mapped_userns_user_subvol_rm_allowed,   true,   "test snapshots deletion with user_subvol_rm_allowed mount option inside user namespace",       },
13910         { btrfs_snapshots_fsids_unmapped,                               true,   "test snapshots with unmapped fsids",                                                           },
13911         { btrfs_snapshots_fsids_unmapped_userns,                        true,   "test snapshots with unmapped fsids inside user namespace",                                     },
13912         { btrfs_delete_by_spec_id,                                      true,   "test subvolume deletion by spec id",                                                           },
13913         { btrfs_subvolumes_setflags_fsids_mapped,                       true,   "test subvolume flags with mapped fsids",                                                       },
13914         { btrfs_subvolumes_setflags_fsids_mapped_userns,                true,   "test subvolume flags with mapped fsids inside user namespace",                                 },
13915         { btrfs_subvolumes_setflags_fsids_unmapped,                     true,   "test subvolume flags with unmapped fsids",                                                     },
13916         { btrfs_subvolumes_setflags_fsids_unmapped_userns,              true,   "test subvolume flags with unmapped fsids inside user namespace",                               },
13917         { btrfs_snapshots_setflags_fsids_mapped,                        true,   "test snapshots flags with mapped fsids",                                                       },
13918         { btrfs_snapshots_setflags_fsids_mapped_userns,                 true,   "test snapshots flags with mapped fsids inside user namespace",                                 },
13919         { btrfs_snapshots_setflags_fsids_unmapped,                      true,   "test snapshots flags with unmapped fsids",                                                     },
13920         { btrfs_snapshots_setflags_fsids_unmapped_userns,               true,   "test snapshots flags with unmapped fsids inside user namespace",                               },
13921         { btrfs_subvolume_lookup_user,                                  true,   "test unprivileged subvolume lookup",                                                           },
13922 };
13923
13924 /* Test for commit 968219708108 ("fs: handle circular mappings correctly"). */
13925 struct t_idmapped_mounts t_setattr_fix_968219708108[] = {
13926         { setattr_fix_968219708108,                                     true,   "test that setattr works correctly",                                                            },
13927 };
13928
13929 static bool run_test(struct t_idmapped_mounts suite[], size_t suite_size)
13930 {
13931         int i;
13932
13933         for (i = 0; i < suite_size; i++) {
13934                 struct t_idmapped_mounts *t = &suite[i];
13935                 int ret;
13936                 pid_t pid;
13937
13938                 /*
13939                  * If the underlying filesystems does not support idmapped
13940                  * mounts only run vfs generic tests.
13941                  */
13942                 if (t->support_flags & T_REQUIRE_IDMAPPED_MOUNTS &&
13943                     !t_fs_allow_idmap) {
13944                         log_debug("Skipping test %s", t->description);
13945                         continue;
13946                 }
13947
13948                 test_setup();
13949
13950                 pid = fork();
13951                 if (pid < 0)
13952                         return false;
13953
13954                 if (pid == 0) {
13955                         ret = t->test();
13956                         if (ret)
13957                                 die("failure: %s", t->description);
13958
13959                         exit(EXIT_SUCCESS);
13960                 }
13961
13962                 ret = wait_for_pid(pid);
13963                 test_cleanup();
13964
13965                 if (ret)
13966                         return false;
13967         }
13968
13969         return true;
13970 }
13971
13972 static bool fs_allow_idmap(void)
13973 {
13974         int ret;
13975         int open_tree_fd = -EBADF;
13976         struct mount_attr attr = {
13977                 .attr_set       = MOUNT_ATTR_IDMAP,
13978                 .userns_fd      = -EBADF,
13979         };
13980
13981         /* Changing mount properties on a detached mount. */
13982         attr.userns_fd = get_userns_fd(0, 1000, 1);
13983         if (attr.userns_fd < 0)
13984                 return false;
13985
13986         open_tree_fd = sys_open_tree(t_mnt_fd, "",
13987                                      AT_EMPTY_PATH | AT_NO_AUTOMOUNT |
13988                                      AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC |
13989                                      OPEN_TREE_CLONE);
13990         if (open_tree_fd < 0)
13991                 ret = -1;
13992         else
13993                 ret = sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
13994                                         sizeof(attr));
13995         close(open_tree_fd);
13996         close(attr.userns_fd);
13997
13998         return ret == 0;
13999 }
14000
14001 int main(int argc, char *argv[])
14002 {
14003         int fret, ret;
14004         int index = 0;
14005         bool supported = false, test_btrfs = false, test_core = false,
14006              test_fscaps_regression = false, test_nested_userns = false,
14007              test_setattr_fix_968219708108 = false;
14008
14009         while ((ret = getopt_long_only(argc, argv, "", longopts, &index)) != -1) {
14010                 switch (ret) {
14011                 case 'd':
14012                         t_device = optarg;
14013                         break;
14014                 case 'f':
14015                         t_fstype = optarg;
14016                         break;
14017                 case 'm':
14018                         t_mountpoint = optarg;
14019                         break;
14020                 case 's':
14021                         supported = true;
14022                         break;
14023                 case 'c':
14024                         test_core = true;
14025                         break;
14026                 case 'g':
14027                         test_fscaps_regression = true;
14028                         break;
14029                 case 'n':
14030                         test_nested_userns = true;
14031                         break;
14032                 case 'b':
14033                         test_btrfs = true;
14034                         break;
14035                 case 'a':
14036                         t_mountpoint_scratch = optarg;
14037                         break;
14038                 case 'e':
14039                         t_device_scratch = optarg;
14040                         break;
14041                 case 'i':
14042                         test_setattr_fix_968219708108 = true;
14043                         break;
14044                 case 'h':
14045                         /* fallthrough */
14046                 default:
14047                         usage();
14048                 }
14049         }
14050
14051         if (!t_device)
14052                 die_errno(EINVAL, "test device missing");
14053
14054         if (!t_fstype)
14055                 die_errno(EINVAL, "test filesystem type missing");
14056
14057         if (!t_mountpoint)
14058                 die_errno(EINVAL, "mountpoint of test device missing");
14059
14060         /* create separate mount namespace */
14061         if (unshare(CLONE_NEWNS))
14062                 die("failure: create new mount namespace");
14063
14064         /* turn off mount propagation */
14065         if (sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
14066                 die("failure: turn mount propagation off");
14067
14068         t_mnt_fd = openat(-EBADF, t_mountpoint, O_CLOEXEC | O_DIRECTORY);
14069         if (t_mnt_fd < 0)
14070                 die("failed to open %s", t_mountpoint);
14071
14072         t_mnt_scratch_fd = openat(-EBADF, t_mountpoint_scratch, O_CLOEXEC | O_DIRECTORY);
14073         if (t_mnt_fd < 0)
14074                 die("failed to open %s", t_mountpoint_scratch);
14075
14076         t_fs_allow_idmap = fs_allow_idmap();
14077         if (supported) {
14078                 /*
14079                  * Caller just wants to know whether the filesystem we're on
14080                  * supports idmapped mounts.
14081                  */
14082                 if (!t_fs_allow_idmap)
14083                         exit(EXIT_FAILURE);
14084
14085                 exit(EXIT_SUCCESS);
14086         }
14087
14088         stash_overflowuid();
14089         stash_overflowgid();
14090
14091         fret = EXIT_FAILURE;
14092
14093         if (test_core && !run_test(basic_suite, ARRAY_SIZE(basic_suite)))
14094                 goto out;
14095
14096         if (test_fscaps_regression &&
14097             !run_test(fscaps_in_ancestor_userns,
14098                       ARRAY_SIZE(fscaps_in_ancestor_userns)))
14099                 goto out;
14100
14101         if (test_nested_userns &&
14102             !run_test(t_nested_userns, ARRAY_SIZE(t_nested_userns)))
14103                 goto out;
14104
14105         if (test_btrfs && !run_test(t_btrfs, ARRAY_SIZE(t_btrfs)))
14106                 goto out;
14107
14108         if (test_setattr_fix_968219708108 &&
14109             !run_test(t_setattr_fix_968219708108,
14110                       ARRAY_SIZE(t_setattr_fix_968219708108)))
14111                 goto out;
14112
14113         fret = EXIT_SUCCESS;
14114
14115 out:
14116         exit(fret);
14117 }