1 // SPDX-License-Identifier: GPL-2.0
7 #include <linux/limits.h>
11 #include <sys/eventfd.h>
12 #include <sys/mount.h>
13 #include <sys/prctl.h>
14 #include <sys/socket.h>
16 #include <sys/types.h>
/*
 * read_nointr - call read(2) on @fd and repeat the call for as long as it
 * fails with errno set to EINTR.
 * @fd:    file descriptor to read from.
 * @buf:   destination buffer.
 * @count: maximum number of bytes to read.
 *
 * NOTE(review): the declaration of ret, the start of the do-loop and the
 * return statement are not visible in this excerpt; presumably the result of
 * the last read() call is returned - confirm against the full file.
 */
21 ssize_t read_nointr(int fd, void *buf, size_t count)
26 ret = read(fd, buf, count);
/* Retry only when the call failed because it was interrupted. */
27 } while (ret < 0 && errno == EINTR);
/*
 * write_nointr - call write(2) on @fd and repeat the call for as long as it
 * fails with errno set to EINTR.
 * @fd:    file descriptor to write to.
 * @buf:   data to write.
 * @count: number of bytes of @buf to write.
 *
 * A short write is not retried here; callers in this file compare the
 * result against the requested length themselves.
 * NOTE(review): the declaration of ret, the start of the do-loop and the
 * return statement are not visible in this excerpt; presumably the result of
 * the last write() call is returned - confirm against the full file.
 */
32 ssize_t write_nointr(int fd, const void *buf, size_t count)
37 ret = write(fd, buf, count);
/* Retry only when the call failed because it was interrupted. */
38 } while (ret < 0 && errno == EINTR);
/* Size in bytes (8 MiB) of the stack allocated for every cloned child. */
43 #define __STACK_SIZE (8 * 1024 * 1024)
/*
 * do_clone - run @fn(@arg) in a new child created with clone().
 * @fn:    function the child executes.
 * @arg:   opaque argument handed to @fn.
 * @flags: clone flags; SIGCHLD is always OR'ed in by this helper.
 *
 * Returns whatever __clone2()/clone() returns.
 * NOTE(review): the declaration of stack, the check of the malloc() result
 * and the preprocessor conditional that selects between the two calls below
 * are not visible in this excerpt. No free() of the stack is visible either.
 */
44 pid_t do_clone(int (*fn)(void *), void *arg, int flags)
48 stack = malloc(__STACK_SIZE);
/* __clone2() takes the base of the stack area together with its size. */
53 return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
/* clone() is handed the end of the buffer (stack + __STACK_SIZE) instead. */
55 return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
/*
 * get_userns_fd_cb - child entry point that get_userns_fd_from_idmap()
 * passes to do_clone() (with a NULL argument).
 * NOTE(review): the body is not visible in this excerpt.
 */
59 static int get_userns_fd_cb(void *data)
/*
 * wait_for_pid - wait for child @pid and report its exit status.
 * @pid: process to wait for.
 *
 * Returns the child's exit code (WEXITSTATUS) when it exited normally.
 * NOTE(review): the handling of a failed waitpid() and the value returned
 * when the child did not exit normally are not visible in this excerpt.
 */
64 int wait_for_pid(pid_t pid)
/* Blocking wait: no option flags are passed. */
69 ret = waitpid(pid, &status, 0);
/* A child that did not terminate via exit (e.g. was killed) is handled separately. */
77 if (!WIFEXITED(status))
80 return WEXITSTATUS(status);
/*
 * write_id_mapping - write a prepared mapping to /proc/<pid>/uid_map or
 * /proc/<pid>/gid_map.
 * @map_type: ID_TYPE_UID selects "uid_map"; any other value selects "gid_map".
 * @pid:      process whose /proc files are written.
 * @buf:      mapping text to write.
 * @buf_size: number of bytes of @buf to write.
 *
 * When a gid mapping is written by a caller whose effective uid is not 0,
 * "deny" is first written to /proc/<pid>/setgroups if that file can be
 * opened (presumably the rule described in user_namespaces(7) - confirm).
 * NOTE(review): the return statements and the cleanup path are not visible
 * in this excerpt.
 */
83 static int write_id_mapping(idmap_type_t map_type, pid_t pid, const char *buf, size_t buf_size)
/* Both descriptors start out as an invalid (negative) value. */
85 int fd = -EBADF, setgroups_fd = -EBADF;
/*
 * One buffer serves both paths built below; it is sized for the longer
 * "/proc/<pid>/setgroups" form plus the terminating NUL.
 * NOTE(review): INTTYPE_TO_STRLEN() is defined elsewhere; presumably it
 * yields the maximum printed length of a pid_t - confirm.
 */
88 char path[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
89 STRLITERALLEN("/setgroups") + 1];
/* The setgroups step only applies to gid maps written without euid 0. */
91 if (geteuid() != 0 && map_type == ID_TYPE_GID) {
92 ret = snprintf(path, sizeof(path), "/proc/%d/setgroups", pid);
/* Reject a formatting error or a truncated path. */
93 if (ret < 0 || ret >= sizeof(path))
96 setgroups_fd = open(path, O_WRONLY | O_CLOEXEC);
/* A missing setgroups file (ENOENT) is tolerated; other open errors are reported. */
97 if (setgroups_fd < 0 && errno != ENOENT) {
98 syserror("Failed to open \"%s\"", path);
102 if (setgroups_fd >= 0) {
103 ret = write_nointr(setgroups_fd, "deny\n", STRLITERALLEN("deny\n"));
/* Anything other than a complete write counts as a failure. */
104 if (ret != STRLITERALLEN("deny\n")) {
105 syserror("Failed to write \"deny\" to \"/proc/%d/setgroups\"", pid);
/* Build "/proc/<pid>/uid_map" or "/proc/<pid>/gid_map" in the same buffer. */
111 ret = snprintf(path, sizeof(path), "/proc/%d/%cid_map", pid, map_type == ID_TYPE_UID ? 'u' : 'g');
112 if (ret < 0 || ret >= sizeof(path))
115 fd = open(path, O_WRONLY | O_CLOEXEC);
117 syserror("Failed to open \"%s\"", path);
/* The whole mapping is handed over in a single write call. */
121 ret = write_nointr(fd, buf, buf_size);
/* A short write is treated as a failure as well. */
122 if (ret != buf_size) {
123 syserror("Failed to write %cid mapping to \"%s\"",
124 map_type == ID_TYPE_UID ? 'u' : 'g', path);
/* NOTE(review): presumably guards closing setgroups_fd; the guarded statement is not visible. */
132 if (setgroups_fd >= 0)
/*
 * map_ids_from_idmap - write the uid and gid mappings described by @idmap
 * for process @pid.
 * @idmap: list whose elements point to struct id_map entries.
 * @pid:   process whose mappings are written via write_id_mapping().
 *
 * For each map type in turn (ID_TYPE_UID through ID_TYPE_GID) the entries of
 * that type are formatted into one buffer as "nsid hostid range" lines,
 * which is then written with a single write_id_mapping() call.
 * NOTE(review): the return values on the empty-list and success paths, the
 * declaration of pos/fill/left/ret and the use of had_entry are not visible
 * in this excerpt.
 */
138 static int map_ids_from_idmap(struct list *idmap, pid_t pid)
/* Staging buffer for one map type; zero-initialised. */
141 char mapbuf[4096] = {};
/*
 * NOTE(review): initialised once, before the per-type loop; confirm whether
 * it needs to be reset for each map type.
 */
142 bool had_entry = false;
/*
 * NOTE(review): u_or_g is declared as idmap_type_t but only ever holds the
 * characters 'u' or 'g' used in the error message below.
 */
143 idmap_type_t map_type, u_or_g;
145 if (list_empty(idmap))
/* First pass handles uid entries with u_or_g == 'u'; afterwards u_or_g is 'g'. */
148 for (map_type = ID_TYPE_UID, u_or_g = 'u';
149 map_type <= ID_TYPE_GID; map_type++, u_or_g = 'g') {
152 struct list *iterator;
155 list_for_each(iterator, idmap) {
156 struct id_map *map = iterator->elem;
/* Only entries of the type handled in this pass are considered. */
157 if (map->map_type != map_type)
/* Space still available in mapbuf; the literal 4096 matches the array size above. */
162 left = 4096 - (pos - mapbuf);
163 fill = snprintf(pos, left, "%u %u %u\n", map->nsid, map->hostid, map->range);
165 * The kernel only takes <= 4k for writes to
166 * /proc/<pid>/{g,u}id_map
/* Fail on a formatting error or when the entry does not fit in the remaining space. */
168 if (fill <= 0 || fill >= left)
169 return syserror_set(-E2BIG, "Too many %cid mappings defined", u_or_g);
/* pos - mapbuf is the number of bytes formatted so far. */
176 ret = write_id_mapping(map_type, pid, mapbuf, pos - mapbuf);
178 return syserror("Failed to write mapping: %s", mapbuf);
/* Clear the buffer before it is reused for the next map type. */
180 memset(mapbuf, 0, sizeof(mapbuf));
/*
 * get_userns_fd_from_idmap - obtain a file descriptor for a user namespace
 * configured with the mappings in @idmap.
 * @idmap: list of id mappings handed to map_ids_from_idmap().
 *
 * A helper child is cloned into a new user and mount namespace, the mappings
 * are written for it, /proc/<pid>/ns/user of the child is opened, and the
 * child is then killed and reaped.
 * NOTE(review): the error checks and the final return are not visible in
 * this excerpt; presumably the result of the open() call is returned -
 * confirm against the full file.
 */
186 int get_userns_fd_from_idmap(struct list *idmap)
/* Sized for "/proc/<pid>/ns/user" plus the terminating NUL. */
190 char path_ns[STRLITERALLEN("/proc/") + INTTYPE_TO_STRLEN(pid_t) +
191 STRLITERALLEN("/ns/user") + 1];
/* The child runs get_userns_fd_cb() with no argument. */
193 pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER | CLONE_NEWNS);
197 ret = map_ids_from_idmap(idmap, pid);
201 ret = snprintf(path_ns, sizeof(path_ns), "/proc/%d/ns/user", pid);
/* Reject a formatting error or a truncated path. */
202 if (ret < 0 || (size_t)ret >= sizeof(path_ns))
205 ret = open(path_ns, O_RDONLY | O_CLOEXEC | O_NOCTTY);
/* The helper child is no longer needed: kill it and reap it, ignoring both results. */
207 (void)kill(pid, SIGKILL);
208 (void)wait_for_pid(pid);
/*
 * get_userns_fd - convenience wrapper around get_userns_fd_from_idmap() that
 * uses one uid mapping and one gid mapping.
 * @nsid, @hostid, @range: mapping parameters.
 *
 * Returns the result of get_userns_fd_from_idmap().
 * NOTE(review): the remaining initialiser fields of uid_map/gid_map and the
 * initialisation of head are not visible in this excerpt; presumably both
 * maps use the three parameters - confirm against the full file.
 */
212 int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
/* The list head and both nodes are locals of this function; nothing is allocated here. */
214 struct list head, uid_mapl, gid_mapl;
215 struct id_map uid_map = {
216 .map_type = ID_TYPE_UID,
221 struct id_map gid_map = {
222 .map_type = ID_TYPE_GID,
/* Point each list node at its mapping, then append both nodes to head. */
229 uid_mapl.elem = &uid_map;
230 gid_mapl.elem = &gid_map;
231 list_add_tail(&head, &uid_mapl);
232 list_add_tail(&head, &gid_mapl);
234 return get_userns_fd_from_idmap(&head);
/*
 * switch_ids - drop all supplementary groups and switch the real, effective
 * and saved gid and uid of the calling process.
 * @uid: user id to install as real, effective and saved uid.
 * @gid: group id to install as real, effective and saved gid.
 *
 * The order matters: supplementary groups and the gids are changed before
 * the uids, while the process still has the uids it started with.
 *
 * Return: true when every step succeeded, false as soon as one step fails
 * (the failure is reported through syserror()).
 *
 * The syserror() result must not be returned directly: the int-returning
 * functions in this file use it as a non-zero failure code, and a non-zero
 * value converted to bool is true, which callers testing !switch_ids() would
 * read as success.
 */
bool switch_ids(uid_t uid, gid_t gid)
{
	if (setgroups(0, NULL)) {
		(void)syserror("failure: setgroups");
		return false;
	}

	if (setresgid(gid, gid, gid)) {
		(void)syserror("failure: setresgid");
		return false;
	}

	if (setresuid(uid, uid, uid)) {
		(void)syserror("failure: setresuid");
		return false;
	}

	return true;
}
/*
 * userns_fd_cb - child entry point that create_userns_hierarchy() passes to
 * do_clone().
 * @data: pointer to the struct userns_hierarchy of the current level.
 *
 * The child reads one byte from h->fd_event, optionally switches its ids to
 * 0, marks itself dumpable, writes "1" back on h->fd_event and then calls
 * create_userns_hierarchy() for the next element.
 * NOTE(review): the conditions guarding the early returns and the final
 * return are not visible in this excerpt.
 */
251 static int userns_fd_cb(void *data)
253 struct userns_hierarchy *h = data;
/* Block until the parent sends its one-byte notification. */
257 ret = read_nointr(h->fd_event, &c, 1);
259 return syserror("failure: read from socketpair");
261 /* Only switch ids if someone actually wrote a mapping for us. */
263 if (!switch_ids(0, 0))
264 return syserror("failure: switch ids to 0");
266 /* Ensure we can access proc files from processes we can ptrace. */
267 ret = prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
269 return syserror("failure: make dumpable");
/* Tell the parent that this child has finished its setup. */
272 ret = write_nointr(h->fd_event, "1", 1);
274 return syserror("failure: write to socketpair");
/*
 * h is advanced to the next struct userns_hierarchy before the call
 * (presumably the next element of an array - confirm), so the message below
 * reports the level stored in that next element.
 */
276 ret = create_userns_hierarchy(++h);
278 return syserror("failure: userns level %d", h->level);
/*
 * create_userns_hierarchy - create the user namespace for level @h and, via
 * the child's callback, for the levels that follow it.
 * @h: description of the current level; fd_event is written here for the
 *     child and fd_userns is filled in on success.
 *
 * The parent clones a child into a new user namespace, writes the id
 * mappings from h->id_map for it, exchanges one byte in each direction over
 * a socketpair, opens /proc/<pid>/ns/user of the child and finally waits for
 * the child to exit.
 * NOTE(review): the declarations of fd_socket, path, pid, bytes, c and fret,
 * the cleanup path and the return statements are not visible in this
 * excerpt.
 */
283 int create_userns_hierarchy(struct userns_hierarchy *h)
288 int fd_userns = -EBADF, ret = -1;
/* The level equal to MAX_USERNS_LEVEL ends the recursion. */
293 if (h->level == MAX_USERNS_LEVEL)
296 ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, fd_socket);
298 return syserror("failure: create socketpair");
300 /* Note the CLONE_FILES | CLONE_VM when mucking with fds and memory. */
/* The child communicates over fd_socket[1]; the parent uses fd_socket[0] below. */
301 h->fd_event = fd_socket[1];
302 pid = do_clone(userns_fd_cb, h, CLONE_NEWUSER | CLONE_FILES | CLONE_VM);
304 syserror("failure: userns level %d", h->level);
308 ret = map_ids_from_idmap(&h->id_map, pid);
311 syserror("failure: writing id mapping for userns level %d for %d", h->level, pid);
315 if (!list_empty(&h->id_map))
316 bytes = write_nointr(fd_socket[0], "1", 1); /* Inform the child we wrote a mapping. */
318 bytes = write_nointr(fd_socket[0], "0", 1); /* Inform the child we didn't write a mapping. */
321 syserror("failure: write to socketpair");
325 /* Wait for child to set*id() and become dumpable. */
326 bytes = read_nointr(fd_socket[0], &c, 1);
329 syserror("failure: read from socketpair");
/* NOTE(review): the snprintf() result is not checked here, unlike in get_userns_fd_from_idmap(). */
333 snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
334 fd_userns = open(path, O_RDONLY | O_CLOEXEC);
337 syserror("failure: open userns level %d for %d", h->level, pid);
/*
 * The namespace fd is only published when the child exited with status 0 and
 * fret is zero (fret is presumably this function's overall result - confirm).
 */
344 if (!wait_for_pid(pid) && !fret) {
345 h->fd_userns = fd_userns;
/*
 * add_map_entry - allocate a new id mapping and append it to the list @head.
 * @head:     list the new entry is appended to.
 * @map_type: type stored in the new entry's map_type field.
 *
 * Both the struct id_map and the list node that points to it are allocated
 * with malloc().
 * NOTE(review): the parameters between @head and @map_type, the remaining
 * fields of the initialiser, the handling of allocation failures and the
 * return values are not visible in this excerpt.
 */
357 int add_map_entry(struct list *head,
361 idmap_type_t map_type)
363 struct list *new_list = NULL;
364 struct id_map *newmap = NULL;
366 newmap = malloc(sizeof(*newmap));
370 new_list = malloc(sizeof(struct list));
/* Fill the freshly allocated mapping in one assignment. */
376 *newmap = (struct id_map){
380 .map_type = map_type,
/* Link the mapping into its node and append the node to the end of the list. */
383 new_list->elem = newmap;
384 list_add_tail(head, new_list);