3 Copyright 2013, Michael Kerrisk
4 Licensed under GNU General Public License v2 or later
6 Create a child process that executes a shell command in new
7 namespace(s); allow UID and GID mappings to be specified when
8 creating a user namespace.
/* File-scope helpers and state shared by the functions below.

   NOTE(review): every line of this listing carries a line-number prefix
   and some lines of the program are not present.  In particular the line
   that closes the errExit() macro (its definition ends in a line
   continuation) is not visible, and neither are the lines that open and
   close the structure holding 'argv' and 'pipe_fd'.  Those two members are
   accessed elsewhere as args->argv and args->pipe_fd through a
   'struct child_args' pointer, so they presumably belong to that
   structure -- confirm against the complete file. */
25 /* A simple error-handling function: print an error message based
26 on the value in 'errno' and terminate the calling process */
28 #define errExit(msg) do { perror(msg); exit(EXIT_FAILURE); \
32 char **argv; /* Command to be executed by child, with arguments */
33 int pipe_fd[2]; /* Pipe used to synchronize parent and child */
36 static int verbose, setid; /* Set to 1 by main() for the -v and -s options respectively */
/* Body of the usage message.  Writes a summary of the command line and
   of every supported option to stderr; 'pname' is substituted as the
   program name in the first line.  main() calls usage(argv[0]) from the
   default case of its option switch.

   NOTE(review): the function's signature and its closing lines (whatever
   happens after the last line of help text) are not visible in this
   listing. */
41 fprintf(stderr, "Usage: %s [options] cmd [arg...]\n\n", pname);
42 fprintf(stderr, "Create a child process that executes a shell command "
43 "in a new user namespace,\n"
44 "and possibly also other new namespace(s).\n\n");
45 fprintf(stderr, "Options can be:\n\n");
/* fpe(str): print one piece of help text to stderr, preceded by the fixed
   prefix contained in the format string.
   NOTE(review): the macro body already ends in ';', so each 'fpe(...);'
   below expands to a statement followed by an empty statement.  That is
   harmless here but would break an unbraced if/else; consider dropping
   the ';' from the macro definition. */
46 #define fpe(str) fprintf(stderr, " %s", str);
47 fpe("-i New IPC namespace\n");
48 fpe("-m New mount namespace\n");
49 fpe("-n New network namespace\n");
50 fpe("-p New PID namespace\n");
51 fpe("-u New UTS namespace\n");
52 fpe("-U New user namespace\n");
53 fpe("-M uid_map Specify UID map for user namespace\n");
54 fpe("-G gid_map Specify GID map for user namespace\n");
55 fpe(" If -M or -G is specified, -U is required\n");
56 fpe("-s Set uid/gid to 0 in the new user namespace\n");
57 fpe("-v Display verbose messages\n");
/* Description of the map-string syntax accepted by -M and -G */
59 fpe("Map strings for -M and -G consist of records of the form:\n");
61 fpe(" ID-inside-ns ID-outside-ns len\n");
63 fpe("A map string can contain multiple records, separated by commas;\n");
64 fpe("the commas are replaced by newlines before writing to map files.\n");
69 /* Update the mapping file 'map_file', with the value provided in
70 'mapping', a string that defines a UID or GID mapping. A UID or
71 GID mapping consists of one or more newline-delimited records of the form:
74 ID-inside-ns ID-outside-ns length
76 Requiring the user to supply a string that contains newlines is
77 of course inconvenient for command-line use. Thus, we permit the
78 use of commas to delimit records in this string, and replace them
79 with newlines before writing the string to the file. */
/* Parameters:
     mapping  - NUL-terminated map string (its length is taken with
                strlen()); records may be separated by commas.  The comma
                replacement is described as being done on this string
                itself, so the caller's buffer is presumably modified in
                place -- the assignment is not visible in this listing.
     map_file - path of the map file; main() passes
                "/proc/<pid>/uid_map" or "/proc/<pid>/gid_map".

   Failures of open() and write() are reported on stderr together with
   strerror(errno).

   NOTE(review): the return-type line, the local declarations other than
   'map_len', the statement controlled by the comma test, the 'fd' check
   and whatever follows each error message are not visible here. */
82 update_map(char *mapping, char *map_file)
85 size_t map_len; /* Length of 'mapping' */
87 /* Replace commas in mapping string with newlines */
89 map_len = strlen(mapping);
90 for (j = 0; j < map_len; j++)
91 if (mapping[j] == ',')
/* The file is opened read-write and the whole string is written with a
   single write() call of 'map_len' bytes (no terminating NUL). */
94 fd = open(map_file, O_RDWR);
96 fprintf(stderr, "open %s: %s\n", map_file, strerror(errno));
/* NOTE(review): write() returns a signed ssize_t that is compared with the
   unsigned 'map_len' here.  A return of -1 still compares unequal, so
   errors are caught, but after a short (partial) write 'errno' is not set
   and the message below may report a stale error. */
100 if (write(fd, mapping, map_len) != map_len) {
101 fprintf(stderr, "write %s: %s\n", map_file, strerror(errno));
/* Start function of the cloned child (main() passes it to clone() under
   the name childFunc, with &args as its argument).  Visible steps: block
   until the parent closes the write end of the synchronization pipe, then
   execute the requested command with execvp().

   NOTE(review): the line carrying the function name and parameter list,
   the declaration of 'ch', the setgid()/setuid() calls whose failures are
   reported below, and the code following execvp() are not visible in this
   listing. */
108 static int /* Start function for cloned child */
111 struct child_args *args = (struct child_args *) arg;
114 /* Wait until the parent has updated the UID and GID mappings. See
115 the comment in main(). We wait for end of file on a pipe that will
116 be closed by the parent process once it has updated the mappings. */
118 close(args->pipe_fd[1]); /* Close our descriptor for the write end
119 of the pipe so that we see EOF when
120 parent closes its descriptor */
/* Only a return of 0 (end of file) is accepted; a byte of data or a
   read error (-1) is treated as a failure. */
121 if (read(args->pipe_fd[0], &ch, 1) != 0) {
122 fprintf(stderr, "Failure in child: read from pipe returned != 0\n");
/* Error reports for failing to switch to GID 0 and UID 0.
   NOTE(review): presumably these belong to setgid(0)/setuid(0) calls made
   only when 'setid' is set (-s option) -- confirm against the full file. */
128 fprintf(stderr, "Failure in child to setgid 0: %s\n", strerror(errno));
130 fprintf(stderr, "Failure in child to setuid 0: %s\n", strerror(errno));
133 /* Execute a shell command */
/* args->argv is the tail of main()'s argv starting at the first
   non-option argument, so argv[0] is the command name. */
135 execvp(args->argv[0], args->argv);
/* Memory for the child's stack: a statically allocated buffer of
   1024 * 1024 bytes.  main() passes child_stack + STACK_SIZE, i.e. the
   address just past the end of the buffer, as the stack argument of
   clone(). */
139 #define STACK_SIZE (1024 * 1024)
141 static char child_stack[STACK_SIZE]; /* Space for child's stack */
/* Program entry point.  Visible steps:
     1. parse the options into clone() namespace flags, the UID/GID map
        strings and the 'verbose'/'setid' switches;
     2. reject -M or -G given without -U;
     3. record the command to run and create the synchronization pipe;
     4. clone() the child, which starts in childFunc();
     5. write the UID and GID maps of the child under /proc/<pid>/;
     6. close the write end of the pipe so the child may proceed;
     7. wait for the child to terminate.

   NOTE(review): several lines of main() are not visible in this listing,
   including the return-type line, the declarations of 'flags', 'opt' and
   'child_pid', the 'switch' line, the statements controlled by the error
   checks, any guard around the two printf() calls (presumably 'verbose'),
   and the end of the function. */
144 main(int argc, char *argv[])
148 struct child_args args;
/* NOTE(review): uid_map and gid_map are compared with NULL further down
   but are assigned only in the -M and -G cases, and 'flags' is only ever
   OR-ed into.  All three must therefore be initialised (to NULL / 0)
   before option parsing; that initialisation is not visible in this
   listing -- confirm it exists in the full file. */
149 char *uid_map, *gid_map;
150 char map_path[PATH_MAX];
152 /* Parse command-line options. The initial '+' character in
153 the final getopt() argument prevents GNU-style permutation
154 of command-line options. That's useful, since sometimes
155 the 'command' to be executed by this program itself
156 has command-line options. We don't want getopt() to treat
157 those as options to this program. */
164 while ((opt = getopt(argc, argv, "+imnpuUM:G:vs")) != -1) {
166 case 'i': flags |= CLONE_NEWIPC; break;
167 case 'm': flags |= CLONE_NEWNS; break;
168 case 'n': flags |= CLONE_NEWNET; break;
169 case 'p': flags |= CLONE_NEWPID; break;
170 case 'u': flags |= CLONE_NEWUTS; break;
171 case 'v': verbose = 1; break;
172 case 'M': uid_map = optarg; break;
173 case 'G': gid_map = optarg; break;
174 case 'U': flags |= CLONE_NEWUSER; break;
175 case 's': setid = 1; break;
176 default: usage(argv[0]);
180 /* -M or -G without -U is nonsensical */
182 if ((uid_map != NULL || gid_map != NULL) &&
183 !(flags & CLONE_NEWUSER))
/* Everything from the first non-option argument onwards is the command
   (and its arguments) that the child will execute. */
186 args.argv = &argv[optind];
188 /* We use a pipe to synchronize the parent and child, in order to
189 ensure that the parent sets the UID and GID maps before the child
190 calls execve(). This ensures that the child maintains its
191 capabilities during the execve() in the common case where we
192 want to map the child's effective user ID to 0 in the new user
193 namespace. Without this synchronization, the child would lose
194 its capabilities if it performed an execve() with nonzero
195 user IDs (see the capabilities(7) man page for details of the
196 transformation of a process's capabilities during execve()). */
198 if (pipe(args.pipe_fd) == -1)
201 /* Create the child in new namespace(s) */
/* The child is given the end address of child_stack as its stack pointer
   and &args as the argument of childFunc(); SIGCHLD is OR-ed into the
   namespace flags selected on the command line (see clone(2) for the
   meaning of the signal number in the flags word). */
203 child_pid = clone(childFunc, child_stack + STACK_SIZE,
204 flags | SIGCHLD, &args);
208 /* Parent falls through to here */
211 printf("%s: PID of child created by clone() is %ld\n",
212 argv[0], (long) child_pid);
214 /* Update the UID and GID maps in the child */
/* Each map path is formatted into the PATH_MAX-sized 'map_path' buffer and
   handed to update_map() together with the user-supplied map string.
   NOTE(review): the snprintf() results are not checked for truncation. */
216 if (uid_map != NULL) {
217 snprintf(map_path, PATH_MAX, "/proc/%ld/uid_map",
219 update_map(uid_map, map_path);
221 if (gid_map != NULL) {
222 snprintf(map_path, PATH_MAX, "/proc/%ld/gid_map",
224 update_map(gid_map, map_path);
227 /* Close the write end of the pipe, to signal to the child that we
228 have updated the UID and GID maps */
230 close(args.pipe_fd[1]);
/* The child's exit status is not collected (NULL status pointer). */
232 if (waitpid(child_pid, NULL, 0) == -1) /* Wait for child */
236 printf("%s: terminating\n", argv[0]);