}
static void lock_mount_exact(const struct path *path,
- struct pinned_mountpoint *mp);
+ struct pinned_mountpoint *mp, bool copy_mount,
+ unsigned int copy_flags);
#define LOCK_MOUNT_MAYBE_BENEATH(mp, path, beneath) \
struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
#define LOCK_MOUNT(mp, path) LOCK_MOUNT_MAYBE_BENEATH(mp, (path), false)
#define LOCK_MOUNT_EXACT(mp, path) \
struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
- lock_mount_exact((path), &mp)
+ lock_mount_exact((path), &mp, false, 0)
+#define LOCK_MOUNT_EXACT_COPY(mp, path, copy_flags) \
+ struct pinned_mountpoint mp __cleanup(unlock_mount) = {}; \
+ lock_mount_exact((path), &mp, true, (copy_flags))
static int graft_tree(struct mount *mnt, const struct pinned_mountpoint *mp)
{
return file;
}
-DEFINE_FREE(put_empty_mnt_ns, struct mnt_namespace *,
- if (!IS_ERR_OR_NULL(_T)) free_mnt_ns(_T))
-
static struct mnt_namespace *create_new_namespace(struct path *path, unsigned int flags)
{
- struct mnt_namespace *new_ns __free(put_empty_mnt_ns) = NULL;
- struct path to_path __free(path_put) = {};
struct mnt_namespace *ns = current->nsproxy->mnt_ns;
struct user_namespace *user_ns = current_user_ns();
- struct mount *new_ns_root;
+ struct mnt_namespace *new_ns;
+ struct mount *new_ns_root, *old_ns_root;
+ struct path to_path;
struct mount *mnt;
unsigned int copy_flags = 0;
bool locked = false;
if (IS_ERR(new_ns))
return ERR_CAST(new_ns);
- scoped_guard(namespace_excl) {
- new_ns_root = clone_mnt(ns->root, ns->root->mnt.mnt_root, copy_flags);
- if (IS_ERR(new_ns_root))
- return ERR_CAST(new_ns_root);
+ old_ns_root = ns->root;
+ to_path.mnt = &old_ns_root->mnt;
+ to_path.dentry = old_ns_root->mnt.mnt_root;
- /*
- * If the real rootfs had a locked mount on top of it somewhere
- * in the stack, lock the new mount tree as well so it can't be
- * exposed.
- */
- mnt = ns->root;
- while (mnt->overmount) {
- mnt = mnt->overmount;
- if (mnt->mnt.mnt_flags & MNT_LOCKED)
- locked = true;
- }
+ VFS_WARN_ON_ONCE(old_ns_root->mnt.mnt_sb->s_type != &nullfs_fs_type);
+
+ LOCK_MOUNT_EXACT_COPY(mp, &to_path, copy_flags);
+ if (IS_ERR(mp.parent)) {
+ free_mnt_ns(new_ns);
+ return ERR_CAST(mp.parent);
}
+ new_ns_root = mp.parent;
/*
- * We dropped the namespace semaphore so we can actually lock
- * the copy for mounting. The copied mount isn't attached to any
- * mount namespace and it is thus excluded from any propagation.
- * So realistically we're isolated and the mount can't be
- * overmounted.
+ * If the real rootfs had a locked mount on top of it somewhere
+ * in the stack, lock the new mount tree as well so it can't be
+ * exposed.
*/
-
- /* Borrow the reference from clone_mnt(). */
- to_path.mnt = &new_ns_root->mnt;
- to_path.dentry = dget(new_ns_root->mnt.mnt_root);
-
- /* Now lock for actual mounting. */
- LOCK_MOUNT_EXACT(mp, &to_path);
- if (unlikely(IS_ERR(mp.parent)))
- return ERR_CAST(mp.parent);
+ mnt = old_ns_root;
+ while (mnt->overmount) {
+ mnt = mnt->overmount;
+ if (mnt->mnt.mnt_flags & MNT_LOCKED)
+ locked = true;
+ }
/*
- * We don't emulate unshare()ing a mount namespace. We stick to the
- * restrictions of creating detached bind-mounts. It has a lot
- * saner and simpler semantics.
+	 * We don't emulate unshare()ing a mount namespace. We stick
+	 * to the restrictions of creating detached bind-mounts. They
+	 * have much saner and simpler semantics.
*/
mnt = __do_loopback(path, flags, copy_flags);
- if (IS_ERR(mnt))
- return ERR_CAST(mnt);
-
scoped_guard(mount_writer) {
+ if (IS_ERR(mnt)) {
+ emptied_ns = new_ns;
+ umount_tree(new_ns_root, 0);
+ return ERR_CAST(mnt);
+ }
+
if (locked)
mnt->mnt.mnt_flags |= MNT_LOCKED;
/*
- * Now mount the detached tree on top of the copy of the
- * real rootfs we created.
+	 * Now mount the detached tree on top of the copy
+	 * of the real rootfs we created.
*/
attach_mnt(mnt, new_ns_root, mp.mp);
if (user_ns != ns->user_ns)
lock_mnt_tree(new_ns_root);
}
- /* Add all mounts to the new namespace. */
- for (struct mount *p = new_ns_root; p; p = next_mnt(p, new_ns_root)) {
- mnt_add_to_ns(new_ns, p);
+ for (mnt = new_ns_root; mnt; mnt = next_mnt(mnt, new_ns_root)) {
+ mnt_add_to_ns(new_ns, mnt);
new_ns->nr_mounts++;
}
- new_ns->root = real_mount(no_free_ptr(to_path.mnt));
+ new_ns->root = new_ns_root;
ns_tree_add_raw(new_ns);
- return no_free_ptr(new_ns);
+ return new_ns;
}
static struct file *open_new_namespace(struct path *path, unsigned int flags)
}
static void lock_mount_exact(const struct path *path,
- struct pinned_mountpoint *mp)
+ struct pinned_mountpoint *mp, bool copy_mount,
+ unsigned int copy_flags)
{
struct dentry *dentry = path->dentry;
int err;
+	/*
+	 * When copying, the path must be the root of its mount so that
+	 * inode_lock() below locks the mount root's inode.
+	 */
+	VFS_WARN_ON_ONCE(copy_mount && !path_mounted(path));
+
inode_lock(dentry->d_inode);
namespace_lock();
if (unlikely(cant_mount(dentry)))
err = -ENOENT;
- else if (path_overmounted(path))
+ else if (!copy_mount && path_overmounted(path))
err = -EBUSY;
else
err = get_mountpoint(dentry, mp);
namespace_unlock();
inode_unlock(dentry->d_inode);
mp->parent = ERR_PTR(err);
- } else {
- mp->parent = real_mount(path->mnt);
+ return;
}
+
+ if (copy_mount)
+ mp->parent = clone_mnt(real_mount(path->mnt), dentry, copy_flags);
+ else
+ mp->parent = real_mount(path->mnt);
+ if (unlikely(IS_ERR(mp->parent)))
+ __unlock_mount(mp);
}
int finish_automount(struct vfsmount *__m, const struct path *path)