summaryrefslogtreecommitdiff
path: root/fs/namespace.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namespace.c')
-rw-r--r--fs/namespace.c206
1 files changed, 89 insertions, 117 deletions
diff --git a/fs/namespace.c b/fs/namespace.c
index d82910f33dc4..c58674a20cad 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -132,16 +132,6 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
-static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
-{
- struct ns_common *ns;
-
- if (!node)
- return NULL;
- ns = rb_entry(node, struct ns_common, ns_tree_node);
- return container_of(ns, struct mnt_namespace, ns);
-}
-
static void mnt_ns_release(struct mnt_namespace *ns)
{
/* keep alive for {list,stat}mount() */
@@ -151,7 +141,8 @@ static void mnt_ns_release(struct mnt_namespace *ns)
kfree(ns);
}
}
-DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
+DEFINE_FREE(mnt_ns_release, struct mnt_namespace *,
+ if (!IS_ERR(_T)) mnt_ns_release(_T))
static void mnt_ns_release_rcu(struct rcu_head *rcu)
{
@@ -1345,26 +1336,12 @@ static void delayed_mntput(struct work_struct *unused)
}
static DECLARE_DELAYED_WORK(delayed_mntput_work, delayed_mntput);
-static void mntput_no_expire(struct mount *mnt)
+static void noinline mntput_no_expire_slowpath(struct mount *mnt)
{
LIST_HEAD(list);
int count;
- rcu_read_lock();
- if (likely(READ_ONCE(mnt->mnt_ns))) {
- /*
- * Since we don't do lock_mount_hash() here,
- * ->mnt_ns can change under us. However, if it's
- * non-NULL, then there's a reference that won't
- * be dropped until after an RCU delay done after
- * turning ->mnt_ns NULL. So if we observe it
- * non-NULL under rcu_read_lock(), the reference
- * we are dropping is not the final one.
- */
- mnt_add_count(mnt, -1);
- rcu_read_unlock();
- return;
- }
+ VFS_BUG_ON(mnt->mnt_ns);
lock_mount_hash();
/*
* make sure that if __legitimize_mnt() has not seen us grab
@@ -1415,6 +1392,26 @@ static void mntput_no_expire(struct mount *mnt)
cleanup_mnt(mnt);
}
+static void mntput_no_expire(struct mount *mnt)
+{
+ rcu_read_lock();
+ if (likely(READ_ONCE(mnt->mnt_ns))) {
+ /*
+ * Since we don't do lock_mount_hash() here,
+ * ->mnt_ns can change under us. However, if it's
+ * non-NULL, then there's a reference that won't
+ * be dropped until after an RCU delay done after
+ * turning ->mnt_ns NULL. So if we observe it
+ * non-NULL under rcu_read_lock(), the reference
+ * we are dropping is not the final one.
+ */
+ mnt_add_count(mnt, -1);
+ rcu_read_unlock();
+ return;
+ }
+ mntput_no_expire_slowpath(mnt);
+}
+
void mntput(struct vfsmount *mnt)
{
if (mnt) {
@@ -3103,19 +3100,7 @@ static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned
SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags)
{
- int fd;
- struct file *file __free(fput) = NULL;
-
- file = vfs_open_tree(dfd, filename, flags);
- if (IS_ERR(file))
- return PTR_ERR(file);
-
- fd = get_unused_fd_flags(flags & O_CLOEXEC);
- if (fd < 0)
- return fd;
-
- fd_install(fd, no_free_ptr(file));
- return fd;
+ return FD_ADD(flags, vfs_open_tree(dfd, filename, flags));
}
/*
@@ -4093,8 +4078,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
- if (!anon)
- ns_tree_gen_id(&new_ns->ns);
+ ns_tree_gen_id(new_ns);
+
+ new_ns->is_anon = anon;
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
init_waitqueue_head(&new_ns->poll);
@@ -4283,10 +4269,10 @@ static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
unsigned int, attr_flags)
{
+ struct path new_path __free(path_put) = {};
struct mnt_namespace *ns;
struct fs_context *fc;
- struct file *file;
- struct path newmount;
+ struct vfsmount *new_mnt;
struct mount *mnt;
unsigned int mnt_flags = 0;
long ret;
@@ -4324,35 +4310,36 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
fc = fd_file(f)->private_data;
- ret = mutex_lock_interruptible(&fc->uapi_mutex);
- if (ret < 0)
+ ACQUIRE(mutex_intr, uapi_mutex)(&fc->uapi_mutex);
+ ret = ACQUIRE_ERR(mutex_intr, &uapi_mutex);
+ if (ret)
return ret;
/* There must be a valid superblock or we can't mount it */
ret = -EINVAL;
if (!fc->root)
- goto err_unlock;
+ return ret;
ret = -EPERM;
if (mount_too_revealing(fc->root->d_sb, &mnt_flags)) {
errorfcp(fc, "VFS", "Mount too revealing");
- goto err_unlock;
+ return ret;
}
ret = -EBUSY;
if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
- goto err_unlock;
+ return ret;
if (fc->sb_flags & SB_MANDLOCK)
warn_mandlock();
- newmount.mnt = vfs_create_mount(fc);
- if (IS_ERR(newmount.mnt)) {
- ret = PTR_ERR(newmount.mnt);
- goto err_unlock;
- }
- newmount.dentry = dget(fc->root);
- newmount.mnt->mnt_flags = mnt_flags;
+ new_mnt = vfs_create_mount(fc);
+ if (IS_ERR(new_mnt))
+ return PTR_ERR(new_mnt);
+ new_mnt->mnt_flags = mnt_flags;
+
+ new_path.dentry = dget(fc->root);
+ new_path.mnt = new_mnt;
/* We've done the mount bit - now move the file context into more or
* less the same state as if we'd done an fspick(). We don't want to
@@ -4362,38 +4349,27 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
vfs_clean_context(fc);
ns = alloc_mnt_ns(current->nsproxy->mnt_ns->user_ns, true);
- if (IS_ERR(ns)) {
- ret = PTR_ERR(ns);
- goto err_path;
- }
- mnt = real_mount(newmount.mnt);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+ mnt = real_mount(new_path.mnt);
ns->root = mnt;
ns->nr_mounts = 1;
mnt_add_to_ns(ns, mnt);
- mntget(newmount.mnt);
+ mntget(new_path.mnt);
- /* Attach to an apparent O_PATH fd with a note that we need to unmount
- * it, not just simply put it.
- */
- file = dentry_open(&newmount, O_PATH, fc->cred);
- if (IS_ERR(file)) {
- dissolve_on_fput(newmount.mnt);
- ret = PTR_ERR(file);
- goto err_path;
+ FD_PREPARE(fdf, (flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0,
+ dentry_open(&new_path, O_PATH, fc->cred));
+ if (fdf.err) {
+ dissolve_on_fput(new_path.mnt);
+ return fdf.err;
}
- file->f_mode |= FMODE_NEED_UNMOUNT;
-
- ret = get_unused_fd_flags((flags & FSMOUNT_CLOEXEC) ? O_CLOEXEC : 0);
- if (ret >= 0)
- fd_install(ret, file);
- else
- fput(file);
-err_path:
- path_put(&newmount);
-err_unlock:
- mutex_unlock(&fc->uapi_mutex);
- return ret;
+ /*
+ * Attach to an apparent O_PATH fd with a note that we
+ * need to unmount it, not just simply put it.
+ */
+ fd_prepare_file(fdf)->f_mode |= FMODE_NEED_UNMOUNT;
+ return fd_publish(fdf);
}
static inline int vfs_move_mount(const struct path *from_path,
@@ -5035,19 +5011,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
unsigned, flags, struct mount_attr __user *, uattr,
size_t, usize)
{
- struct file __free(fput) *file = NULL;
- int fd;
-
if (!uattr && usize)
return -EINVAL;
- file = vfs_open_tree(dfd, filename, flags);
- if (IS_ERR(file))
- return PTR_ERR(file);
+ FD_PREPARE(fdf, flags, vfs_open_tree(dfd, filename, flags));
+ if (fdf.err)
+ return fdf.err;
if (uattr) {
- int ret;
struct mount_kattr kattr = {};
+ struct file *file = fd_prepare_file(fdf);
+ int ret;
if (flags & OPEN_TREE_CLONE)
kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE;
@@ -5063,12 +5037,7 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename,
return ret;
}
- fd = get_unused_fd_flags(flags & O_CLOEXEC);
- if (fd < 0)
- return fd;
-
- fd_install(fd, no_free_ptr(file));
- return fd;
+ return fd_publish(fdf);
}
int show_path(struct seq_file *m, struct dentry *root)
@@ -5150,6 +5119,12 @@ static u64 mnt_to_propagation_flags(struct mount *m)
return propagation;
}
+u64 vfsmount_to_propagation_flags(struct vfsmount *mnt)
+{
+ return mnt_to_propagation_flags(real_mount(mnt));
+}
+EXPORT_SYMBOL_GPL(vfsmount_to_propagation_flags);
+
static void statmount_sb_basic(struct kstatmount *s)
{
struct super_block *sb = s->mnt->mnt_sb;
@@ -5454,11 +5429,11 @@ static int statmount_string(struct kstatmount *s, u64 flag)
ret = statmount_sb_source(s, seq);
break;
case STATMOUNT_MNT_UIDMAP:
- sm->mnt_uidmap = start;
+ offp = &sm->mnt_uidmap;
ret = statmount_mnt_uidmap(s, seq);
break;
case STATMOUNT_MNT_GIDMAP:
- sm->mnt_gidmap = start;
+ offp = &sm->mnt_gidmap;
ret = statmount_mnt_gidmap(s, seq);
break;
default:
@@ -5736,7 +5711,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
if (ret)
return ret;
- if (kreq->spare != 0)
+ if (kreq->mnt_ns_fd != 0 && kreq->mnt_ns_id)
return -EINVAL;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
@@ -5753,16 +5728,14 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq
{
struct mnt_namespace *mnt_ns;
- if (kreq->mnt_ns_id && kreq->spare)
- return ERR_PTR(-EINVAL);
-
- if (kreq->mnt_ns_id)
- return lookup_mnt_ns(kreq->mnt_ns_id);
-
- if (kreq->spare) {
+ if (kreq->mnt_ns_id) {
+ mnt_ns = lookup_mnt_ns(kreq->mnt_ns_id);
+ if (!mnt_ns)
+ return ERR_PTR(-ENOENT);
+ } else if (kreq->mnt_ns_fd) {
struct ns_common *ns;
- CLASS(fd, f)(kreq->spare);
+ CLASS(fd, f)(kreq->mnt_ns_fd);
if (fd_empty(f))
return ERR_PTR(-EBADF);
@@ -5774,11 +5747,12 @@ static struct mnt_namespace *grab_requested_mnt_ns(const struct mnt_id_req *kreq
return ERR_PTR(-EINVAL);
mnt_ns = to_mnt_ns(ns);
+ refcount_inc(&mnt_ns->passive);
} else {
mnt_ns = current->nsproxy->mnt_ns;
+ refcount_inc(&mnt_ns->passive);
}
- refcount_inc(&mnt_ns->passive);
return mnt_ns;
}
@@ -5801,8 +5775,8 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
return ret;
ns = grab_requested_mnt_ns(&kreq);
- if (!ns)
- return -ENOENT;
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
!ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
@@ -5912,8 +5886,8 @@ static void __free_klistmount_free(const struct klistmount *kls)
static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *kreq,
size_t nr_mnt_ids)
{
-
u64 last_mnt_id = kreq->param;
+ struct mnt_namespace *ns;
/* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
@@ -5927,9 +5901,10 @@ static inline int prepare_klistmount(struct klistmount *kls, struct mnt_id_req *
if (!kls->kmnt_ids)
return -ENOMEM;
- kls->ns = grab_requested_mnt_ns(kreq);
- if (!kls->ns)
- return -ENOENT;
+ ns = grab_requested_mnt_ns(kreq);
+ if (IS_ERR(ns))
+ return PTR_ERR(ns);
+ kls->ns = ns;
kls->mnt_parent_id = kreq->mnt_id;
return 0;
@@ -5985,11 +5960,8 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
}
struct mnt_namespace init_mnt_ns = {
- .ns.inum = ns_init_inum(&init_mnt_ns),
- .ns.ops = &mntns_operations,
+ .ns = NS_COMMON_INIT(init_mnt_ns),
.user_ns = &init_user_ns,
- .ns.__ns_ref = REFCOUNT_INIT(1),
- .ns.ns_type = ns_common_type(&init_mnt_ns),
.passive = REFCOUNT_INIT(1),
.mounts = RB_ROOT,
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),