summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--block/blk-integrity.c8
-rw-r--r--fs/namespace.c15
-rw-r--r--fs/nsfs.c6
-rw-r--r--fs/pidfs.c2
-rw-r--r--include/linux/fs.h14
-rw-r--r--include/linux/ns_common.h29
-rw-r--r--include/linux/nsfs.h26
-rw-r--r--include/linux/nsproxy.h11
-rw-r--r--include/linux/nstree.h91
-rw-r--r--include/linux/proc_ns.h32
-rw-r--r--ipc/namespace.c4
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/cgroup/namespace.c12
-rw-r--r--kernel/nstree.c233
-rw-r--r--kernel/pid_namespace.c4
-rw-r--r--kernel/time/namespace.c7
-rw-r--r--kernel/user_namespace.c5
-rw-r--r--kernel/utsname.c16
-rw-r--r--net/core/net_namespace.c46
19 files changed, 470 insertions, 93 deletions
diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index 056b8948369d..ce08ad4565e2 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -58,16 +58,14 @@ new_segment:
int blk_get_meta_cap(struct block_device *bdev, unsigned int cmd,
struct logical_block_metadata_cap __user *argp)
{
- struct blk_integrity *bi = blk_get_integrity(bdev->bd_disk);
+ struct blk_integrity *bi;
struct logical_block_metadata_cap meta_cap = {};
size_t usize = _IOC_SIZE(cmd);
- if (_IOC_DIR(cmd) != _IOC_DIR(FS_IOC_GETLBMD_CAP) ||
- _IOC_TYPE(cmd) != _IOC_TYPE(FS_IOC_GETLBMD_CAP) ||
- _IOC_NR(cmd) != _IOC_NR(FS_IOC_GETLBMD_CAP) ||
- _IOC_SIZE(cmd) < LBMD_SIZE_VER0)
+ if (!extensible_ioctl_valid(cmd, FS_IOC_GETLBMD_CAP, LBMD_SIZE_VER0))
return -ENOIOCTLCMD;
+ bi = blk_get_integrity(bdev->bd_disk);
if (!bi)
goto out;
diff --git a/fs/namespace.c b/fs/namespace.c
index 39afeb521a80..5b2c15f7170f 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -4180,18 +4180,15 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(-ENOMEM);
}
- if (!anon) {
- ret = ns_alloc_inum(&new_ns->ns);
- if (ret) {
- kfree(new_ns);
- dec_mnt_namespaces(ucounts);
- return ERR_PTR(ret);
- }
+
+ ret = ns_common_init(&new_ns->ns, &mntns_operations, !anon);
+ if (ret) {
+ kfree(new_ns);
+ dec_mnt_namespaces(ucounts);
+ return ERR_PTR(ret);
}
- new_ns->ns.ops = &mntns_operations;
if (!anon)
new_ns->seq = atomic64_inc_return(&mnt_ns_seq);
- refcount_set(&new_ns->ns.count, 1);
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
INIT_LIST_HEAD(&new_ns->mnt_ns_list);
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 59aa801347a7..d016d36272d4 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -163,15 +163,17 @@ static bool nsfs_ioctl_valid(unsigned int cmd)
case NS_GET_TGID_FROM_PIDNS:
case NS_GET_PID_IN_PIDNS:
case NS_GET_TGID_IN_PIDNS:
- return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
+ return true;
}
/* Extensible ioctls require some extra handling. */
switch (_IOC_NR(cmd)) {
case _IOC_NR(NS_MNT_GET_INFO):
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_INFO, MNT_NS_INFO_SIZE_VER0);
case _IOC_NR(NS_MNT_GET_NEXT):
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_NEXT, MNT_NS_INFO_SIZE_VER0);
case _IOC_NR(NS_MNT_GET_PREV):
- return (_IOC_TYPE(cmd) == _IOC_TYPE(cmd));
+ return extensible_ioctl_valid(cmd, NS_MNT_GET_PREV, MNT_NS_INFO_SIZE_VER0);
}
return false;
diff --git a/fs/pidfs.c b/fs/pidfs.c
index 108e7527f837..2c9c7636253a 100644
--- a/fs/pidfs.c
+++ b/fs/pidfs.c
@@ -440,7 +440,7 @@ static bool pidfs_ioctl_valid(unsigned int cmd)
* erronously mistook the file descriptor for a pidfd.
* This is not perfect but will catch most cases.
*/
- return (_IOC_TYPE(cmd) == _IOC_TYPE(PIDFD_GET_INFO));
+ return extensible_ioctl_valid(cmd, PIDFD_GET_INFO, PIDFD_INFO_SIZE_VER0);
}
return false;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index d7ab4f96d705..2f2edc53bf3c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -4023,4 +4023,18 @@ static inline bool vfs_empty_path(int dfd, const char __user *path)
int generic_atomic_write_valid(struct kiocb *iocb, struct iov_iter *iter);
+static inline bool extensible_ioctl_valid(unsigned int cmd_a,
+ unsigned int cmd_b, size_t min_size)
+{
+ if (_IOC_DIR(cmd_a) != _IOC_DIR(cmd_b))
+ return false;
+ if (_IOC_TYPE(cmd_a) != _IOC_TYPE(cmd_b))
+ return false;
+ if (_IOC_NR(cmd_a) != _IOC_NR(cmd_b))
+ return false;
+ if (_IOC_SIZE(cmd_a) < min_size)
+ return false;
+ return true;
+}
+
#endif /* _LINUX_FS_H */
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 7d22ea50b098..7224072cccc5 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -3,14 +3,43 @@
#define _LINUX_NS_COMMON_H
#include <linux/refcount.h>
+#include <linux/rbtree.h>
struct proc_ns_operations;
+struct cgroup_namespace;
+struct ipc_namespace;
+struct mnt_namespace;
+struct net;
+struct pid_namespace;
+struct time_namespace;
+struct user_namespace;
+struct uts_namespace;
+
struct ns_common {
struct dentry *stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
refcount_t count;
+ union {
+ struct {
+ u64 ns_id;
+ struct rb_node ns_tree_node;
+ struct list_head ns_list_node;
+ };
+ struct rcu_head ns_rcu;
+ };
};
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns)
+
#endif
diff --git a/include/linux/nsfs.h b/include/linux/nsfs.h
new file mode 100644
index 000000000000..fb84aa538091
--- /dev/null
+++ b/include/linux/nsfs.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
+
+#ifndef _LINUX_NSFS_H
+#define _LINUX_NSFS_H
+
+#include <linux/ns_common.h>
+
+struct path;
+struct task_struct;
+struct proc_ns_operations;
+
+int ns_get_path(struct path *path, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+typedef struct ns_common *ns_get_path_helper_t(void *);
+int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
+ void *private_data);
+
+bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
+
+int ns_get_name(char *buf, size_t size, struct task_struct *task,
+ const struct proc_ns_operations *ns_ops);
+void nsfs_init(void);
+
+#endif /* _LINUX_NSFS_H */
+
diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
index dab6a1734a22..e6bec522b139 100644
--- a/include/linux/nsproxy.h
+++ b/include/linux/nsproxy.h
@@ -42,17 +42,6 @@ struct nsproxy {
};
extern struct nsproxy init_nsproxy;
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns->ns), \
- struct ipc_namespace *: &(__ns->ns), \
- struct net *: &(__ns->ns), \
- struct pid_namespace *: &(__ns->ns), \
- struct mnt_namespace *: &(__ns->ns), \
- struct time_namespace *: &(__ns->ns), \
- struct user_namespace *: &(__ns->ns), \
- struct uts_namespace *: &(__ns->ns))
-
/*
* A structure to encompass all bits needed to install
* a partial or complete new set of namespaces.
diff --git a/include/linux/nstree.h b/include/linux/nstree.h
new file mode 100644
index 000000000000..29ad6402260c
--- /dev/null
+++ b/include/linux/nstree.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NSTREE_H
+#define _LINUX_NSTREE_H
+
+#include <linux/ns_common.h>
+#include <linux/nsproxy.h>
+#include <linux/rbtree.h>
+#include <linux/seqlock.h>
+#include <linux/rculist.h>
+#include <linux/cookie.h>
+
+/**
+ * struct ns_tree - Namespace tree
+ * @ns_tree: Rbtree of namespaces of a particular type
+ * @ns_list: Sequentially walkable list of all namespaces of this type
+ * @ns_tree_lock: Seqlock to protect the tree and list
+ */
+struct ns_tree {
+ struct rb_root ns_tree;
+ struct list_head ns_list;
+ seqlock_t ns_tree_lock;
+ int type;
+};
+
+extern struct ns_tree cgroup_ns_tree;
+extern struct ns_tree ipc_ns_tree;
+extern struct ns_tree mnt_ns_tree;
+extern struct ns_tree net_ns_tree;
+extern struct ns_tree pid_ns_tree;
+extern struct ns_tree time_ns_tree;
+extern struct ns_tree user_ns_tree;
+extern struct ns_tree uts_ns_tree;
+
+#define to_ns_tree(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(cgroup_ns_tree), \
+ struct ipc_namespace *: &(ipc_ns_tree), \
+ struct net *: &(net_ns_tree), \
+ struct pid_namespace *: &(pid_ns_tree), \
+ struct mnt_namespace *: &(mnt_ns_tree), \
+ struct time_namespace *: &(time_ns_tree), \
+ struct user_namespace *: &(user_ns_tree), \
+ struct uts_namespace *: &(uts_ns_tree))
+
+u64 ns_tree_gen_id(struct ns_common *ns);
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
+struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
+ struct ns_tree *ns_tree,
+ bool previous);
+
+static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
+{
+ ns_tree_gen_id(ns);
+ __ns_tree_add_raw(ns, ns_tree);
+}
+
+/**
+ * ns_tree_add_raw - Add a namespace to a namespace
+ * @ns: Namespace to add
+ *
+ * This function adds a namespace to the appropriate namespace tree
+ * without assigning a id.
+ */
+#define ns_tree_add_raw(__ns) __ns_tree_add_raw(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_add - Add a namespace to a namespace tree
+ * @ns: Namespace to add
+ *
+ * This function assigns a new id to the namespace and adds it to the
+ * appropriate namespace tree and list.
+ */
+#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns))
+
+/**
+ * ns_tree_remove - Remove a namespace from a namespace tree
+ * @ns: Namespace to remove
+ *
+ * This function removes a namespace from the appropriate namespace
+ * tree and list.
+ */
+#define ns_tree_remove(__ns) __ns_tree_remove(to_ns_common(__ns), to_ns_tree(__ns))
+
+#define ns_tree_adjoined_rcu(__ns, __previous) \
+ __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
+
+#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
+
+#endif /* _LINUX_NSTREE_H */
diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h
index 4b20375f3783..7f89f0829e60 100644
--- a/include/linux/proc_ns.h
+++ b/include/linux/proc_ns.h
@@ -5,7 +5,7 @@
#ifndef _LINUX_PROC_NS_H
#define _LINUX_PROC_NS_H
-#include <linux/ns_common.h>
+#include <linux/nsfs.h>
#include <uapi/linux/nsfs.h>
struct pid_namespace;
@@ -66,25 +66,27 @@ static inline void proc_free_inum(unsigned int inum) {}
#endif /* CONFIG_PROC_FS */
-static inline int ns_alloc_inum(struct ns_common *ns)
+static inline int ns_common_init(struct ns_common *ns,
+ const struct proc_ns_operations *ops,
+ bool alloc_inum)
{
- WRITE_ONCE(ns->stashed, NULL);
- return proc_alloc_inum(&ns->inum);
+ if (alloc_inum) {
+ int ret;
+ ret = proc_alloc_inum(&ns->inum);
+ if (ret)
+ return ret;
+ }
+ refcount_set(&ns->count, 1);
+ ns->stashed = NULL;
+ ns->ops = ops;
+ ns->ns_id = 0;
+ RB_CLEAR_NODE(&ns->ns_tree_node);
+ INIT_LIST_HEAD(&ns->ns_list_node);
+ return 0;
}
#define ns_free_inum(ns) proc_free_inum((ns)->inum)
#define get_proc_ns(inode) ((struct ns_common *)(inode)->i_private)
-extern int ns_get_path(struct path *path, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-typedef struct ns_common *ns_get_path_helper_t(void *);
-extern int ns_get_path_cb(struct path *path, ns_get_path_helper_t ns_get_cb,
- void *private_data);
-
-extern bool ns_match(const struct ns_common *ns, dev_t dev, ino_t ino);
-
-extern int ns_get_name(char *buf, size_t size, struct task_struct *task,
- const struct proc_ns_operations *ns_ops);
-extern void nsfs_init(void);
#endif /* _LINUX_PROC_NS_H */
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 4df91ceeeafe..d4188a88ee57 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -61,12 +61,10 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (ns == NULL)
goto fail_dec;
- err = ns_alloc_inum(&ns->ns);
+ err = ns_common_init(&ns->ns, &ipcns_operations, true);
if (err)
goto fail_free;
- ns->ns.ops = &ipcns_operations;
- refcount_set(&ns->ns.count, 1);
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
diff --git a/kernel/Makefile b/kernel/Makefile
index c60623448235..b807516a1b43 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -8,7 +8,7 @@ obj-y = fork.o exec_domain.o panic.o \
sysctl.o capability.o ptrace.o user.o \
signal.o sys.o umh.o workqueue.o pid.o task_work.o \
extable.o params.o \
- kthread.o sys_ni.o nsproxy.o \
+ kthread.o sys_ni.o nsproxy.o nstree.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o smpboot.o ucount.o regset.o ksyms_common.o
diff --git a/kernel/cgroup/namespace.c b/kernel/cgroup/namespace.c
index 144a464e45c6..0391b6ab0bf1 100644
--- a/kernel/cgroup/namespace.c
+++ b/kernel/cgroup/namespace.c
@@ -21,20 +21,16 @@ static void dec_cgroup_namespaces(struct ucounts *ucounts)
static struct cgroup_namespace *alloc_cgroup_ns(void)
{
- struct cgroup_namespace *new_ns;
+ struct cgroup_namespace *new_ns __free(kfree) = NULL;
int ret;
new_ns = kzalloc(sizeof(struct cgroup_namespace), GFP_KERNEL_ACCOUNT);
if (!new_ns)
return ERR_PTR(-ENOMEM);
- ret = ns_alloc_inum(&new_ns->ns);
- if (ret) {
- kfree(new_ns);
+ ret = ns_common_init(&new_ns->ns, &cgroupns_operations, true);
+ if (ret)
return ERR_PTR(ret);
- }
- refcount_set(&new_ns->ns.count, 1);
- new_ns->ns.ops = &cgroupns_operations;
- return new_ns;
+ return no_free_ptr(new_ns);
}
void free_cgroup_ns(struct cgroup_namespace *ns)
diff --git a/kernel/nstree.c b/kernel/nstree.c
new file mode 100644
index 000000000000..bbe8bedc924c
--- /dev/null
+++ b/kernel/nstree.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/nstree.h>
+#include <linux/proc_ns.h>
+#include <linux/vfsdebug.h>
+
+struct ns_tree mnt_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(mnt_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWNS,
+};
+
+struct ns_tree net_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(net_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWNET,
+};
+EXPORT_SYMBOL_GPL(net_ns_tree);
+
+struct ns_tree uts_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(uts_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWUTS,
+};
+
+struct ns_tree user_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(user_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWUSER,
+};
+
+struct ns_tree ipc_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(ipc_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWIPC,
+};
+
+struct ns_tree pid_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(pid_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWPID,
+};
+
+struct ns_tree cgroup_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(cgroup_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWCGROUP,
+};
+
+struct ns_tree time_ns_tree = {
+ .ns_tree = RB_ROOT,
+ .ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list),
+ .ns_tree_lock = __SEQLOCK_UNLOCKED(time_ns_tree.ns_tree_lock),
+ .type = CLONE_NEWTIME,
+};
+
+DEFINE_COOKIE(namespace_cookie);
+
+static inline struct ns_common *node_to_ns(const struct rb_node *node)
+{
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct ns_common, ns_tree_node);
+}
+
+static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
+{
+ struct ns_common *ns_a = node_to_ns(a);
+ struct ns_common *ns_b = node_to_ns(b);
+ u64 ns_id_a = ns_a->ns_id;
+ u64 ns_id_b = ns_b->ns_id;
+
+ if (ns_id_a < ns_id_b)
+ return -1;
+ if (ns_id_a > ns_id_b)
+ return 1;
+ return 0;
+}
+
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
+{
+ struct rb_node *node, *prev;
+
+ VFS_WARN_ON_ONCE(!ns->ns_id);
+
+ write_seqlock(&ns_tree->ns_tree_lock);
+
+ VFS_WARN_ON_ONCE(ns->ops->type != ns_tree->type);
+
+ node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp);
+ /*
+ * If there's no previous entry simply add it after the
+ * head and if there is add it after the previous entry.
+ */
+ prev = rb_prev(&ns->ns_tree_node);
+ if (!prev)
+ list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list);
+ else
+ list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);
+
+ write_sequnlock(&ns_tree->ns_tree_lock);
+
+ VFS_WARN_ON_ONCE(node);
+}
+
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
+{
+ VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));
+ VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
+ VFS_WARN_ON_ONCE(ns->ops->type != ns_tree->type);
+
+ write_seqlock(&ns_tree->ns_tree_lock);
+ rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
+ list_bidir_del_rcu(&ns->ns_list_node);
+ RB_CLEAR_NODE(&ns->ns_tree_node);
+ write_sequnlock(&ns_tree->ns_tree_lock);
+}
+EXPORT_SYMBOL_GPL(__ns_tree_remove);
+
+static int ns_find(const void *key, const struct rb_node *node)
+{
+ const u64 ns_id = *(u64 *)key;
+ const struct ns_common *ns = node_to_ns(node);
+
+ if (ns_id < ns->ns_id)
+ return -1;
+ if (ns_id > ns->ns_id)
+ return 1;
+ return 0;
+}
+
+
+static struct ns_tree *ns_tree_from_type(int ns_type)
+{
+ switch (ns_type) {
+ case CLONE_NEWCGROUP:
+ return &cgroup_ns_tree;
+ case CLONE_NEWIPC:
+ return &ipc_ns_tree;
+ case CLONE_NEWNS:
+ return &mnt_ns_tree;
+ case CLONE_NEWNET:
+ return &net_ns_tree;
+ case CLONE_NEWPID:
+ return &pid_ns_tree;
+ case CLONE_NEWUSER:
+ return &user_ns_tree;
+ case CLONE_NEWUTS:
+ return &uts_ns_tree;
+ case CLONE_NEWTIME:
+ return &time_ns_tree;
+ }
+
+ return NULL;
+}
+
+struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
+{
+ struct ns_tree *ns_tree;
+ struct rb_node *node;
+ unsigned int seq;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
+
+ ns_tree = ns_tree_from_type(ns_type);
+ if (!ns_tree)
+ return NULL;
+
+ do {
+ seq = read_seqbegin(&ns_tree->ns_tree_lock);
+ node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find);
+ if (node)
+ break;
+ } while (read_seqretry(&ns_tree->ns_tree_lock, seq));
+
+ if (!node)
+ return NULL;
+
+ VFS_WARN_ON_ONCE(node_to_ns(node)->ops->type != ns_type);
+
+ return node_to_ns(node);
+}
+
+/**
+ * ns_tree_adjoined_rcu - find the next/previous namespace in the same
+ * tree
+ * @ns: namespace to start from
+ * @previous: if true find the previous namespace, otherwise the next
+ *
+ * Find the next or previous namespace in the same tree as @ns. If
+ * there is no next/previous namespace, -ENOENT is returned.
+ */
+struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
+ struct ns_tree *ns_tree, bool previous)
+{
+ struct list_head *list;
+
+ RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
+
+ if (previous)
+ list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node));
+ else
+ list = rcu_dereference(list_next_rcu(&ns->ns_list_node));
+ if (list_is_head(list, &ns_tree->ns_list))
+ return ERR_PTR(-ENOENT);
+
+ VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ops->type != ns_tree->type);
+
+ return list_entry_rcu(list, struct ns_common, ns_list_node);
+}
+
+/**
+ * ns_tree_gen_id - generate a new namespace id
+ * @ns: namespace to generate id for
+ *
+ * Generates a new namespace id and assigns it to the namespace. All
+ * namespaces types share the same id space and thus can be compared
+ * directly. IOW, when two ids of two namespace are equal, they are
+ * identical.
+ */
+u64 ns_tree_gen_id(struct ns_common *ns)
+{
+ guard(preempt)();
+ ns->ns_id = gen_cookie_next(&namespace_cookie);
+ return ns->ns_id;
+}
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 7098ed44e717..20ce4052d1c5 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -102,17 +102,15 @@ static struct pid_namespace *create_pid_namespace(struct user_namespace *user_ns
if (ns->pid_cachep == NULL)
goto out_free_idr;
- err = ns_alloc_inum(&ns->ns);
+ err = ns_common_init(&ns->ns, &pidns_operations, true);
if (err)
goto out_free_idr;
- ns->ns.ops = &pidns_operations;
ns->pid_max = PID_MAX_LIMIT;
err = register_pidns_sysctls(ns);
if (err)
goto out_free_inum;
- refcount_set(&ns->ns.count, 1);
ns->level = level;
ns->parent = get_pid_ns(parent_pid_ns);
ns->user_ns = get_user_ns(user_ns);
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 667452768ed3..0be93d8f2896 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -88,22 +88,19 @@ static struct time_namespace *clone_time_ns(struct user_namespace *user_ns,
goto fail;
err = -ENOMEM;
- ns = kmalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT);
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL_ACCOUNT);
if (!ns)
goto fail_dec;
- refcount_set(&ns->ns.count, 1);
-
ns->vvar_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
if (!ns->vvar_page)
goto fail_free;
- err = ns_alloc_inum(&ns->ns);
+ err = ns_common_init(&ns->ns, &timens_operations, true);
if (err)
goto fail_free_page;
ns->ucounts = ucounts;
- ns->ns.ops = &timens_operations;
ns->user_ns = get_user_ns(user_ns);
ns->offsets = old_ns->offsets;
ns->frozen_offsets = false;
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 682f40d5632d..98f4fe84d039 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -124,12 +124,11 @@ int create_user_ns(struct cred *new)
goto fail_dec;
ns->parent_could_setfcap = cap_raised(new->cap_effective, CAP_SETFCAP);
- ret = ns_alloc_inum(&ns->ns);
+
+ ret = ns_common_init(&ns->ns, &userns_operations, true);
if (ret)
goto fail_free;
- ns->ns.ops = &userns_operations;
- refcount_set(&ns->ns.count, 1);
/* Leave the new->user_ns reference with the new user namespace. */
ns->parent = parent_ns;
ns->level = parent_ns->level + 1;
diff --git a/kernel/utsname.c b/kernel/utsname.c
index b1ac3ca870f2..02037010b378 100644
--- a/kernel/utsname.c
+++ b/kernel/utsname.c
@@ -27,16 +27,6 @@ static void dec_uts_namespaces(struct ucounts *ucounts)
dec_ucount(ucounts, UCOUNT_UTS_NAMESPACES);
}
-static struct uts_namespace *create_uts_ns(void)
-{
- struct uts_namespace *uts_ns;
-
- uts_ns = kmem_cache_alloc(uts_ns_cache, GFP_KERNEL);
- if (uts_ns)
- refcount_set(&uts_ns->ns.count, 1);
- return uts_ns;
-}
-
/*
* Clone a new ns copying an original utsname, setting refcount to 1
* @old_ns: namespace to clone
@@ -55,17 +45,15 @@ static struct uts_namespace *clone_uts_ns(struct user_namespace *user_ns,
goto fail;
err = -ENOMEM;
- ns = create_uts_ns();
+ ns = kmem_cache_zalloc(uts_ns_cache, GFP_KERNEL);
if (!ns)
goto fail_dec;
- err = ns_alloc_inum(&ns->ns);
+ err = ns_common_init(&ns->ns, &utsns_operations, true);
if (err)
goto fail_free;
ns->ucounts = ucounts;
- ns->ns.ops = &utsns_operations;
-
down_read(&uts_sem);
memcpy(&ns->name, &old_ns->name, sizeof(ns->name));
ns->user_ns = get_user_ns(user_ns);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 1b6f3826dd0e..5fb7bd8ac45a 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -397,10 +397,22 @@ static __net_init void preinit_net_sysctl(struct net *net)
}
/* init code that must occur even if setup_net() is not called. */
-static __net_init void preinit_net(struct net *net, struct user_namespace *user_ns)
+static __net_init int preinit_net(struct net *net, struct user_namespace *user_ns)
{
+ const struct proc_ns_operations *ns_ops;
+ int ret;
+
+#ifdef CONFIG_NET_NS
+ ns_ops = &netns_operations;
+#else
+ ns_ops = NULL;
+#endif
+
+ ret = ns_common_init(&net->ns, ns_ops, false);
+ if (ret)
+ return ret;
+
refcount_set(&net->passive, 1);
- refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128, "net_refcnt");
ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net_notrefcnt");
@@ -420,6 +432,7 @@ static __net_init void preinit_net(struct net *net, struct user_namespace *user_
INIT_LIST_HEAD(&net->ptype_all);
INIT_LIST_HEAD(&net->ptype_specific);
preinit_net_sysctl(net);
+ return 0;
}
/*
@@ -559,7 +572,9 @@ struct net *copy_net_ns(unsigned long flags,
goto dec_ucounts;
}
- preinit_net(net, user_ns);
+ rv = preinit_net(net, user_ns);
+ if (rv < 0)
+ goto dec_ucounts;
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -812,15 +827,15 @@ static void net_ns_net_debugfs(struct net *net)
static __net_init int net_ns_net_init(struct net *net)
{
-#ifdef CONFIG_NET_NS
- net->ns.ops = &netns_operations;
-#endif
- net->ns.inum = PROC_NET_INIT_INO;
- if (net != &init_net) {
- int ret = ns_alloc_inum(&net->ns);
- if (ret)
- return ret;
- }
+ int ret = 0;
+
+ if (net == &init_net)
+ net->ns.inum = PROC_NET_INIT_INO;
+ else
+ ret = proc_alloc_inum(&to_ns_common(net)->inum);
+ if (ret)
+ return ret;
+
net_ns_net_debugfs(net);
return 0;
}
@@ -1282,7 +1297,12 @@ void __init net_ns_init(void)
#ifdef CONFIG_KEYS
init_net.key_domain = &init_net_key_domain;
#endif
- preinit_net(&init_net, &init_user_ns);
+ /*
+ * This currently cannot fail as the initial network namespace
+ * has a static inode number.
+ */
+ if (preinit_net(&init_net, &init_user_ns))
+ panic("Could not preinitialize the initial network namespace");
down_write(&pernet_ops_rwsem);
if (setup_net(&init_net))