summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/mount.h3
-rw-r--r--fs/namespace.c9
-rw-r--r--include/linux/ns/ns_common_types.h196
-rw-r--r--include/linux/ns/nstree_types.h55
-rw-r--r--include/linux/ns_common.h266
-rw-r--r--include/linux/nstree.h38
-rw-r--r--include/linux/pid_namespace.h3
-rw-r--r--init/version-timestamp.c2
-rw-r--r--ipc/msgutil.c2
-rw-r--r--ipc/namespace.c3
-rw-r--r--kernel/cgroup/cgroup.c2
-rw-r--r--kernel/nscommon.c15
-rw-r--r--kernel/nstree.c304
-rw-r--r--kernel/pid.c2
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/time/namespace.c2
-rw-r--r--kernel/user.c2
-rw-r--r--tools/testing/selftests/namespaces/nsid_test.c107
18 files changed, 576 insertions, 437 deletions
diff --git a/fs/mount.h b/fs/mount.h
index f13a28752d0b..2d28ef2a3aed 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -27,6 +27,7 @@ struct mnt_namespace {
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
refcount_t passive; /* number references not pinning @mounts */
+ bool is_anon;
} __randomize_layout;
struct mnt_pcp {
@@ -175,7 +176,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
static inline bool is_anon_ns(struct mnt_namespace *ns)
{
- return ns->ns.ns_id == 0;
+ return ns->is_anon;
}
static inline bool anon_ns_root(const struct mount *m)
diff --git a/fs/namespace.c b/fs/namespace.c
index eded33eeb647..25289b869be1 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -138,7 +138,7 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
if (!node)
return NULL;
- ns = rb_entry(node, struct ns_common, ns_tree_node);
+ ns = rb_entry(node, struct ns_common, ns_tree_node.ns_node);
return container_of(ns, struct mnt_namespace, ns);
}
@@ -4093,8 +4093,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
dec_mnt_namespaces(ucounts);
return ERR_PTR(ret);
}
- if (!anon)
- ns_tree_gen_id(new_ns);
+ ns_tree_gen_id(new_ns);
+
+ new_ns->is_anon = anon;
refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
init_waitqueue_head(&new_ns->poll);
@@ -5985,7 +5986,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
}
struct mnt_namespace init_mnt_ns = {
- .ns = NS_COMMON_INIT(init_mnt_ns, 1),
+ .ns = NS_COMMON_INIT(init_mnt_ns),
.user_ns = &init_user_ns,
.passive = REFCOUNT_INIT(1),
.mounts = RB_ROOT,
diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h
new file mode 100644
index 000000000000..b332b019b29c
--- /dev/null
+++ b/include/linux/ns/ns_common_types.h
@@ -0,0 +1,196 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_NS_COMMON_TYPES_H
+#define _LINUX_NS_COMMON_TYPES_H
+
+#include <linux/atomic.h>
+#include <linux/ns/nstree_types.h>
+#include <linux/rbtree.h>
+#include <linux/refcount.h>
+#include <linux/types.h>
+
+struct cgroup_namespace;
+struct dentry;
+struct ipc_namespace;
+struct mnt_namespace;
+struct net;
+struct pid_namespace;
+struct proc_ns_operations;
+struct time_namespace;
+struct user_namespace;
+struct uts_namespace;
+
+extern struct cgroup_namespace init_cgroup_ns;
+extern struct ipc_namespace init_ipc_ns;
+extern struct mnt_namespace init_mnt_ns;
+extern struct net init_net;
+extern struct pid_namespace init_pid_ns;
+extern struct time_namespace init_time_ns;
+extern struct user_namespace init_user_ns;
+extern struct uts_namespace init_uts_ns;
+
+extern const struct proc_ns_operations cgroupns_operations;
+extern const struct proc_ns_operations ipcns_operations;
+extern const struct proc_ns_operations mntns_operations;
+extern const struct proc_ns_operations netns_operations;
+extern const struct proc_ns_operations pidns_operations;
+extern const struct proc_ns_operations pidns_for_children_operations;
+extern const struct proc_ns_operations timens_operations;
+extern const struct proc_ns_operations timens_for_children_operations;
+extern const struct proc_ns_operations userns_operations;
+extern const struct proc_ns_operations utsns_operations;
+
+/*
+ * Namespace lifetimes are managed via a two-tier reference counting model:
+ *
+ * (1) __ns_ref (refcount_t): Main reference count tracking memory
+ * lifetime. Controls when the namespace structure itself is freed.
+ * It also pins the namespace on the namespace trees whereas (2)
+ * only regulates their visibility to userspace.
+ *
+ * (2) __ns_ref_active (atomic_t): Reference count tracking active users.
+ * Controls visibility of the namespace in the namespace trees.
+ * Any live task that uses the namespace (via nsproxy or cred) holds
+ * an active reference. Any open file descriptor or bind-mount of
+ * the namespace holds an active reference. Once all tasks have
+ * called exited their namespaces and all file descriptors and
+ * bind-mounts have been released the active reference count drops
+ * to zero and the namespace becomes inactive. IOW, the namespace
+ * cannot be listed or opened via file handles anymore.
+ *
+ * Note that it is valid to transition from active to inactive and
+ * back from inactive to active e.g., when resurrecting an inactive
+ * namespace tree via the SIOCGSKNS ioctl().
+ *
+ * Relationship and lifecycle states:
+ *
+ * - Active (__ns_ref_active > 0):
+ * Namespace is actively used and visible to userspace. The namespace
+ * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
+ * handles, or discovered via listns().
+ *
+ * - Inactive (__ns_ref_active == 0, __ns_ref > 0):
+ * No tasks are actively using the namespace and it isn't pinned by
+ * any bind-mounts or open file descriptors anymore. But the namespace
+ * is still kept alive by internal references. For example, the user
+ * namespace could be pinned by an open file through file->f_cred
+ * references when one of the now defunct tasks had opened a file and
+ * handed the file descriptor off to another process via a UNIX
+ * sockets. Such references keep the namespace structure alive through
+ * __ns_ref but will not hold an active reference.
+ *
+ * - Destroyed (__ns_ref == 0):
+ * No references remain. The namespace is removed from the tree and freed.
+ *
+ * State transitions:
+ *
+ * Active -> Inactive:
+ * When the last task using the namespace exits it drops its active
+ * references to all namespaces. However, user and pid namespaces
+ * remain accessible until the task has been reaped.
+ *
+ * Inactive -> Active:
+ * An inactive namespace tree might be resurrected due to e.g., the
+ * SIOCGSKNS ioctl() on a socket.
+ *
+ * Inactive -> Destroyed:
+ * When __ns_ref drops to zero the namespace is removed from the
+ * namespaces trees and the memory is freed (after RCU grace period).
+ *
+ * Initial namespaces:
+ * Boot-time namespaces (init_net, init_pid_ns, etc.) start with
+ * __ns_ref_active = 1 and remain active forever.
+ *
+ * @ns_type: type of namespace (e.g., CLONE_NEWNET)
+ * @stashed: cached dentry to be used by the vfs
+ * @ops: namespace operations
+ * @inum: namespace inode number (quickly recycled for non-initial namespaces)
+ * @__ns_ref: main reference count (do not use directly)
+ * @ns_tree: namespace tree nodes and active reference count
+ */
+struct ns_common {
+ u32 ns_type;
+ struct dentry *stashed;
+ const struct proc_ns_operations *ops;
+ unsigned int inum;
+ refcount_t __ns_ref; /* do not use directly */
+ union {
+ struct ns_tree;
+ struct rcu_head ns_rcu;
+ };
+};
+
+#define to_ns_common(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &(__ns)->ns, \
+ const struct cgroup_namespace *: &(__ns)->ns, \
+ struct ipc_namespace *: &(__ns)->ns, \
+ const struct ipc_namespace *: &(__ns)->ns, \
+ struct mnt_namespace *: &(__ns)->ns, \
+ const struct mnt_namespace *: &(__ns)->ns, \
+ struct net *: &(__ns)->ns, \
+ const struct net *: &(__ns)->ns, \
+ struct pid_namespace *: &(__ns)->ns, \
+ const struct pid_namespace *: &(__ns)->ns, \
+ struct time_namespace *: &(__ns)->ns, \
+ const struct time_namespace *: &(__ns)->ns, \
+ struct user_namespace *: &(__ns)->ns, \
+ const struct user_namespace *: &(__ns)->ns, \
+ struct uts_namespace *: &(__ns)->ns, \
+ const struct uts_namespace *: &(__ns)->ns)
+
+#define ns_init_inum(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
+ struct ipc_namespace *: IPC_NS_INIT_INO, \
+ struct mnt_namespace *: MNT_NS_INIT_INO, \
+ struct net *: NET_NS_INIT_INO, \
+ struct pid_namespace *: PID_NS_INIT_INO, \
+ struct time_namespace *: TIME_NS_INIT_INO, \
+ struct user_namespace *: USER_NS_INIT_INO, \
+ struct uts_namespace *: UTS_NS_INIT_INO)
+
+#define ns_init_ns(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: &init_cgroup_ns, \
+ struct ipc_namespace *: &init_ipc_ns, \
+ struct mnt_namespace *: &init_mnt_ns, \
+ struct net *: &init_net, \
+ struct pid_namespace *: &init_pid_ns, \
+ struct time_namespace *: &init_time_ns, \
+ struct user_namespace *: &init_user_ns, \
+ struct uts_namespace *: &init_uts_ns)
+
+#define ns_init_id(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CGROUP_NS_INIT_ID, \
+ struct ipc_namespace *: IPC_NS_INIT_ID, \
+ struct mnt_namespace *: MNT_NS_INIT_ID, \
+ struct net *: NET_NS_INIT_ID, \
+ struct pid_namespace *: PID_NS_INIT_ID, \
+ struct time_namespace *: TIME_NS_INIT_ID, \
+ struct user_namespace *: USER_NS_INIT_ID, \
+ struct uts_namespace *: UTS_NS_INIT_ID)
+
+#define to_ns_operations(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
+ struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
+ struct mnt_namespace *: &mntns_operations, \
+ struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
+ struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
+ struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
+ struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
+ struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
+
+#define ns_common_type(__ns) \
+ _Generic((__ns), \
+ struct cgroup_namespace *: CLONE_NEWCGROUP, \
+ struct ipc_namespace *: CLONE_NEWIPC, \
+ struct mnt_namespace *: CLONE_NEWNS, \
+ struct net *: CLONE_NEWNET, \
+ struct pid_namespace *: CLONE_NEWPID, \
+ struct time_namespace *: CLONE_NEWTIME, \
+ struct user_namespace *: CLONE_NEWUSER, \
+ struct uts_namespace *: CLONE_NEWUTS)
+
+#endif /* _LINUX_NS_COMMON_TYPES_H */
diff --git a/include/linux/ns/nstree_types.h b/include/linux/ns/nstree_types.h
new file mode 100644
index 000000000000..2fb28ee31efb
--- /dev/null
+++ b/include/linux/ns/nstree_types.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
+#ifndef _LINUX_NSTREE_TYPES_H
+#define _LINUX_NSTREE_TYPES_H
+
+#include <linux/rbtree.h>
+#include <linux/list.h>
+
+/**
+ * struct ns_tree_root - Root of a namespace tree
+ * @ns_rb: Red-black tree root for efficient lookups
+ * @ns_list_head: List head for sequential iteration
+ *
+ * Each namespace tree maintains both an rbtree (for O(log n) lookups)
+ * and a list (for efficient sequential iteration). The list is kept in
+ * the same sorted order as the rbtree.
+ */
+struct ns_tree_root {
+ struct rb_root ns_rb;
+ struct list_head ns_list_head;
+};
+
+/**
+ * struct ns_tree_node - Node in a namespace tree
+ * @ns_node: Red-black tree node
+ * @ns_list_entry: List entry for sequential iteration
+ *
+ * Represents a namespace's position in a tree. Each namespace has
+ * multiple tree nodes for different trees (unified, per-type, owner).
+ */
+struct ns_tree_node {
+ struct rb_node ns_node;
+ struct list_head ns_list_entry;
+};
+
+/**
+ * struct ns_tree - Namespace tree nodes and active reference count
+ * @ns_id: Unique namespace identifier
+ * @__ns_ref_active: Active reference count (do not use directly)
+ * @ns_unified_node: Node in the global namespace tree
+ * @ns_tree_node: Node in the per-type namespace tree
+ * @ns_owner_node: Node in the owner namespace's tree of owned namespaces
+ * @ns_owner_root: Root of the tree of namespaces owned by this namespace
+ * (only used when this namespace is an owner)
+ */
+struct ns_tree {
+ u64 ns_id;
+ atomic_t __ns_ref_active;
+ struct ns_tree_node ns_unified_node;
+ struct ns_tree_node ns_tree_node;
+ struct ns_tree_node ns_owner_node;
+ struct ns_tree_root ns_owner_root;
+};
+
+#endif /* _LINUX_NSTREE_TYPES_H */
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 66ea09b48377..136f6a322e53 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -2,139 +2,18 @@
#ifndef _LINUX_NS_COMMON_H
#define _LINUX_NS_COMMON_H
+#include <linux/ns/ns_common_types.h>
#include <linux/refcount.h>
-#include <linux/rbtree.h>
#include <linux/vfsdebug.h>
#include <uapi/linux/sched.h>
#include <uapi/linux/nsfs.h>
-struct proc_ns_operations;
-
-struct cgroup_namespace;
-struct ipc_namespace;
-struct mnt_namespace;
-struct net;
-struct pid_namespace;
-struct time_namespace;
-struct user_namespace;
-struct uts_namespace;
-
-extern struct cgroup_namespace init_cgroup_ns;
-extern struct ipc_namespace init_ipc_ns;
-extern struct mnt_namespace init_mnt_ns;
-extern struct net init_net;
-extern struct pid_namespace init_pid_ns;
-extern struct time_namespace init_time_ns;
-extern struct user_namespace init_user_ns;
-extern struct uts_namespace init_uts_ns;
-
-extern const struct proc_ns_operations netns_operations;
-extern const struct proc_ns_operations utsns_operations;
-extern const struct proc_ns_operations ipcns_operations;
-extern const struct proc_ns_operations pidns_operations;
-extern const struct proc_ns_operations pidns_for_children_operations;
-extern const struct proc_ns_operations userns_operations;
-extern const struct proc_ns_operations mntns_operations;
-extern const struct proc_ns_operations cgroupns_operations;
-extern const struct proc_ns_operations timens_operations;
-extern const struct proc_ns_operations timens_for_children_operations;
-
-/*
- * Namespace lifetimes are managed via a two-tier reference counting model:
- *
- * (1) __ns_ref (refcount_t): Main reference count tracking memory
- * lifetime. Controls when the namespace structure itself is freed.
- * It also pins the namespace on the namespace trees whereas (2)
- * only regulates their visibility to userspace.
- *
- * (2) __ns_ref_active (atomic_t): Reference count tracking active users.
- * Controls visibility of the namespace in the namespace trees.
- * Any live task that uses the namespace (via nsproxy or cred) holds
- * an active reference. Any open file descriptor or bind-mount of
- * the namespace holds an active reference. Once all tasks have
- * called exited their namespaces and all file descriptors and
- * bind-mounts have been released the active reference count drops
- * to zero and the namespace becomes inactive. IOW, the namespace
- * cannot be listed or opened via file handles anymore.
- *
- * Note that it is valid to transition from active to inactive and
- * back from inactive to active e.g., when resurrecting an inactive
- * namespace tree via the SIOCGSKNS ioctl().
- *
- * Relationship and lifecycle states:
- *
- * - Active (__ns_ref_active > 0):
- * Namespace is actively used and visible to userspace. The namespace
- * can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
- * handles, or discovered via listns().
- *
- * - Inactive (__ns_ref_active == 0, __ns_ref > 0):
- * No tasks are actively using the namespace and it isn't pinned by
- * any bind-mounts or open file descriptors anymore. But the namespace
- * is still kept alive by internal references. For example, the user
- * namespace could be pinned by an open file through file->f_cred
- * references when one of the now defunct tasks had opened a file and
- * handed the file descriptor off to another process via a UNIX
- * sockets. Such references keep the namespace structure alive through
- * __ns_ref but will not hold an active reference.
- *
- * - Destroyed (__ns_ref == 0):
- * No references remain. The namespace is removed from the tree and freed.
- *
- * State transitions:
- *
- * Active -> Inactive:
- * When the last task using the namespace exits it drops its active
- * references to all namespaces. However, user and pid namespaces
- * remain accessible until the task has been reaped.
- *
- * Inactive -> Active:
- * An inactive namespace tree might be resurrected due to e.g., the
- * SIOCGSKNS ioctl() on a socket.
- *
- * Inactive -> Destroyed:
- * When __ns_ref drops to zero the namespace is removed from the
- * namespaces trees and the memory is freed (after RCU grace period).
- *
- * Initial namespaces:
- * Boot-time namespaces (init_net, init_pid_ns, etc.) start with
- * __ns_ref_active = 1 and remain active forever.
- */
-struct ns_common {
- u32 ns_type;
- struct dentry *stashed;
- const struct proc_ns_operations *ops;
- unsigned int inum;
- refcount_t __ns_ref; /* do not use directly */
- union {
- struct {
- u64 ns_id;
- struct /* global namespace rbtree and list */ {
- struct rb_node ns_unified_tree_node;
- struct list_head ns_unified_list_node;
- };
- struct /* per type rbtree and list */ {
- struct rb_node ns_tree_node;
- struct list_head ns_list_node;
- };
- struct /* namespace ownership rbtree and list */ {
- struct rb_root ns_owner_tree; /* rbtree of namespaces owned by this namespace */
- struct list_head ns_owner; /* list of namespaces owned by this namespace */
- struct rb_node ns_owner_tree_node; /* node in the owner namespace's rbtree */
- struct list_head ns_owner_entry; /* node in the owner namespace's ns_owned list */
- };
- atomic_t __ns_ref_active; /* do not use directly */
- };
- struct rcu_head ns_rcu;
- };
-};
-
bool is_current_namespace(struct ns_common *ns);
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
void __ns_common_free(struct ns_common *ns);
struct ns_common *__must_check ns_owner(struct ns_common *ns);
-static __always_inline bool is_initial_namespace(struct ns_common *ns)
+static __always_inline bool is_ns_init_inum(const struct ns_common *ns)
{
VFS_WARN_ON_ONCE(ns->inum == 0);
return unlikely(in_range(ns->inum, MNT_NS_INIT_INO,
@@ -147,93 +26,19 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns)
return ns->ns_id <= NS_LAST_INIT_ID;
}
-#define to_ns_common(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &(__ns)->ns, \
- const struct cgroup_namespace *: &(__ns)->ns, \
- struct ipc_namespace *: &(__ns)->ns, \
- const struct ipc_namespace *: &(__ns)->ns, \
- struct mnt_namespace *: &(__ns)->ns, \
- const struct mnt_namespace *: &(__ns)->ns, \
- struct net *: &(__ns)->ns, \
- const struct net *: &(__ns)->ns, \
- struct pid_namespace *: &(__ns)->ns, \
- const struct pid_namespace *: &(__ns)->ns, \
- struct time_namespace *: &(__ns)->ns, \
- const struct time_namespace *: &(__ns)->ns, \
- struct user_namespace *: &(__ns)->ns, \
- const struct user_namespace *: &(__ns)->ns, \
- struct uts_namespace *: &(__ns)->ns, \
- const struct uts_namespace *: &(__ns)->ns)
-
-#define ns_init_inum(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
- struct ipc_namespace *: IPC_NS_INIT_INO, \
- struct mnt_namespace *: MNT_NS_INIT_INO, \
- struct net *: NET_NS_INIT_INO, \
- struct pid_namespace *: PID_NS_INIT_INO, \
- struct time_namespace *: TIME_NS_INIT_INO, \
- struct user_namespace *: USER_NS_INIT_INO, \
- struct uts_namespace *: UTS_NS_INIT_INO)
-
-#define ns_init_ns(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: &init_cgroup_ns, \
- struct ipc_namespace *: &init_ipc_ns, \
- struct mnt_namespace *: &init_mnt_ns, \
- struct net *: &init_net, \
- struct pid_namespace *: &init_pid_ns, \
- struct time_namespace *: &init_time_ns, \
- struct user_namespace *: &init_user_ns, \
- struct uts_namespace *: &init_uts_ns)
-
-#define ns_init_id(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: CGROUP_NS_INIT_ID, \
- struct ipc_namespace *: IPC_NS_INIT_ID, \
- struct mnt_namespace *: MNT_NS_INIT_ID, \
- struct net *: NET_NS_INIT_ID, \
- struct pid_namespace *: PID_NS_INIT_ID, \
- struct time_namespace *: TIME_NS_INIT_ID, \
- struct user_namespace *: USER_NS_INIT_ID, \
- struct uts_namespace *: UTS_NS_INIT_ID)
-
-#define to_ns_operations(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
- struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
- struct mnt_namespace *: &mntns_operations, \
- struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
- struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
- struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
- struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
- struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
-
-#define ns_common_type(__ns) \
- _Generic((__ns), \
- struct cgroup_namespace *: CLONE_NEWCGROUP, \
- struct ipc_namespace *: CLONE_NEWIPC, \
- struct mnt_namespace *: CLONE_NEWNS, \
- struct net *: CLONE_NEWNET, \
- struct pid_namespace *: CLONE_NEWPID, \
- struct time_namespace *: CLONE_NEWTIME, \
- struct user_namespace *: CLONE_NEWUSER, \
- struct uts_namespace *: CLONE_NEWUTS)
-
-#define NS_COMMON_INIT(nsname, refs) \
-{ \
- .ns_type = ns_common_type(&nsname), \
- .ns_id = ns_init_id(&nsname), \
- .inum = ns_init_inum(&nsname), \
- .ops = to_ns_operations(&nsname), \
- .stashed = NULL, \
- .__ns_ref = REFCOUNT_INIT(refs), \
- .__ns_ref_active = ATOMIC_INIT(1), \
- .ns_list_node = LIST_HEAD_INIT(nsname.ns.ns_list_node), \
- .ns_owner_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_entry), \
- .ns_owner = LIST_HEAD_INIT(nsname.ns.ns_owner), \
- .ns_unified_list_node = LIST_HEAD_INIT(nsname.ns.ns_unified_list_node), \
+#define NS_COMMON_INIT(nsname) \
+{ \
+ .ns_type = ns_common_type(&nsname), \
+ .ns_id = ns_init_id(&nsname), \
+ .inum = ns_init_inum(&nsname), \
+ .ops = to_ns_operations(&nsname), \
+ .stashed = NULL, \
+ .__ns_ref = REFCOUNT_INIT(1), \
+ .__ns_ref_active = ATOMIC_INIT(1), \
+ .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \
+ .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \
+ .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \
+ .ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \
}
#define ns_common_init(__ns) \
@@ -255,8 +60,18 @@ static __always_inline __must_check int __ns_ref_active_read(const struct ns_com
return atomic_read(&ns->__ns_ref_active);
}
+static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns)
+{
+ return refcount_read(&ns->__ns_ref);
+}
+
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
{
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return false;
+ }
if (refcount_dec_and_test(&ns->__ns_ref)) {
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
return true;
@@ -266,23 +81,44 @@ static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
{
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return true;
+ }
if (refcount_inc_not_zero(&ns->__ns_ref))
return true;
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
return false;
}
-static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns)
+static __always_inline void __ns_ref_inc(struct ns_common *ns)
{
- return refcount_read(&ns->__ns_ref);
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return;
+ }
+ refcount_inc(&ns->__ns_ref);
+}
+
+static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns,
+ spinlock_t *ns_lock)
+{
+ if (is_ns_init_id(ns)) {
+ VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
+ VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
+ return false;
+ }
+ return refcount_dec_and_lock(&ns->__ns_ref, ns_lock);
}
#define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns)))
-#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
+#define ns_ref_inc(__ns) __ns_ref_inc(to_ns_common((__ns)))
#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
-#define ns_ref_put_and_lock(__ns, __lock) \
- refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
+#define ns_ref_put_and_lock(__ns, __ns_lock) \
+ __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock)
#define ns_ref_active_read(__ns) \
((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0)
diff --git a/include/linux/nstree.h b/include/linux/nstree.h
index 38674c6fa4f7..175e4625bfa6 100644
--- a/include/linux/nstree.h
+++ b/include/linux/nstree.h
@@ -3,7 +3,7 @@
#ifndef _LINUX_NSTREE_H
#define _LINUX_NSTREE_H
-#include <linux/ns_common.h>
+#include <linux/ns/nstree_types.h>
#include <linux/nsproxy.h>
#include <linux/rbtree.h>
#include <linux/seqlock.h>
@@ -11,14 +11,24 @@
#include <linux/cookie.h>
#include <uapi/linux/nsfs.h>
-extern struct ns_tree cgroup_ns_tree;
-extern struct ns_tree ipc_ns_tree;
-extern struct ns_tree mnt_ns_tree;
-extern struct ns_tree net_ns_tree;
-extern struct ns_tree pid_ns_tree;
-extern struct ns_tree time_ns_tree;
-extern struct ns_tree user_ns_tree;
-extern struct ns_tree uts_ns_tree;
+struct ns_common;
+
+extern struct ns_tree_root cgroup_ns_tree;
+extern struct ns_tree_root ipc_ns_tree;
+extern struct ns_tree_root mnt_ns_tree;
+extern struct ns_tree_root net_ns_tree;
+extern struct ns_tree_root pid_ns_tree;
+extern struct ns_tree_root time_ns_tree;
+extern struct ns_tree_root user_ns_tree;
+extern struct ns_tree_root uts_ns_tree;
+
+void ns_tree_node_init(struct ns_tree_node *node);
+void ns_tree_root_init(struct ns_tree_root *root);
+bool ns_tree_node_empty(const struct ns_tree_node *node);
+struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
+ struct ns_tree_root *root,
+ int (*cmp)(struct rb_node *, const struct rb_node *));
+void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root);
#define to_ns_tree(__ns) \
_Generic((__ns), \
@@ -36,14 +46,14 @@ extern struct ns_tree uts_ns_tree;
(((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0))
u64 __ns_tree_gen_id(struct ns_common *ns, u64 id);
-void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
-void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree);
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree);
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
- struct ns_tree *ns_tree,
+ struct ns_tree_root *ns_tree,
bool previous);
-static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree, u64 id)
+static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_tree, u64 id)
{
__ns_tree_gen_id(ns, id);
__ns_tree_add_raw(ns, ns_tree);
@@ -81,6 +91,6 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree,
#define ns_tree_adjoined_rcu(__ns, __previous) \
__ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
-#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
+#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node.ns_node))
#endif /* _LINUX_NSTREE_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 445517a72ad0..0e7ae12c96d2 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -61,8 +61,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
- if (ns != &init_pid_ns)
- ns_ref_inc(ns);
+ ns_ref_inc(ns);
return ns;
}
diff --git a/init/version-timestamp.c b/init/version-timestamp.c
index 56ded64fdfe4..375726e05f69 100644
--- a/init/version-timestamp.c
+++ b/init/version-timestamp.c
@@ -8,7 +8,7 @@
#include <linux/utsname.h>
struct uts_namespace init_uts_ns = {
- .ns = NS_COMMON_INIT(init_uts_ns, 2),
+ .ns = NS_COMMON_INIT(init_uts_ns),
.name = {
.sysname = UTS_SYSNAME,
.nodename = UTS_NODENAME,
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 55a908ec0674..e28f0cecb2ec 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -27,7 +27,7 @@ DEFINE_SPINLOCK(mq_lock);
* and not CONFIG_IPC_NS.
*/
struct ipc_namespace init_ipc_ns = {
- .ns = NS_COMMON_INIT(init_ipc_ns, 1),
+ .ns = NS_COMMON_INIT(init_ipc_ns),
.user_ns = &init_user_ns,
};
diff --git a/ipc/namespace.c b/ipc/namespace.c
index 59b12fcb40bd..c0dbfdd9015f 100644
--- a/ipc/namespace.c
+++ b/ipc/namespace.c
@@ -66,6 +66,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (err)
goto fail_free;
+ ns_tree_gen_id(ns);
ns->user_ns = get_user_ns(user_ns);
ns->ucounts = ucounts;
@@ -86,7 +87,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
sem_init_ns(ns);
shm_init_ns(ns);
- ns_tree_add(ns);
+ ns_tree_add_raw(ns);
return ns;
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 20ab84b2cf4e..2bf3951ca88f 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -250,7 +250,7 @@ bool cgroup_enable_per_threadgroup_rwsem __read_mostly;
/* cgroup namespace for init task */
struct cgroup_namespace init_cgroup_ns = {
- .ns = NS_COMMON_INIT(init_cgroup_ns, 2),
+ .ns = NS_COMMON_INIT(init_cgroup_ns),
.user_ns = &init_user_ns,
.root_cset = &init_css_set,
};
diff --git a/kernel/nscommon.c b/kernel/nscommon.c
index c910b979e433..bdc3c86231d3 100644
--- a/kernel/nscommon.c
+++ b/kernel/nscommon.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
#include <linux/ns_common.h>
+#include <linux/nstree.h>
#include <linux/proc_ns.h>
#include <linux/user_namespace.h>
#include <linux/vfsdebug.h>
@@ -61,14 +62,10 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope
ns->ops = ops;
ns->ns_id = 0;
ns->ns_type = ns_type;
- RB_CLEAR_NODE(&ns->ns_tree_node);
- RB_CLEAR_NODE(&ns->ns_unified_tree_node);
- RB_CLEAR_NODE(&ns->ns_owner_tree_node);
- INIT_LIST_HEAD(&ns->ns_list_node);
- INIT_LIST_HEAD(&ns->ns_unified_list_node);
- ns->ns_owner_tree = RB_ROOT;
- INIT_LIST_HEAD(&ns->ns_owner);
- INIT_LIST_HEAD(&ns->ns_owner_entry);
+ ns_tree_node_init(&ns->ns_tree_node);
+ ns_tree_node_init(&ns->ns_unified_node);
+ ns_tree_node_init(&ns->ns_owner_node);
+ ns_tree_root_init(&ns->ns_owner_root);
#ifdef CONFIG_DEBUG_VFS
ns_debug(ns, ops);
@@ -85,7 +82,7 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope
* active use (installed in nsproxy) and decremented when all
* active uses are gone. Initial namespaces are always active.
*/
- if (is_initial_namespace(ns))
+ if (is_ns_init_inum(ns))
atomic_set(&ns->__ns_ref_active, 1);
else
atomic_set(&ns->__ns_ref_active, 0);
diff --git a/kernel/nstree.c b/kernel/nstree.c
index 97404fb90749..5270e0fa67a2 100644
--- a/kernel/nstree.c
+++ b/kernel/nstree.c
@@ -9,89 +9,165 @@
#include <linux/user_namespace.h>
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
-static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */
-static LIST_HEAD(ns_unified_list); /* protected by ns_tree_lock */
-/**
- * struct ns_tree - Namespace tree
- * @ns_tree: Rbtree of namespaces of a particular type
- * @ns_list: Sequentially walkable list of all namespaces of this type
- * @type: type of namespaces in this tree
- */
-struct ns_tree {
- struct rb_root ns_tree;
- struct list_head ns_list;
- int type;
+DEFINE_LOCK_GUARD_0(ns_tree_writer,
+ write_seqlock(&ns_tree_lock),
+ write_sequnlock(&ns_tree_lock))
+
+DEFINE_LOCK_GUARD_0(ns_tree_locked_reader,
+ read_seqlock_excl(&ns_tree_lock),
+ read_sequnlock_excl(&ns_tree_lock))
+
+static struct ns_tree_root ns_unified_root = { /* protected by ns_tree_lock */
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(ns_unified_root.ns_list_head),
};
-struct ns_tree mnt_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list),
- .type = CLONE_NEWNS,
+struct ns_tree_root mnt_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(mnt_ns_tree.ns_list_head),
};
-struct ns_tree net_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list),
- .type = CLONE_NEWNET,
+struct ns_tree_root net_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(net_ns_tree.ns_list_head),
};
EXPORT_SYMBOL_GPL(net_ns_tree);
-struct ns_tree uts_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list),
- .type = CLONE_NEWUTS,
+struct ns_tree_root uts_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(uts_ns_tree.ns_list_head),
};
-struct ns_tree user_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list),
- .type = CLONE_NEWUSER,
+struct ns_tree_root user_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(user_ns_tree.ns_list_head),
};
-struct ns_tree ipc_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list),
- .type = CLONE_NEWIPC,
+struct ns_tree_root ipc_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(ipc_ns_tree.ns_list_head),
};
-struct ns_tree pid_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list),
- .type = CLONE_NEWPID,
+struct ns_tree_root pid_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(pid_ns_tree.ns_list_head),
};
-struct ns_tree cgroup_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list),
- .type = CLONE_NEWCGROUP,
+struct ns_tree_root cgroup_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(cgroup_ns_tree.ns_list_head),
};
-struct ns_tree time_ns_tree = {
- .ns_tree = RB_ROOT,
- .ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list),
- .type = CLONE_NEWTIME,
+struct ns_tree_root time_ns_tree = {
+ .ns_rb = RB_ROOT,
+ .ns_list_head = LIST_HEAD_INIT(time_ns_tree.ns_list_head),
};
+/**
+ * ns_tree_node_init - Initialize a namespace tree node
+ * @node: The node to initialize
+ *
+ * Initializes both the rbtree node and list entry.
+ */
+void ns_tree_node_init(struct ns_tree_node *node)
+{
+ RB_CLEAR_NODE(&node->ns_node);
+ INIT_LIST_HEAD(&node->ns_list_entry);
+}
+
+/**
+ * ns_tree_root_init - Initialize a namespace tree root
+ * @root: The root to initialize
+ *
+ * Initializes both the rbtree root and list head.
+ */
+void ns_tree_root_init(struct ns_tree_root *root)
+{
+ root->ns_rb = RB_ROOT;
+ INIT_LIST_HEAD(&root->ns_list_head);
+}
+
+/**
+ * ns_tree_node_empty - Check if a namespace tree node is empty
+ * @node: The node to check
+ *
+ * Returns true if the node is not in any tree.
+ */
+bool ns_tree_node_empty(const struct ns_tree_node *node)
+{
+ return RB_EMPTY_NODE(&node->ns_node);
+}
+
+/**
+ * ns_tree_node_add - Add a node to a namespace tree
+ * @node: The node to add
+ * @root: The tree root to add to
+ * @cmp: Comparison function for rbtree insertion
+ *
+ * Adds the node to both the rbtree and the list, maintaining sorted order.
+ * The list is maintained in the same order as the rbtree to enable efficient
+ * iteration.
+ *
+ * Returns: NULL if insertion succeeded, existing node if duplicate found
+ */
+struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
+ struct ns_tree_root *root,
+ int (*cmp)(struct rb_node *, const struct rb_node *))
+{
+ struct rb_node *ret, *prev;
+
+ /* Add to rbtree */
+ ret = rb_find_add_rcu(&node->ns_node, &root->ns_rb, cmp);
+
+ /* Add to list in sorted order */
+ prev = rb_prev(&node->ns_node);
+ if (!prev) {
+ /* No previous node, add at head */
+ list_add_rcu(&node->ns_list_entry, &root->ns_list_head);
+ } else {
+ /* Add after previous node */
+ struct ns_tree_node *prev_node;
+ prev_node = rb_entry(prev, struct ns_tree_node, ns_node);
+ list_add_rcu(&node->ns_list_entry, &prev_node->ns_list_entry);
+ }
+
+ return ret;
+}
+
+/**
+ * ns_tree_node_del - Remove a node from a namespace tree
+ * @node: The node to remove
+ * @root: The tree root to remove from
+ *
+ * Removes the node from both the rbtree and the list atomically.
+ */
+void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root)
+{
+ rb_erase(&node->ns_node, &root->ns_rb);
+ RB_CLEAR_NODE(&node->ns_node);
+ list_bidir_del_rcu(&node->ns_list_entry);
+}
+
static inline struct ns_common *node_to_ns(const struct rb_node *node)
{
if (!node)
return NULL;
- return rb_entry(node, struct ns_common, ns_tree_node);
+ return rb_entry(node, struct ns_common, ns_tree_node.ns_node);
}
static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
{
if (!node)
return NULL;
- return rb_entry(node, struct ns_common, ns_unified_tree_node);
+ return rb_entry(node, struct ns_common, ns_unified_node.ns_node);
}
static inline struct ns_common *node_to_ns_owner(const struct rb_node *node)
{
if (!node)
return NULL;
- return rb_entry(node, struct ns_common, ns_owner_tree_node);
+ return rb_entry(node, struct ns_common, ns_owner_node.ns_node);
}
static int ns_id_cmp(u64 id_a, u64 id_b)
@@ -118,35 +194,22 @@ static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b)
return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id);
}
-void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
+void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree)
{
- struct rb_node *node, *prev;
+ struct rb_node *node;
const struct proc_ns_operations *ops = ns->ops;
VFS_WARN_ON_ONCE(!ns->ns_id);
- VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
- write_seqlock(&ns_tree_lock);
+ guard(ns_tree_writer)();
- node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp);
- /*
- * If there's no previous entry simply add it after the
- * head and if there is add it after the previous entry.
- */
- prev = rb_prev(&ns->ns_tree_node);
- if (!prev)
- list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list);
- else
- list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);
+ /* Add to per-type tree and list */
+ node = ns_tree_node_add(&ns->ns_tree_node, ns_tree, ns_cmp);
/* Add to unified tree and list */
- rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified);
- prev = rb_prev(&ns->ns_unified_tree_node);
- if (!prev)
- list_add_rcu(&ns->ns_unified_list_node, &ns_unified_list);
- else
- list_add_rcu(&ns->ns_unified_list_node, &node_to_ns_unified(prev)->ns_unified_list_node);
+ ns_tree_node_add(&ns->ns_unified_node, &ns_unified_root, ns_cmp_unified);
+ /* Add to owner's tree if applicable */
if (ops) {
struct user_namespace *user_ns;
@@ -156,55 +219,40 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
struct ns_common *owner = &user_ns->ns;
VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
- /* Insert into owner's rbtree */
- rb_find_add_rcu(&ns->ns_owner_tree_node, &owner->ns_owner_tree, ns_cmp_owner);
-
- /* Insert into owner's list in sorted order */
- prev = rb_prev(&ns->ns_owner_tree_node);
- if (!prev)
- list_add_rcu(&ns->ns_owner_entry, &owner->ns_owner);
- else
- list_add_rcu(&ns->ns_owner_entry, &node_to_ns_owner(prev)->ns_owner_entry);
+ /* Insert into owner's tree and list */
+ ns_tree_node_add(&ns->ns_owner_node, &owner->ns_owner_root, ns_cmp_owner);
} else {
/* Only the initial user namespace doesn't have an owner. */
VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns));
}
}
- write_sequnlock(&ns_tree_lock);
VFS_WARN_ON_ONCE(node);
}
-void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
+void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree)
{
const struct proc_ns_operations *ops = ns->ops;
struct user_namespace *user_ns;
- VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));
- VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
- VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
+ VFS_WARN_ON_ONCE(ns_tree_node_empty(&ns->ns_tree_node));
+ VFS_WARN_ON_ONCE(list_empty(&ns->ns_tree_node.ns_list_entry));
write_seqlock(&ns_tree_lock);
- rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
- RB_CLEAR_NODE(&ns->ns_tree_node);
-
- list_bidir_del_rcu(&ns->ns_list_node);
- rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree);
- RB_CLEAR_NODE(&ns->ns_unified_tree_node);
+ /* Remove from per-type tree and list */
+ ns_tree_node_del(&ns->ns_tree_node, ns_tree);
- list_bidir_del_rcu(&ns->ns_unified_list_node);
+ /* Remove from unified tree and list */
+ ns_tree_node_del(&ns->ns_unified_node, &ns_unified_root);
- /* Remove from owner's rbtree if this namespace has an owner */
+ /* Remove from owner's tree if applicable */
if (ops) {
user_ns = ops->owner(ns);
if (user_ns) {
struct ns_common *owner = &user_ns->ns;
- rb_erase(&ns->ns_owner_tree_node, &owner->ns_owner_tree);
- RB_CLEAR_NODE(&ns->ns_owner_tree_node);
+ ns_tree_node_del(&ns->ns_owner_node, &owner->ns_owner_root);
}
-
- list_bidir_del_rcu(&ns->ns_owner_entry);
}
write_sequnlock(&ns_tree_lock);
@@ -235,7 +283,7 @@ static int ns_find_unified(const void *key, const struct rb_node *node)
return 0;
}
-static struct ns_tree *ns_tree_from_type(int ns_type)
+static struct ns_tree_root *ns_tree_from_type(int ns_type)
{
switch (ns_type) {
case CLONE_NEWCGROUP:
@@ -266,7 +314,7 @@ static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
do {
seq = read_seqbegin(&ns_tree_lock);
- node = rb_find_rcu(&ns_id, &ns_unified_tree, ns_find_unified);
+ node = rb_find_rcu(&ns_id, &ns_unified_root.ns_rb, ns_find_unified);
if (node)
break;
} while (read_seqretry(&ns_tree_lock, seq));
@@ -276,7 +324,7 @@ static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
{
- struct ns_tree *ns_tree;
+ struct ns_tree_root *ns_tree;
struct rb_node *node;
unsigned int seq;
@@ -286,7 +334,7 @@ static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
do {
seq = read_seqbegin(&ns_tree_lock);
- node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find);
+ node = rb_find_rcu(&ns_id, &ns_tree->ns_rb, ns_find);
if (node)
break;
} while (read_seqretry(&ns_tree_lock, seq));
@@ -314,22 +362,20 @@ struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
* there is no next/previous namespace, -ENOENT is returned.
*/
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
- struct ns_tree *ns_tree, bool previous)
+ struct ns_tree_root *ns_tree, bool previous)
{
struct list_head *list;
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
if (previous)
- list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node));
+ list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_tree_node.ns_list_entry));
else
- list = rcu_dereference(list_next_rcu(&ns->ns_list_node));
- if (list_is_head(list, &ns_tree->ns_list))
+ list = rcu_dereference(list_next_rcu(&ns->ns_tree_node.ns_list_entry));
+ if (list_is_head(list, &ns_tree->ns_list_head))
return ERR_PTR(-ENOENT);
- VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ns_type != ns_tree->type);
-
- return list_entry_rcu(list, struct ns_common, ns_list_node);
+ return list_entry_rcu(list, struct ns_common, ns_tree_node.ns_list_entry);
}
/**
@@ -422,9 +468,9 @@ static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner)
VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
- read_seqlock_excl(&ns_tree_lock);
- node = owner->ns_owner_tree.rb_node;
+ guard(ns_tree_locked_reader)();
+ node = owner->ns_owner_root.ns_rb.rb_node;
while (node) {
struct ns_common *ns;
@@ -441,7 +487,6 @@ static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner)
if (ret)
ret = ns_get_unless_inactive(ret);
- read_sequnlock_excl(&ns_tree_lock);
return ret;
}
@@ -553,18 +598,21 @@ static ssize_t do_listns_userns(struct klistns *kls)
}
ret = 0;
- head = &to_ns_common(kls->user_ns)->ns_owner;
+ head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
rcu_read_lock();
if (!first_ns)
- first_ns = list_entry_rcu(head->next, typeof(*ns), ns_owner_entry);
+ first_ns = list_entry_rcu(head->next, typeof(*first_ns), ns_owner_node.ns_list_entry);
- for (ns = first_ns; &ns->ns_owner_entry != head && nr_ns_ids;
- ns = list_entry_rcu(ns->ns_owner_entry.next, typeof(*ns), ns_owner_entry)) {
+ ns = first_ns;
+ list_for_each_entry_from_rcu(ns, head, ns_owner_node.ns_list_entry) {
struct ns_common *valid;
+ if (!nr_ns_ids)
+ break;
+
valid = legitimize_ns(kls, ns);
if (!valid)
continue;
@@ -597,7 +645,7 @@ static ssize_t do_listns_userns(struct klistns *kls)
static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
{
struct ns_common *ret = NULL;
- struct ns_tree *ns_tree = NULL;
+ struct ns_tree_root *ns_tree = NULL;
struct rb_node *node;
if (ns_type) {
@@ -606,11 +654,12 @@ static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
return NULL;
}
- read_seqlock_excl(&ns_tree_lock);
+ guard(ns_tree_locked_reader)();
+
if (ns_tree)
- node = ns_tree->ns_tree.rb_node;
+ node = ns_tree->ns_rb.rb_node;
else
- node = ns_unified_tree.rb_node;
+ node = ns_unified_root.ns_rb.rb_node;
while (node) {
struct ns_common *ns;
@@ -635,33 +684,32 @@ static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
if (ret)
ret = ns_get_unless_inactive(ret);
- read_sequnlock_excl(&ns_tree_lock);
return ret;
}
static inline struct ns_common *first_ns_common(const struct list_head *head,
- struct ns_tree *ns_tree)
+ struct ns_tree_root *ns_tree)
{
if (ns_tree)
- return list_entry_rcu(head->next, struct ns_common, ns_list_node);
- return list_entry_rcu(head->next, struct ns_common, ns_unified_list_node);
+ return list_entry_rcu(head->next, struct ns_common, ns_tree_node.ns_list_entry);
+ return list_entry_rcu(head->next, struct ns_common, ns_unified_node.ns_list_entry);
}
static inline struct ns_common *next_ns_common(struct ns_common *ns,
- struct ns_tree *ns_tree)
+ struct ns_tree_root *ns_tree)
{
if (ns_tree)
- return list_entry_rcu(ns->ns_list_node.next, struct ns_common, ns_list_node);
- return list_entry_rcu(ns->ns_unified_list_node.next, struct ns_common, ns_unified_list_node);
+ return list_entry_rcu(ns->ns_tree_node.ns_list_entry.next, struct ns_common, ns_tree_node.ns_list_entry);
+ return list_entry_rcu(ns->ns_unified_node.ns_list_entry.next, struct ns_common, ns_unified_node.ns_list_entry);
}
static inline bool ns_common_is_head(struct ns_common *ns,
const struct list_head *head,
- struct ns_tree *ns_tree)
+ struct ns_tree_root *ns_tree)
{
if (ns_tree)
- return &ns->ns_list_node == head;
- return &ns->ns_unified_list_node == head;
+ return &ns->ns_tree_node.ns_list_entry == head;
+ return &ns->ns_unified_node.ns_list_entry == head;
}
static ssize_t do_listns(struct klistns *kls)
@@ -669,7 +717,7 @@ static ssize_t do_listns(struct klistns *kls)
u64 __user *ns_ids = kls->uns_ids;
size_t nr_ns_ids = kls->nr_ns_ids;
struct ns_common *ns, *first_ns = NULL, *prev = NULL;
- struct ns_tree *ns_tree = NULL;
+ struct ns_tree_root *ns_tree = NULL;
const struct list_head *head;
u32 ns_type;
ssize_t ret;
@@ -694,9 +742,9 @@ static ssize_t do_listns(struct klistns *kls)
ret = 0;
if (ns_tree)
- head = &ns_tree->ns_list;
+ head = &ns_tree->ns_list_head;
else
- head = &ns_unified_list;
+ head = &ns_unified_root.ns_list_head;
rcu_read_lock();
diff --git a/kernel/pid.c b/kernel/pid.c
index a5a63dc0a491..a31771bc89c1 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -71,7 +71,7 @@ static int pid_max_max = PID_MAX_LIMIT;
* the scheme scales to up to 4 million PIDs, runtime.
*/
struct pid_namespace init_pid_ns = {
- .ns = NS_COMMON_INIT(init_pid_ns, 2),
+ .ns = NS_COMMON_INIT(init_pid_ns),
.idr = IDR_INIT(init_pid_ns.idr),
.pid_allocated = PIDNS_ADDING,
.level = 0,
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 650be58d8d18..e48f5de41361 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -184,7 +184,7 @@ struct pid_namespace *copy_pid_ns(u64 flags,
void put_pid_ns(struct pid_namespace *ns)
{
- if (ns && ns != &init_pid_ns && ns_ref_put(ns))
+ if (ns && ns_ref_put(ns))
schedule_work(&ns->work);
}
EXPORT_SYMBOL_GPL(put_pid_ns);
diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c
index 19911f88e2b8..e76be24b132c 100644
--- a/kernel/time/namespace.c
+++ b/kernel/time/namespace.c
@@ -478,7 +478,7 @@ const struct proc_ns_operations timens_for_children_operations = {
};
struct time_namespace init_time_ns = {
- .ns = NS_COMMON_INIT(init_time_ns, 3),
+ .ns = NS_COMMON_INIT(init_time_ns),
.user_ns = &init_user_ns,
.frozen_offsets = true,
};
diff --git a/kernel/user.c b/kernel/user.c
index 4b3132e786d9..7aef4e679a6a 100644
--- a/kernel/user.c
+++ b/kernel/user.c
@@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(init_binfmt_misc);
* and 1 for... ?
*/
struct user_namespace init_user_ns = {
- .ns = NS_COMMON_INIT(init_user_ns, 3),
+ .ns = NS_COMMON_INIT(init_user_ns),
.uid_map = {
{
.extent[0] = {
diff --git a/tools/testing/selftests/namespaces/nsid_test.c b/tools/testing/selftests/namespaces/nsid_test.c
index e28accd74a57..527ade0a8673 100644
--- a/tools/testing/selftests/namespaces/nsid_test.c
+++ b/tools/testing/selftests/namespaces/nsid_test.c
@@ -6,6 +6,7 @@
#include <libgen.h>
#include <limits.h>
#include <pthread.h>
+#include <signal.h>
#include <string.h>
#include <sys/mount.h>
#include <poll.h>
@@ -14,12 +15,30 @@
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/un.h>
+#include <sys/wait.h>
#include <unistd.h>
#include <linux/fs.h>
#include <linux/limits.h>
#include <linux/nsfs.h>
#include "../kselftest_harness.h"
+/* Fixture for tests that create child processes */
+FIXTURE(nsid) {
+ pid_t child_pid;
+};
+
+FIXTURE_SETUP(nsid) {
+ self->child_pid = 0;
+}
+
+FIXTURE_TEARDOWN(nsid) {
+ /* Clean up any child process that may still be running */
+ if (self->child_pid > 0) {
+ kill(self->child_pid, SIGKILL);
+ waitpid(self->child_pid, NULL, 0);
+ }
+}
+
TEST(nsid_mntns_basic)
{
__u64 mnt_ns_id = 0;
@@ -44,7 +63,7 @@ TEST(nsid_mntns_basic)
close(fd_mntns);
}
-TEST(nsid_mntns_separate)
+TEST_F(nsid, mntns_separate)
{
__u64 parent_mnt_ns_id = 0;
__u64 child_mnt_ns_id = 0;
@@ -90,6 +109,9 @@ TEST(nsid_mntns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -99,8 +121,6 @@ TEST(nsid_mntns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_mntns);
SKIP(return, "No permission to create mount namespace");
}
@@ -123,10 +143,6 @@ TEST(nsid_mntns_separate)
close(fd_parent_mntns);
close(fd_child_mntns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_cgroupns_basic)
@@ -153,7 +169,7 @@ TEST(nsid_cgroupns_basic)
close(fd_cgroupns);
}
-TEST(nsid_cgroupns_separate)
+TEST_F(nsid, cgroupns_separate)
{
__u64 parent_cgroup_ns_id = 0;
__u64 child_cgroup_ns_id = 0;
@@ -199,6 +215,9 @@ TEST(nsid_cgroupns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -208,8 +227,6 @@ TEST(nsid_cgroupns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_cgroupns);
SKIP(return, "No permission to create cgroup namespace");
}
@@ -232,10 +249,6 @@ TEST(nsid_cgroupns_separate)
close(fd_parent_cgroupns);
close(fd_child_cgroupns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_ipcns_basic)
@@ -262,7 +275,7 @@ TEST(nsid_ipcns_basic)
close(fd_ipcns);
}
-TEST(nsid_ipcns_separate)
+TEST_F(nsid, ipcns_separate)
{
__u64 parent_ipc_ns_id = 0;
__u64 child_ipc_ns_id = 0;
@@ -308,6 +321,9 @@ TEST(nsid_ipcns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -317,8 +333,6 @@ TEST(nsid_ipcns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_ipcns);
SKIP(return, "No permission to create IPC namespace");
}
@@ -341,10 +355,6 @@ TEST(nsid_ipcns_separate)
close(fd_parent_ipcns);
close(fd_child_ipcns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_utsns_basic)
@@ -371,7 +381,7 @@ TEST(nsid_utsns_basic)
close(fd_utsns);
}
-TEST(nsid_utsns_separate)
+TEST_F(nsid, utsns_separate)
{
__u64 parent_uts_ns_id = 0;
__u64 child_uts_ns_id = 0;
@@ -417,6 +427,9 @@ TEST(nsid_utsns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -426,8 +439,6 @@ TEST(nsid_utsns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_utsns);
SKIP(return, "No permission to create UTS namespace");
}
@@ -450,10 +461,6 @@ TEST(nsid_utsns_separate)
close(fd_parent_utsns);
close(fd_child_utsns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_userns_basic)
@@ -480,7 +487,7 @@ TEST(nsid_userns_basic)
close(fd_userns);
}
-TEST(nsid_userns_separate)
+TEST_F(nsid, userns_separate)
{
__u64 parent_user_ns_id = 0;
__u64 child_user_ns_id = 0;
@@ -526,6 +533,9 @@ TEST(nsid_userns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -535,8 +545,6 @@ TEST(nsid_userns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_userns);
SKIP(return, "No permission to create user namespace");
}
@@ -559,10 +567,6 @@ TEST(nsid_userns_separate)
close(fd_parent_userns);
close(fd_child_userns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_timens_basic)
@@ -591,7 +595,7 @@ TEST(nsid_timens_basic)
close(fd_timens);
}
-TEST(nsid_timens_separate)
+TEST_F(nsid, timens_separate)
{
__u64 parent_time_ns_id = 0;
__u64 child_time_ns_id = 0;
@@ -652,6 +656,9 @@ TEST(nsid_timens_separate)
}
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -660,8 +667,6 @@ TEST(nsid_timens_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_timens);
close(pipefd[0]);
SKIP(return, "Cannot create time namespace");
@@ -689,10 +694,6 @@ TEST(nsid_timens_separate)
close(fd_parent_timens);
close(fd_child_timens);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_pidns_basic)
@@ -719,7 +720,7 @@ TEST(nsid_pidns_basic)
close(fd_pidns);
}
-TEST(nsid_pidns_separate)
+TEST_F(nsid, pidns_separate)
{
__u64 parent_pid_ns_id = 0;
__u64 child_pid_ns_id = 0;
@@ -776,6 +777,9 @@ TEST(nsid_pidns_separate)
}
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -784,8 +788,6 @@ TEST(nsid_pidns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_pidns);
close(pipefd[0]);
SKIP(return, "No permission to create PID namespace");
@@ -813,10 +815,6 @@ TEST(nsid_pidns_separate)
close(fd_parent_pidns);
close(fd_child_pidns);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST(nsid_netns_basic)
@@ -860,7 +858,7 @@ TEST(nsid_netns_basic)
close(fd_netns);
}
-TEST(nsid_netns_separate)
+TEST_F(nsid, netns_separate)
{
__u64 parent_net_ns_id = 0;
__u64 parent_netns_cookie = 0;
@@ -920,6 +918,9 @@ TEST(nsid_netns_separate)
_exit(0);
}
+ /* Track child for cleanup */
+ self->child_pid = pid;
+
/* Parent process */
close(pipefd[1]);
@@ -929,8 +930,6 @@ TEST(nsid_netns_separate)
if (buf == 'S') {
/* Child couldn't create namespace, skip test */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
close(fd_parent_netns);
close(parent_sock);
SKIP(return, "No permission to create network namespace");
@@ -977,10 +976,6 @@ TEST(nsid_netns_separate)
close(fd_parent_netns);
close(fd_child_netns);
close(parent_sock);
-
- /* Clean up child process */
- kill(pid, SIGTERM);
- waitpid(pid, NULL, 0);
}
TEST_HARNESS_MAIN