summaryrefslogtreecommitdiff
path: root/fs/kernfs
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-04-01 11:02:03 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2025-04-01 11:02:03 -0700
commit2cd5769fb0b78b8ef583ab4c0015c2c48d525dac (patch)
treee553ca1215f21488b9d2be5eadef668fb8b50bf7 /fs/kernfs
parentd6b02199cde4b9cb99b311eeab1cdbe23165082c (diff)
parent51d0de7596a458096756c895cfed6bc4a7ecac10 (diff)
Merge tag 'driver-core-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core
Pull driver core updatesk from Greg KH: "Here is the big set of driver core updates for 6.15-rc1. Lots of stuff happened this development cycle, including: - kernfs scaling changes to make it even faster thanks to rcu - bin_attribute constify work in many subsystems - faux bus minor tweaks for the rust bindings - rust binding updates for driver core, pci, and platform busses, making more functionaliy available to rust drivers. These are all due to people actually trying to use the bindings that were in 6.14. - make Rafael and Danilo full co-maintainers of the driver core codebase - other minor fixes and updates" * tag 'driver-core-6.15-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core: (52 commits) rust: platform: require Send for Driver trait implementers rust: pci: require Send for Driver trait implementers rust: platform: impl Send + Sync for platform::Device rust: pci: impl Send + Sync for pci::Device rust: platform: fix unrestricted &mut platform::Device rust: pci: fix unrestricted &mut pci::Device rust: device: implement device context marker rust: pci: use to_result() in enable_device_mem() MAINTAINERS: driver core: mark Rafael and Danilo as co-maintainers rust/kernel/faux: mark Registration methods inline driver core: faux: only create the device if probe() succeeds rust/faux: Add missing parent argument to Registration::new() rust/faux: Drop #[repr(transparent)] from faux::Registration rust: io: fix devres test with new io accessor functions rust: io: rename `io::Io` accessors kernfs: Move dput() outside of the RCU section. efi: rci2: mark bin_attribute as __ro_after_init rapidio: constify 'struct bin_attribute' firmware: qemu_fw_cfg: constify 'struct bin_attribute' powerpc/perf/hv-24x7: Constify 'struct bin_attribute' ...
Diffstat (limited to 'fs/kernfs')
-rw-r--r--fs/kernfs/dir.c209
-rw-r--r--fs/kernfs/file.c6
-rw-r--r--fs/kernfs/kernfs-internal.h37
-rw-r--r--fs/kernfs/mount.c44
-rw-r--r--fs/kernfs/symlink.c30
5 files changed, 210 insertions, 116 deletions
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index d296aad70800..fc70d72c3fe8 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -17,7 +17,7 @@
#include "kernfs-internal.h"
-static DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */
+DEFINE_RWLOCK(kernfs_rename_lock); /* kn->parent and ->name */
/*
* Don't use rename_lock to piggy back on pr_cont_buf. We don't want to
* call pr_cont() while holding rename_lock. Because sometimes pr_cont()
@@ -51,22 +51,14 @@ static bool kernfs_lockdep(struct kernfs_node *kn)
#endif
}
-static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
-{
- if (!kn)
- return strscpy(buf, "(null)", buflen);
-
- return strscpy(buf, kn->parent ? kn->name : "/", buflen);
-}
-
/* kernfs_node_depth - compute depth from @from to @to */
static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
size_t depth = 0;
- while (to->parent && to != from) {
+ while (rcu_dereference(to->__parent) && to != from) {
depth++;
- to = to->parent;
+ to = rcu_dereference(to->__parent);
}
return depth;
}
@@ -84,18 +76,18 @@ static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
db = kernfs_depth(rb->kn, b);
while (da > db) {
- a = a->parent;
+ a = rcu_dereference(a->__parent);
da--;
}
while (db > da) {
- b = b->parent;
+ b = rcu_dereference(b->__parent);
db--;
}
/* worst case b and a will be the same at root */
while (b != a) {
- b = b->parent;
- a = a->parent;
+ b = rcu_dereference(b->__parent);
+ a = rcu_dereference(a->__parent);
}
return a;
@@ -168,10 +160,13 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
/* Calculate how many bytes we need for the rest */
for (i = depth_to - 1; i >= 0; i--) {
+ const char *name;
+
for (kn = kn_to, j = 0; j < i; j++)
- kn = kn->parent;
+ kn = rcu_dereference(kn->__parent);
- len += scnprintf(buf + len, buflen - len, "/%s", kn->name);
+ name = rcu_dereference(kn->name);
+ len += scnprintf(buf + len, buflen - len, "/%s", name);
}
return len;
@@ -195,13 +190,18 @@ static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
*/
int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
{
- unsigned long flags;
- int ret;
+ struct kernfs_node *kn_parent;
- read_lock_irqsave(&kernfs_rename_lock, flags);
- ret = kernfs_name_locked(kn, buf, buflen);
- read_unlock_irqrestore(&kernfs_rename_lock, flags);
- return ret;
+ if (!kn)
+ return strscpy(buf, "(null)", buflen);
+
+ guard(rcu)();
+ /*
+ * KERNFS_ROOT_INVARIANT_PARENT is ignored here. The name is RCU freed and
+ * the parent is either existing or not.
+ */
+ kn_parent = rcu_dereference(kn->__parent);
+ return strscpy(buf, kn_parent ? rcu_dereference(kn->name) : "/", buflen);
}
/**
@@ -223,13 +223,17 @@ int kernfs_name(struct kernfs_node *kn, char *buf, size_t buflen)
int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
char *buf, size_t buflen)
{
- unsigned long flags;
- int ret;
+ struct kernfs_root *root;
- read_lock_irqsave(&kernfs_rename_lock, flags);
- ret = kernfs_path_from_node_locked(to, from, buf, buflen);
- read_unlock_irqrestore(&kernfs_rename_lock, flags);
- return ret;
+ guard(rcu)();
+ if (to) {
+ root = kernfs_root(to);
+ if (!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)) {
+ guard(read_lock_irqsave)(&kernfs_rename_lock);
+ return kernfs_path_from_node_locked(to, from, buf, buflen);
+ }
+ }
+ return kernfs_path_from_node_locked(to, from, buf, buflen);
}
EXPORT_SYMBOL_GPL(kernfs_path_from_node);
@@ -295,7 +299,7 @@ struct kernfs_node *kernfs_get_parent(struct kernfs_node *kn)
unsigned long flags;
read_lock_irqsave(&kernfs_rename_lock, flags);
- parent = kn->parent;
+ parent = kernfs_parent(kn);
kernfs_get(parent);
read_unlock_irqrestore(&kernfs_rename_lock, flags);
@@ -336,13 +340,13 @@ static int kernfs_name_compare(unsigned int hash, const char *name,
return -1;
if (ns > kn->ns)
return 1;
- return strcmp(name, kn->name);
+ return strcmp(name, kernfs_rcu_name(kn));
}
static int kernfs_sd_compare(const struct kernfs_node *left,
const struct kernfs_node *right)
{
- return kernfs_name_compare(left->hash, left->name, left->ns, right);
+ return kernfs_name_compare(left->hash, kernfs_rcu_name(left), left->ns, right);
}
/**
@@ -360,8 +364,12 @@ static int kernfs_sd_compare(const struct kernfs_node *left,
*/
static int kernfs_link_sibling(struct kernfs_node *kn)
{
- struct rb_node **node = &kn->parent->dir.children.rb_node;
struct rb_node *parent = NULL;
+ struct kernfs_node *kn_parent;
+ struct rb_node **node;
+
+ kn_parent = kernfs_parent(kn);
+ node = &kn_parent->dir.children.rb_node;
while (*node) {
struct kernfs_node *pos;
@@ -380,13 +388,13 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
/* add new node and rebalance the tree */
rb_link_node(&kn->rb, parent, node);
- rb_insert_color(&kn->rb, &kn->parent->dir.children);
+ rb_insert_color(&kn->rb, &kn_parent->dir.children);
/* successfully added, account subdir number */
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
if (kernfs_type(kn) == KERNFS_DIR)
- kn->parent->dir.subdirs++;
- kernfs_inc_rev(kn->parent);
+ kn_parent->dir.subdirs++;
+ kernfs_inc_rev(kn_parent);
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
return 0;
@@ -407,16 +415,19 @@ static int kernfs_link_sibling(struct kernfs_node *kn)
*/
static bool kernfs_unlink_sibling(struct kernfs_node *kn)
{
+ struct kernfs_node *kn_parent;
+
if (RB_EMPTY_NODE(&kn->rb))
return false;
+ kn_parent = kernfs_parent(kn);
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
if (kernfs_type(kn) == KERNFS_DIR)
- kn->parent->dir.subdirs--;
- kernfs_inc_rev(kn->parent);
+ kn_parent->dir.subdirs--;
+ kernfs_inc_rev(kn_parent);
up_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
- rb_erase(&kn->rb, &kn->parent->dir.children);
+ rb_erase(&kn->rb, &kn_parent->dir.children);
RB_CLEAR_NODE(&kn->rb);
return true;
}
@@ -533,7 +544,8 @@ static void kernfs_free_rcu(struct rcu_head *rcu)
{
struct kernfs_node *kn = container_of(rcu, struct kernfs_node, rcu);
- kfree_const(kn->name);
+ /* If the whole node goes away, then name can't be used outside */
+ kfree_const(rcu_access_pointer(kn->name));
if (kn->iattr) {
simple_xattrs_free(&kn->iattr->xattrs, NULL);
@@ -562,11 +574,12 @@ void kernfs_put(struct kernfs_node *kn)
* Moving/renaming is always done while holding reference.
* kn->parent won't change beneath us.
*/
- parent = kn->parent;
+ parent = kernfs_parent(kn);
WARN_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS,
"kernfs_put: %s/%s: released with incorrect active_ref %d\n",
- parent ? parent->name : "", kn->name, atomic_read(&kn->active));
+ parent ? rcu_dereference(parent->name) : "",
+ rcu_dereference(kn->name), atomic_read(&kn->active));
if (kernfs_type(kn) == KERNFS_LINK)
kernfs_put(kn->symlink.target_kn);
@@ -643,7 +656,7 @@ static struct kernfs_node *__kernfs_new_node(struct kernfs_root *root,
atomic_set(&kn->active, KN_DEACTIVATED_BIAS);
RB_CLEAR_NODE(&kn->rb);
- kn->name = name;
+ rcu_assign_pointer(kn->name, name);
kn->mode = mode;
kn->flags = flags;
@@ -701,7 +714,7 @@ struct kernfs_node *kernfs_new_node(struct kernfs_node *parent,
name, mode, uid, gid, flags);
if (kn) {
kernfs_get(parent);
- kn->parent = parent;
+ rcu_assign_pointer(kn->__parent, parent);
}
return kn;
}
@@ -769,18 +782,20 @@ err_unlock:
*/
int kernfs_add_one(struct kernfs_node *kn)
{
- struct kernfs_node *parent = kn->parent;
- struct kernfs_root *root = kernfs_root(parent);
+ struct kernfs_root *root = kernfs_root(kn);
struct kernfs_iattrs *ps_iattr;
+ struct kernfs_node *parent;
bool has_ns;
int ret;
down_write(&root->kernfs_rwsem);
+ parent = kernfs_parent(kn);
ret = -EINVAL;
has_ns = kernfs_ns_enabled(parent);
if (WARN(has_ns != (bool)kn->ns, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
- has_ns ? "required" : "invalid", parent->name, kn->name))
+ has_ns ? "required" : "invalid",
+ kernfs_rcu_name(parent), kernfs_rcu_name(kn)))
goto out_unlock;
if (kernfs_type(parent) != KERNFS_DIR)
@@ -790,7 +805,7 @@ int kernfs_add_one(struct kernfs_node *kn)
if (parent->flags & (KERNFS_REMOVING | KERNFS_EMPTY_DIR))
goto out_unlock;
- kn->hash = kernfs_name_hash(kn->name, kn->ns);
+ kn->hash = kernfs_name_hash(kernfs_rcu_name(kn), kn->ns);
ret = kernfs_link_sibling(kn);
if (ret)
@@ -846,7 +861,7 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent,
if (has_ns != (bool)ns) {
WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n",
- has_ns ? "required" : "invalid", parent->name, name);
+ has_ns ? "required" : "invalid", kernfs_rcu_name(parent), name);
return NULL;
}
@@ -949,6 +964,11 @@ struct kernfs_node *kernfs_walk_and_get_ns(struct kernfs_node *parent,
return kn;
}
+unsigned int kernfs_root_flags(struct kernfs_node *kn)
+{
+ return kernfs_root(kn)->flags;
+}
+
/**
* kernfs_create_root - create a new kernfs hierarchy
* @scops: optional syscall operations for the hierarchy
@@ -1112,7 +1132,7 @@ struct kernfs_node *kernfs_create_empty_dir(struct kernfs_node *parent,
static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,
struct dentry *dentry, unsigned int flags)
{
- struct kernfs_node *kn;
+ struct kernfs_node *kn, *parent;
struct kernfs_root *root;
if (flags & LOOKUP_RCU)
@@ -1120,8 +1140,6 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,
/* Negative hashed dentry? */
if (d_really_is_negative(dentry)) {
- struct kernfs_node *parent;
-
/* If the kernfs parent node has changed discard and
* proceed to ->lookup.
*
@@ -1163,16 +1181,17 @@ static int kernfs_dop_revalidate(struct inode *dir, const struct qstr *name,
if (!kernfs_active(kn))
goto out_bad;
+ parent = kernfs_parent(kn);
/* The kernfs node has been moved? */
- if (kernfs_dentry_node(dentry->d_parent) != kn->parent)
+ if (kernfs_dentry_node(dentry->d_parent) != parent)
goto out_bad;
/* The kernfs node has been renamed */
- if (strcmp(dentry->d_name.name, kn->name) != 0)
+ if (strcmp(dentry->d_name.name, kernfs_rcu_name(kn)) != 0)
goto out_bad;
/* The kernfs node has been moved to a different namespace */
- if (kn->parent && kernfs_ns_enabled(kn->parent) &&
+ if (parent && kernfs_ns_enabled(parent) &&
kernfs_info(dentry->d_sb)->ns != kn->ns)
goto out_bad;
@@ -1365,7 +1384,7 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos,
return kernfs_leftmost_descendant(rb_to_kn(rbn));
/* no sibling left, visit parent */
- return pos->parent;
+ return kernfs_parent(pos);
}
static void kernfs_activate_one(struct kernfs_node *kn)
@@ -1377,7 +1396,7 @@ static void kernfs_activate_one(struct kernfs_node *kn)
if (kernfs_active(kn) || (kn->flags & (KERNFS_HIDDEN | KERNFS_REMOVING)))
return;
- WARN_ON_ONCE(kn->parent && RB_EMPTY_NODE(&kn->rb));
+ WARN_ON_ONCE(rcu_access_pointer(kn->__parent) && RB_EMPTY_NODE(&kn->rb));
WARN_ON_ONCE(atomic_read(&kn->active) != KN_DEACTIVATED_BIAS);
atomic_sub(KN_DEACTIVATED_BIAS, &kn->active);
@@ -1447,7 +1466,7 @@ void kernfs_show(struct kernfs_node *kn, bool show)
static void __kernfs_remove(struct kernfs_node *kn)
{
- struct kernfs_node *pos;
+ struct kernfs_node *pos, *parent;
/* Short-circuit if non-root @kn has already finished removal. */
if (!kn)
@@ -1459,10 +1478,10 @@ static void __kernfs_remove(struct kernfs_node *kn)
* This is for kernfs_remove_self() which plays with active ref
* after removal.
*/
- if (kn->parent && RB_EMPTY_NODE(&kn->rb))
+ if (kernfs_parent(kn) && RB_EMPTY_NODE(&kn->rb))
return;
- pr_debug("kernfs %s: removing\n", kn->name);
+ pr_debug("kernfs %s: removing\n", kernfs_rcu_name(kn));
/* prevent new usage by marking all nodes removing and deactivating */
pos = NULL;
@@ -1485,14 +1504,14 @@ static void __kernfs_remove(struct kernfs_node *kn)
kernfs_get(pos);
kernfs_drain(pos);
-
+ parent = kernfs_parent(pos);
/*
* kernfs_unlink_sibling() succeeds once per node. Use it
* to decide who's responsible for cleanups.
*/
- if (!pos->parent || kernfs_unlink_sibling(pos)) {
+ if (!parent || kernfs_unlink_sibling(pos)) {
struct kernfs_iattrs *ps_iattr =
- pos->parent ? pos->parent->iattr : NULL;
+ parent ? parent->iattr : NULL;
/* update timestamps on the parent */
down_write(&kernfs_root(kn)->kernfs_iattr_rwsem);
@@ -1718,11 +1737,11 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
{
struct kernfs_node *old_parent;
struct kernfs_root *root;
- const char *old_name = NULL;
+ const char *old_name;
int error;
/* can't move or rename root */
- if (!kn->parent)
+ if (!rcu_access_pointer(kn->__parent))
return -EINVAL;
root = kernfs_root(kn);
@@ -1733,9 +1752,19 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
(new_parent->flags & KERNFS_EMPTY_DIR))
goto out;
+ old_parent = kernfs_parent(kn);
+ if (root->flags & KERNFS_ROOT_INVARIANT_PARENT) {
+ error = -EINVAL;
+ if (WARN_ON_ONCE(old_parent != new_parent))
+ goto out;
+ }
+
error = 0;
- if ((kn->parent == new_parent) && (kn->ns == new_ns) &&
- (strcmp(kn->name, new_name) == 0))
+ old_name = kernfs_rcu_name(kn);
+ if (!new_name)
+ new_name = old_name;
+ if ((old_parent == new_parent) && (kn->ns == new_ns) &&
+ (strcmp(old_name, new_name) == 0))
goto out; /* nothing to rename */
error = -EEXIST;
@@ -1743,7 +1772,7 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
goto out;
/* rename kernfs_node */
- if (strcmp(kn->name, new_name) != 0) {
+ if (strcmp(old_name, new_name) != 0) {
error = -ENOMEM;
new_name = kstrdup_const(new_name, GFP_KERNEL);
if (!new_name)
@@ -1756,27 +1785,32 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent,
* Move to the appropriate place in the appropriate directories rbtree.
*/
kernfs_unlink_sibling(kn);
- kernfs_get(new_parent);
- /* rename_lock protects ->parent and ->name accessors */
- write_lock_irq(&kernfs_rename_lock);
+ /* rename_lock protects ->parent accessors */
+ if (old_parent != new_parent) {
+ kernfs_get(new_parent);
+ write_lock_irq(&kernfs_rename_lock);
- old_parent = kn->parent;
- kn->parent = new_parent;
+ rcu_assign_pointer(kn->__parent, new_parent);
- kn->ns = new_ns;
- if (new_name) {
- old_name = kn->name;
- kn->name = new_name;
- }
+ kn->ns = new_ns;
+ if (new_name)
+ rcu_assign_pointer(kn->name, new_name);
- write_unlock_irq(&kernfs_rename_lock);
+ write_unlock_irq(&kernfs_rename_lock);
+ kernfs_put(old_parent);
+ } else {
+ /* name assignment is RCU protected, parent is the same */
+ kn->ns = new_ns;
+ if (new_name)
+ rcu_assign_pointer(kn->name, new_name);
+ }
- kn->hash = kernfs_name_hash(kn->name, kn->ns);
+ kn->hash = kernfs_name_hash(new_name ?: old_name, kn->ns);
kernfs_link_sibling(kn);
- kernfs_put(old_parent);
- kfree_const(old_name);
+ if (new_name && !is_kernel_rodata((unsigned long)old_name))
+ kfree_rcu_mightsleep(old_name);
error = 0;
out:
@@ -1795,7 +1829,8 @@ static struct kernfs_node *kernfs_dir_pos(const void *ns,
{
if (pos) {
int valid = kernfs_active(pos) &&
- pos->parent == parent && hash == pos->hash;
+ rcu_access_pointer(pos->__parent) == parent &&
+ hash == pos->hash;
kernfs_put(pos);
if (!valid)
pos = NULL;
@@ -1860,7 +1895,7 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
for (pos = kernfs_dir_pos(ns, parent, ctx->pos, pos);
pos;
pos = kernfs_dir_next_pos(ns, parent, ctx->pos, pos)) {
- const char *name = pos->name;
+ const char *name = kernfs_rcu_name(pos);
unsigned int type = fs_umode_to_dtype(pos->mode);
int len = strlen(name);
ino_t ino = kernfs_ino(pos);
@@ -1869,10 +1904,10 @@ static int kernfs_fop_readdir(struct file *file, struct dir_context *ctx)
file->private_data = pos;
kernfs_get(pos);
- up_read(&root->kernfs_rwsem);
- if (!dir_emit(ctx, name, len, ino, type))
+ if (!dir_emit(ctx, name, len, ino, type)) {
+ up_read(&root->kernfs_rwsem);
return 0;
- down_read(&root->kernfs_rwsem);
+ }
}
up_read(&root->kernfs_rwsem);
file->private_data = NULL;
diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c
index 0eb320617d7b..66fe8fe41f06 100644
--- a/fs/kernfs/file.c
+++ b/fs/kernfs/file.c
@@ -911,9 +911,11 @@ repeat:
/* kick fsnotify */
down_read(&root->kernfs_supers_rwsem);
+ down_read(&root->kernfs_rwsem);
list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
struct kernfs_node *parent;
struct inode *p_inode = NULL;
+ const char *kn_name;
struct inode *inode;
struct qstr name;
@@ -927,7 +929,8 @@ repeat:
if (!inode)
continue;
- name = QSTR(kn->name);
+ kn_name = kernfs_rcu_name(kn);
+ name = QSTR(kn_name);
parent = kernfs_get_parent(kn);
if (parent) {
p_inode = ilookup(info->sb, kernfs_ino(parent));
@@ -947,6 +950,7 @@ repeat:
iput(inode);
}
+ up_read(&root->kernfs_rwsem);
up_read(&root->kernfs_supers_rwsem);
kernfs_put(kn);
goto repeat;
diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h
index b42ee6547cdc..40a2a9cd819d 100644
--- a/fs/kernfs/kernfs-internal.h
+++ b/fs/kernfs/kernfs-internal.h
@@ -19,6 +19,8 @@
#include <linux/kernfs.h>
#include <linux/fs_context.h>
+extern rwlock_t kernfs_rename_lock;
+
struct kernfs_iattrs {
kuid_t ia_uid;
kgid_t ia_gid;
@@ -64,11 +66,14 @@ struct kernfs_root {
*
* Return: the kernfs_root @kn belongs to.
*/
-static inline struct kernfs_root *kernfs_root(struct kernfs_node *kn)
+static inline struct kernfs_root *kernfs_root(const struct kernfs_node *kn)
{
+ const struct kernfs_node *knp;
/* if parent exists, it's always a dir; otherwise, @sd is a dir */
- if (kn->parent)
- kn = kn->parent;
+ guard(rcu)();
+ knp = rcu_dereference(kn->__parent);
+ if (knp)
+ kn = knp;
return kn->dir.root;
}
@@ -97,6 +102,32 @@ struct kernfs_super_info {
};
#define kernfs_info(SB) ((struct kernfs_super_info *)(SB->s_fs_info))
+static inline bool kernfs_root_is_locked(const struct kernfs_node *kn)
+{
+ return lockdep_is_held(&kernfs_root(kn)->kernfs_rwsem);
+}
+
+static inline const char *kernfs_rcu_name(const struct kernfs_node *kn)
+{
+ return rcu_dereference_check(kn->name, kernfs_root_is_locked(kn));
+}
+
+static inline struct kernfs_node *kernfs_parent(const struct kernfs_node *kn)
+{
+ /*
+ * The kernfs_node::__parent remains valid within a RCU section. The kn
+ * can be reparented (and renamed) which changes the entry. This can be
+ * avoided by locking kernfs_root::kernfs_rwsem or kernfs_rename_lock.
+ * Both locks can be used to obtain a reference on __parent. Once the
+ * reference count reaches 0 then the node is about to be freed
+ * and can not be renamed (or become a different parent) anymore.
+ */
+ return rcu_dereference_check(kn->__parent,
+ kernfs_root_is_locked(kn) ||
+ lockdep_is_held(&kernfs_rename_lock) ||
+ !atomic_read(&kn->count));
+}
+
static inline struct kernfs_node *kernfs_dentry_node(struct dentry *dentry)
{
if (d_really_is_negative(dentry))
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 1358c21837f1..5124e196c2bf 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -145,8 +145,10 @@ static struct dentry *kernfs_fh_to_parent(struct super_block *sb,
static struct dentry *kernfs_get_parent_dentry(struct dentry *child)
{
struct kernfs_node *kn = kernfs_dentry_node(child);
+ struct kernfs_root *root = kernfs_root(kn);
- return d_obtain_alias(kernfs_get_inode(child->d_sb, kn->parent));
+ guard(rwsem_read)(&root->kernfs_rwsem);
+ return d_obtain_alias(kernfs_get_inode(child->d_sb, kernfs_parent(kn)));
}
static const struct export_operations kernfs_export_ops = {
@@ -186,10 +188,10 @@ static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
return NULL;
}
- while (child->parent != parent) {
- if (!child->parent)
+ while (kernfs_parent(child) != parent) {
+ child = kernfs_parent(child);
+ if (!child)
return NULL;
- child = child->parent;
}
return child;
@@ -207,16 +209,27 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
{
struct dentry *dentry;
struct kernfs_node *knparent;
+ struct kernfs_root *root;
BUG_ON(sb->s_op != &kernfs_sops);
dentry = dget(sb->s_root);
/* Check if this is the root kernfs_node */
- if (!kn->parent)
+ if (!rcu_access_pointer(kn->__parent))
return dentry;
- knparent = find_next_ancestor(kn, NULL);
+ root = kernfs_root(kn);
+ /*
+ * As long as kn is valid, its parent can not vanish. This is cgroup's
+ * kn so it can't have its parent replaced. Therefore it is safe to use
+ * the ancestor node outside of the RCU or locked section.
+ */
+ if (WARN_ON_ONCE(!(root->flags & KERNFS_ROOT_INVARIANT_PARENT)))
+ return ERR_PTR(-EINVAL);
+ scoped_guard(rcu) {
+ knparent = find_next_ancestor(kn, NULL);
+ }
if (WARN_ON(!knparent)) {
dput(dentry);
return ERR_PTR(-EINVAL);
@@ -225,17 +238,26 @@ struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
do {
struct dentry *dtmp;
struct kernfs_node *kntmp;
+ const char *name;
if (kn == knparent)
return dentry;
- kntmp = find_next_ancestor(kn, knparent);
- if (WARN_ON(!kntmp)) {
+
+ scoped_guard(rwsem_read, &root->kernfs_rwsem) {
+ kntmp = find_next_ancestor(kn, knparent);
+ if (WARN_ON(!kntmp)) {
+ dput(dentry);
+ return ERR_PTR(-EINVAL);
+ }
+ name = kstrdup(kernfs_rcu_name(kntmp), GFP_KERNEL);
+ }
+ if (!name) {
dput(dentry);
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENOMEM);
}
- dtmp = lookup_positive_unlocked(kntmp->name, dentry,
- strlen(kntmp->name));
+ dtmp = lookup_positive_unlocked(name, dentry, strlen(name));
dput(dentry);
+ kfree(name);
if (IS_ERR(dtmp))
return dtmp;
knparent = kntmp;
diff --git a/fs/kernfs/symlink.c b/fs/kernfs/symlink.c
index 45371a70caa7..0bd8a2143723 100644
--- a/fs/kernfs/symlink.c
+++ b/fs/kernfs/symlink.c
@@ -62,10 +62,10 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
/* go up to the root, stop at the base */
base = parent;
- while (base->parent) {
- kn = target->parent;
- while (kn->parent && base != kn)
- kn = kn->parent;
+ while (kernfs_parent(base)) {
+ kn = kernfs_parent(target);
+ while (kernfs_parent(kn) && base != kn)
+ kn = kernfs_parent(kn);
if (base == kn)
break;
@@ -75,14 +75,14 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
strcpy(s, "../");
s += 3;
- base = base->parent;
+ base = kernfs_parent(base);
}
/* determine end of target string for reverse fillup */
kn = target;
- while (kn->parent && kn != base) {
- len += strlen(kn->name) + 1;
- kn = kn->parent;
+ while (kernfs_parent(kn) && kn != base) {
+ len += strlen(kernfs_rcu_name(kn)) + 1;
+ kn = kernfs_parent(kn);
}
/* check limits */
@@ -94,15 +94,16 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
/* reverse fillup of target string from target to base */
kn = target;
- while (kn->parent && kn != base) {
- int slen = strlen(kn->name);
+ while (kernfs_parent(kn) && kn != base) {
+ const char *name = kernfs_rcu_name(kn);
+ int slen = strlen(name);
len -= slen;
- memcpy(s + len, kn->name, slen);
+ memcpy(s + len, name, slen);
if (len)
s[--len] = '/';
- kn = kn->parent;
+ kn = kernfs_parent(kn);
}
return 0;
@@ -111,12 +112,13 @@ static int kernfs_get_target_path(struct kernfs_node *parent,
static int kernfs_getlink(struct inode *inode, char *path)
{
struct kernfs_node *kn = inode->i_private;
- struct kernfs_node *parent = kn->parent;
+ struct kernfs_node *parent;
struct kernfs_node *target = kn->symlink.target_kn;
- struct kernfs_root *root = kernfs_root(parent);
+ struct kernfs_root *root = kernfs_root(kn);
int error;
down_read(&root->kernfs_rwsem);
+ parent = kernfs_parent(kn);
error = kernfs_get_target_path(parent, target, path);
up_read(&root->kernfs_rwsem);