diff options
18 files changed, 1075 insertions, 97 deletions
diff --git a/fs/mount.h b/fs/mount.h index 179f690a0c72..ffb613cdfeee 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -8,15 +8,23 @@ struct mnt_namespace { struct ns_common ns; struct mount * root; - struct rb_root mounts; /* Protected by namespace_sem */ + struct { + struct rb_root mounts; /* Protected by namespace_sem */ + struct rb_node *mnt_last_node; /* last (rightmost) mount in the rbtree */ + struct rb_node *mnt_first_node; /* first (leftmost) mount in the rbtree */ + }; struct user_namespace *user_ns; struct ucounts *ucounts; u64 seq; /* Sequence number to prevent loops */ - wait_queue_head_t poll; + union { + wait_queue_head_t poll; + struct rcu_head mnt_ns_rcu; + }; u64 event; unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ + struct list_head mnt_ns_list; /* entry in the sequential list of mounts namespace */ refcount_t passive; /* number references not pinning @mounts */ } __randomize_layout; @@ -150,22 +158,21 @@ static inline bool mnt_ns_attached(const struct mount *mnt) static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) { + struct mnt_namespace *ns = mnt->mnt_ns; WARN_ON(!mnt_ns_attached(mnt)); - rb_erase(&mnt->mnt_node, &mnt->mnt_ns->mounts); + if (ns->mnt_last_node == &mnt->mnt_node) + ns->mnt_last_node = rb_prev(&mnt->mnt_node); + if (ns->mnt_first_node == &mnt->mnt_node) + ns->mnt_first_node = rb_next(&mnt->mnt_node); + rb_erase(&mnt->mnt_node, &ns->mounts); RB_CLEAR_NODE(&mnt->mnt_node); list_add_tail(&mnt->mnt_list, dt_list); } bool has_locked_children(struct mount *mnt, struct dentry *dentry); -struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mnt_ns, bool previous); -static inline struct mnt_namespace *lookup_next_mnt_ns(struct mnt_namespace *mntns) -{ - return __lookup_next_mnt_ns(mntns, false); -} -static inline struct mnt_namespace *lookup_prev_mnt_ns(struct mnt_namespace *mntns) -{ - return 
__lookup_next_mnt_ns(mntns, true); -} +struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mnt_ns, + bool previous); + static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); diff --git a/fs/namespace.c b/fs/namespace.c index 64deda6f5b2c..4013fbac354a 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -33,7 +33,6 @@ #include <linux/shmem_fs.h> #include <linux/mnt_idmapping.h> #include <linux/pidfs.h> -#include <linux/nospec.h> #include "pnode.h" #include "internal.h" @@ -67,12 +66,12 @@ static int __init set_mphash_entries(char *str) __setup("mphash_entries=", set_mphash_entries); static u64 event; -static DEFINE_IDA(mnt_id_ida); +static DEFINE_XARRAY_FLAGS(mnt_id_xa, XA_FLAGS_ALLOC); static DEFINE_IDA(mnt_group_ida); /* Don't allow confusion with old 32bit mount ID */ #define MNT_UNIQUE_ID_OFFSET (1ULL << 31) -static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET); +static u64 mnt_id_ctr = MNT_UNIQUE_ID_OFFSET; static struct hlist_head *mount_hashtable __ro_after_init; static struct hlist_head *mountpoint_hashtable __ro_after_init; @@ -80,8 +79,10 @@ static struct kmem_cache *mnt_cache __ro_after_init; static DECLARE_RWSEM(namespace_sem); static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ -static DEFINE_RWLOCK(mnt_ns_tree_lock); +static DEFINE_SEQLOCK(mnt_ns_tree_lock); + static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ +static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */ struct mount_kattr { unsigned int attr_set; @@ -107,17 +108,6 @@ EXPORT_SYMBOL_GPL(fs_kobj); */ __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock); -static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns) -{ - u64 seq_b = ns->seq; - - if (seq < seq_b) - return -1; - if (seq > seq_b) - return 1; - return 0; -} - static inline struct mnt_namespace *node_to_mnt_ns(const 
struct rb_node *node) { if (!node) @@ -125,25 +115,52 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node); } -static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b) +static int mnt_ns_cmp(struct rb_node *a, const struct rb_node *b) { struct mnt_namespace *ns_a = node_to_mnt_ns(a); struct mnt_namespace *ns_b = node_to_mnt_ns(b); u64 seq_a = ns_a->seq; + u64 seq_b = ns_b->seq; - return mnt_ns_cmp(seq_a, ns_b) < 0; + if (seq_a < seq_b) + return -1; + if (seq_a > seq_b) + return 1; + return 0; +} + +static inline void mnt_ns_tree_write_lock(void) +{ + write_seqlock(&mnt_ns_tree_lock); +} + +static inline void mnt_ns_tree_write_unlock(void) +{ + write_sequnlock(&mnt_ns_tree_lock); } static void mnt_ns_tree_add(struct mnt_namespace *ns) { - guard(write_lock)(&mnt_ns_tree_lock); - rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less); + struct rb_node *node, *prev; + + mnt_ns_tree_write_lock(); + node = rb_find_add_rcu(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_cmp); + /* + * If there's no previous entry simply add it after the + * head and if there is add it after the previous entry. 
+ */ + prev = rb_prev(&ns->mnt_ns_tree_node); + if (!prev) + list_add_rcu(&ns->mnt_ns_list, &mnt_ns_list); + else + list_add_rcu(&ns->mnt_ns_list, &node_to_mnt_ns(prev)->mnt_ns_list); + mnt_ns_tree_write_unlock(); + + WARN_ON_ONCE(node); } static void mnt_ns_release(struct mnt_namespace *ns) { - lockdep_assert_not_held(&mnt_ns_tree_lock); - /* keep alive for {list,stat}mount() */ if (refcount_dec_and_test(&ns->passive)) { put_user_ns(ns->user_ns); @@ -152,41 +169,34 @@ static void mnt_ns_release(struct mnt_namespace *ns) } DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T)) +static void mnt_ns_release_rcu(struct rcu_head *rcu) +{ + mnt_ns_release(container_of(rcu, struct mnt_namespace, mnt_ns_rcu)); +} + static void mnt_ns_tree_remove(struct mnt_namespace *ns) { /* remove from global mount namespace list */ if (!is_anon_ns(ns)) { - guard(write_lock)(&mnt_ns_tree_lock); + mnt_ns_tree_write_lock(); rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree); + list_bidir_del_rcu(&ns->mnt_ns_list); + mnt_ns_tree_write_unlock(); } - mnt_ns_release(ns); + call_rcu(&ns->mnt_ns_rcu, mnt_ns_release_rcu); } -/* - * Returns the mount namespace which either has the specified id, or has the - * next smallest id afer the specified one. 
- */ -static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id) +static int mnt_ns_find(const void *key, const struct rb_node *node) { - struct rb_node *node = mnt_ns_tree.rb_node; - struct mnt_namespace *ret = NULL; - - lockdep_assert_held(&mnt_ns_tree_lock); - - while (node) { - struct mnt_namespace *n = node_to_mnt_ns(node); + const u64 mnt_ns_id = *(u64 *)key; + const struct mnt_namespace *ns = node_to_mnt_ns(node); - if (mnt_ns_id <= n->seq) { - ret = node_to_mnt_ns(node); - if (mnt_ns_id == n->seq) - break; - node = node->rb_left; - } else { - node = node->rb_right; - } - } - return ret; + if (mnt_ns_id < ns->seq) + return -1; + if (mnt_ns_id > ns->seq) + return 1; + return 0; } /* @@ -196,18 +206,37 @@ static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id) * namespace the @namespace_sem must first be acquired. If the namespace has * already shut down before acquiring @namespace_sem, {list,stat}mount() will * see that the mount rbtree of the namespace is empty. + * + * Note the lookup is lockless protected by a sequence counter. We only + * need to guard against false negatives as false positives aren't + * possible. So if we didn't find a mount namespace and the sequence + * counter has changed we need to retry. If the sequence counter is + * still the same we know the search actually failed. 
*/ static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id) { - struct mnt_namespace *ns; + struct mnt_namespace *ns; + struct rb_node *node; + unsigned int seq; + + guard(rcu)(); + do { + seq = read_seqbegin(&mnt_ns_tree_lock); + node = rb_find_rcu(&mnt_ns_id, &mnt_ns_tree, mnt_ns_find); + if (node) + break; + } while (read_seqretry(&mnt_ns_tree_lock, seq)); - guard(read_lock)(&mnt_ns_tree_lock); - ns = mnt_ns_find_id_at(mnt_ns_id); - if (!ns || ns->seq != mnt_ns_id) - return NULL; + if (!node) + return NULL; - refcount_inc(&ns->passive); - return ns; + /* + * The last reference count is put with RCU delay so we can + * unconditionally acquire a reference here. + */ + ns = node_to_mnt_ns(node); + refcount_inc(&ns->passive); + return ns; } static inline void lock_mount_hash(void) @@ -237,18 +266,19 @@ static inline struct hlist_head *mp_hash(struct dentry *dentry) static int mnt_alloc_id(struct mount *mnt) { - int res = ida_alloc(&mnt_id_ida, GFP_KERNEL); + int res; - if (res < 0) - return res; - mnt->mnt_id = res; - mnt->mnt_id_unique = atomic64_inc_return(&mnt_id_ctr); - return 0; + xa_lock(&mnt_id_xa); + res = __xa_alloc(&mnt_id_xa, &mnt->mnt_id, mnt, XA_LIMIT(1, INT_MAX), GFP_KERNEL); + if (!res) + mnt->mnt_id_unique = ++mnt_id_ctr; + xa_unlock(&mnt_id_xa); + return res; } static void mnt_free_id(struct mount *mnt) { - ida_free(&mnt_id_ida, mnt->mnt_id); + xa_erase(&mnt_id_xa, mnt->mnt_id); } /* @@ -1125,16 +1155,25 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) { struct rb_node **link = &ns->mounts.rb_node; struct rb_node *parent = NULL; + bool mnt_first_node = true, mnt_last_node = true; WARN_ON(mnt_ns_attached(mnt)); mnt->mnt_ns = ns; while (*link) { parent = *link; - if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) + if (mnt->mnt_id_unique < node_to_mount(parent)->mnt_id_unique) { link = &parent->rb_left; - else + mnt_last_node = false; + } else { link = &parent->rb_right; + mnt_first_node = false; + } } + + if 
(mnt_last_node) + ns->mnt_last_node = &mnt->mnt_node; + if (mnt_first_node) + ns->mnt_first_node = &mnt->mnt_node; rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); } @@ -2070,30 +2109,34 @@ struct ns_common *from_mnt_ns(struct mnt_namespace *mnt) return &mnt->ns; } -struct mnt_namespace *__lookup_next_mnt_ns(struct mnt_namespace *mntns, bool previous) +struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool previous) { - guard(read_lock)(&mnt_ns_tree_lock); + guard(rcu)(); + for (;;) { - struct rb_node *node; + struct list_head *list; if (previous) - node = rb_prev(&mntns->mnt_ns_tree_node); + list = rcu_dereference(list_bidir_prev_rcu(&mntns->mnt_ns_list)); else - node = rb_next(&mntns->mnt_ns_tree_node); - if (!node) + list = rcu_dereference(list_next_rcu(&mntns->mnt_ns_list)); + if (list_is_head(list, &mnt_ns_list)) return ERR_PTR(-ENOENT); - mntns = node_to_mnt_ns(node); - node = &mntns->mnt_ns_tree_node; + mntns = list_entry_rcu(list, struct mnt_namespace, mnt_ns_list); + /* + * The last passive reference count is put with RCU + * delay so accessing the mount namespace is not just + * safe but all relevant members are still valid. + */ if (!ns_capable_noaudit(mntns->user_ns, CAP_SYS_ADMIN)) continue; /* - * Holding mnt_ns_tree_lock prevents the mount namespace from - * being freed but it may well be on it's deathbed. We want an - * active reference, not just a passive one here as we're - * persisting the mount namespace. + * We need an active reference count as we're persisting + * the mount namespace and it might already be on its + * deathbed. 
*/ if (!refcount_inc_not_zero(&mntns->ns.count)) continue; @@ -3915,6 +3958,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a refcount_set(&new_ns->ns.count, 1); refcount_set(&new_ns->passive, 1); new_ns->mounts = RB_ROOT; + INIT_LIST_HEAD(&new_ns->mnt_ns_list); RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node); init_waitqueue_head(&new_ns->poll); new_ns->user_ns = get_user_ns(user_ns); @@ -3994,7 +4038,6 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, while (p->mnt.mnt_root != q->mnt.mnt_root) p = next_mnt(skip_mnt_tree(p), old); } - mnt_ns_tree_add(new_ns); namespace_unlock(); if (rootmnt) @@ -4002,6 +4045,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, if (pwdmnt) mntput(pwdmnt); + mnt_ns_tree_add(new_ns); return new_ns; } @@ -5048,6 +5092,10 @@ static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq) if (sb->s_op->show_options) { size_t start = seq->count; + err = security_sb_show_options(seq, sb); + if (err) + return err; + err = sb->s_op->show_options(seq, mnt->mnt_root); if (err) return err; @@ -5535,9 +5583,9 @@ static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id, if (!last_mnt_id) { if (reverse) - first = node_to_mount(rb_last(&ns->mounts)); + first = node_to_mount(ns->mnt_last_node); else - first = node_to_mount(rb_first(&ns->mounts)); + first = node_to_mount(ns->mnt_first_node); } else { if (reverse) first = mnt_find_id_at_reverse(ns, last_mnt_id - 1); diff --git a/fs/nsfs.c b/fs/nsfs.c index c675fc40ce2d..663f8656158d 100644 --- a/fs/nsfs.c +++ b/fs/nsfs.c @@ -274,10 +274,7 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl, if (usize < MNT_NS_INFO_SIZE_VER0) return -EINVAL; - if (previous) - mnt_ns = lookup_prev_mnt_ns(to_mnt_ns(ns)); - else - mnt_ns = lookup_next_mnt_ns(to_mnt_ns(ns)); + mnt_ns = get_sequential_mnt_ns(to_mnt_ns(ns), previous); if (IS_ERR(mnt_ns)) return PTR_ERR(mnt_ns); diff --git 
a/include/linux/rculist.h b/include/linux/rculist.h index 14dfa6008467..1b11926ddd47 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -30,6 +30,17 @@ static inline void INIT_LIST_HEAD_RCU(struct list_head *list) * way, we must not access it directly */ #define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) +/* + * Return the ->prev pointer of a list_head in an rcu safe way. Don't + * access it directly. + * + * Any list traversed with list_bidir_prev_rcu() must never use + * list_del_rcu(). Doing so will poison the ->prev pointer that + * list_bidir_prev_rcu() relies on, which will result in segfaults. + * To prevent these segfaults, use list_bidir_del_rcu() instead + * of list_del_rcu(). + */ +#define list_bidir_prev_rcu(list) (*((struct list_head __rcu **)(&(list)->prev))) /** * list_tail_rcu - returns the prev pointer of the head of the list @@ -159,6 +170,39 @@ static inline void list_del_rcu(struct list_head *entry) } /** + * list_bidir_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * In contrast to list_del_rcu() doesn't poison the prev pointer thus + * allowing backwards traversal via list_bidir_prev_rcu(). + * + * Note: list_empty() on entry does not return true after this because + * the entry is in a special undefined state that permits RCU-based + * lockfree reverse traversal. In particular this means that we can not + * poison the forward and backwards pointers that may still be used for + * walking the list. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another list-mutation + * primitive, such as list_bidir_del_rcu() or list_add_rcu(), running on + * this same list. However, it is perfectly legal to run concurrently + * with the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). 
+ * + * Note that list_del_rcu() and list_bidir_del_rcu() must not be used on + * the same list. + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. + */ +static inline void list_bidir_del_rcu(struct list_head *entry) +{ + __list_del_entry(entry); +} + +/** * hlist_del_init_rcu - deletes entry from hash list with re-initialization * @n: the element to delete from the hash list. * diff --git a/samples/vfs/.gitignore b/samples/vfs/.gitignore index 79212d91285b..8708341bc082 100644 --- a/samples/vfs/.gitignore +++ b/samples/vfs/.gitignore @@ -1,3 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only /test-fsmount +/test-list-all-mounts /test-statx +/mountinfo diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile index 6377a678134a..6554b73a75c8 100644 --- a/samples/vfs/Makefile +++ b/samples/vfs/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -userprogs-always-y += test-fsmount test-statx +userprogs-always-y += test-fsmount test-statx mountinfo test-list-all-mounts userccflags += -I usr/include diff --git a/samples/vfs/mountinfo.c b/samples/vfs/mountinfo.c new file mode 100644 index 000000000000..bc78275cac69 --- /dev/null +++ b/samples/vfs/mountinfo.c @@ -0,0 +1,274 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +/* + * Use pidfds, nsfds, listmount() and statmount() to mimic the + * contents of /proc/self/mountinfo. 
+ */ +#define _GNU_SOURCE +#define __SANE_USERSPACE_TYPES__ +#include <stdio.h> +#include <stdint.h> +#include <unistd.h> +#include <alloca.h> +#include <getopt.h> +#include <stdlib.h> +#include <stdbool.h> +#include <errno.h> + +#include "samples-vfs.h" + +/* max mounts per listmount call */ +#define MAXMOUNTS 1024 + +/* size of struct statmount (including trailing string buffer) */ +#define STATMOUNT_BUFSIZE 4096 + +static bool ext_format; + +#ifndef __NR_pidfd_open +#define __NR_pidfd_open -1 +#endif + +/* + * There are no bindings in glibc for listmount() and statmount() (yet), + * make our own here. + */ +static int statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask, + struct statmount *buf, size_t bufsize, + unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = mask, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_statmount, &req, buf, bufsize, flags); +} + +static ssize_t listmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 last_mnt_id, + __u64 list[], size_t num, unsigned int flags) +{ + struct mnt_id_req req = { + .size = MNT_ID_REQ_SIZE_VER0, + .mnt_id = mnt_id, + .param = last_mnt_id, + }; + + if (mnt_ns_id) { + req.size = MNT_ID_REQ_SIZE_VER1; + req.mnt_ns_id = mnt_ns_id; + } + + return syscall(__NR_listmount, &req, list, num, flags); +} + +static void show_mnt_attrs(__u64 flags) +{ + printf("%s", flags & MOUNT_ATTR_RDONLY ? 
"ro" : "rw"); + + if (flags & MOUNT_ATTR_NOSUID) + printf(",nosuid"); + if (flags & MOUNT_ATTR_NODEV) + printf(",nodev"); + if (flags & MOUNT_ATTR_NOEXEC) + printf(",noexec"); + + switch (flags & MOUNT_ATTR__ATIME) { + case MOUNT_ATTR_RELATIME: + printf(",relatime"); + break; + case MOUNT_ATTR_NOATIME: + printf(",noatime"); + break; + case MOUNT_ATTR_STRICTATIME: + /* print nothing */ + break; + } + + if (flags & MOUNT_ATTR_NODIRATIME) + printf(",nodiratime"); + if (flags & MOUNT_ATTR_NOSYMFOLLOW) + printf(",nosymfollow"); + if (flags & MOUNT_ATTR_IDMAP) + printf(",idmapped"); +} + +static void show_propagation(struct statmount *sm) +{ + if (sm->mnt_propagation & MS_SHARED) + printf(" shared:%llu", sm->mnt_peer_group); + if (sm->mnt_propagation & MS_SLAVE) { + printf(" master:%llu", sm->mnt_master); + if (sm->propagate_from && sm->propagate_from != sm->mnt_master) + printf(" propagate_from:%llu", sm->propagate_from); + } + if (sm->mnt_propagation & MS_UNBINDABLE) + printf(" unbindable"); +} + +static void show_sb_flags(__u64 flags) +{ + printf("%s", flags & MS_RDONLY ? 
"ro" : "rw"); + if (flags & MS_SYNCHRONOUS) + printf(",sync"); + if (flags & MS_DIRSYNC) + printf(",dirsync"); + if (flags & MS_MANDLOCK) + printf(",mand"); + if (flags & MS_LAZYTIME) + printf(",lazytime"); +} + +static int dump_mountinfo(__u64 mnt_id, __u64 mnt_ns_id) +{ + int ret; + struct statmount *buf = alloca(STATMOUNT_BUFSIZE); + const __u64 mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC | + STATMOUNT_PROPAGATE_FROM | STATMOUNT_FS_TYPE | + STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | + STATMOUNT_MNT_OPTS | STATMOUNT_FS_SUBTYPE | + STATMOUNT_SB_SOURCE; + + ret = statmount(mnt_id, mnt_ns_id, mask, buf, STATMOUNT_BUFSIZE, 0); + if (ret < 0) { + perror("statmount"); + return 1; + } + + if (ext_format) + printf("0x%llx 0x%llx 0x%llx ", mnt_ns_id, mnt_id, buf->mnt_parent_id); + + printf("%u %u %u:%u %s %s ", buf->mnt_id_old, buf->mnt_parent_id_old, + buf->sb_dev_major, buf->sb_dev_minor, + &buf->str[buf->mnt_root], + &buf->str[buf->mnt_point]); + show_mnt_attrs(buf->mnt_attr); + show_propagation(buf); + + printf(" - %s", &buf->str[buf->fs_type]); + if (buf->mask & STATMOUNT_FS_SUBTYPE) + printf(".%s", &buf->str[buf->fs_subtype]); + if (buf->mask & STATMOUNT_SB_SOURCE) + printf(" %s ", &buf->str[buf->sb_source]); + else + printf(" :none "); + + show_sb_flags(buf->sb_flags); + if (buf->mask & STATMOUNT_MNT_OPTS) + printf(",%s", &buf->str[buf->mnt_opts]); + printf("\n"); + return 0; +} + +static int dump_mounts(__u64 mnt_ns_id) +{ + __u64 mntid[MAXMOUNTS]; + __u64 last_mnt_id = 0; + ssize_t count; + int i; + + /* + * Get a list of all mntids in mnt_ns_id. If it returns MAXMOUNTS + * mounts, then go again until we get everything. + */ + do { + count = listmount(LSMT_ROOT, mnt_ns_id, last_mnt_id, mntid, MAXMOUNTS, 0); + if (count < 0 || count > MAXMOUNTS) { + errno = count < 0 ? 
errno : count; + perror("listmount"); + return 1; + } + + /* Walk the returned mntids and print info about each */ + for (i = 0; i < count; ++i) { + int ret = dump_mountinfo(mntid[i], mnt_ns_id); + + if (ret != 0) + return ret; + } + /* Set up last_mnt_id to pick up where we left off */ + last_mnt_id = mntid[count - 1]; + } while (count == MAXMOUNTS); + return 0; +} + +static void usage(const char * const prog) +{ + printf("Usage:\n"); + printf("%s [-e] [-p pid] [-r] [-h]\n", prog); + printf(" -e: extended format\n"); + printf(" -h: print usage message\n"); + printf(" -p: get mount namespace from given pid\n"); + printf(" -r: recursively print all mounts in all child namespaces\n"); +} + +int main(int argc, char * const *argv) +{ + struct mnt_ns_info mni = { .size = MNT_NS_INFO_SIZE_VER0 }; + int pidfd, mntns, ret, opt; + pid_t pid = getpid(); + bool recursive = false; + + while ((opt = getopt(argc, argv, "ehp:r")) != -1) { + switch (opt) { + case 'e': + ext_format = true; + break; + case 'h': + usage(argv[0]); + return 0; + case 'p': + pid = atoi(optarg); + break; + case 'r': + recursive = true; + break; + } + } + + /* Get a pidfd for pid */ + pidfd = syscall(__NR_pidfd_open, pid, 0); + if (pidfd < 0) { + perror("pidfd_open"); + return 1; + } + + /* Get the mnt namespace for pidfd */ + mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, NULL); + if (mntns < 0) { + perror("PIDFD_GET_MNT_NAMESPACE"); + return 1; + } + close(pidfd); + + /* get info about mntns. 
In particular, the mnt_ns_id */ + ret = ioctl(mntns, NS_MNT_GET_INFO, &mni); + if (ret < 0) { + perror("NS_MNT_GET_INFO"); + return 1; + } + + do { + int ret; + + ret = dump_mounts(mni.mnt_ns_id); + if (ret) + return ret; + + if (!recursive) + break; + + /* get the next mntns (and overwrite the old mount ns info) */ + ret = ioctl(mntns, NS_MNT_GET_NEXT, &mni); + close(mntns); + mntns = ret; + } while (mntns >= 0); + + return 0; +} diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h new file mode 100644 index 000000000000..103e1e7c4cec --- /dev/null +++ b/samples/vfs/samples-vfs.h @@ -0,0 +1,241 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef __SAMPLES_VFS_H +#define __SAMPLES_VFS_H + +#include <errno.h> +#include <linux/types.h> +#include <sys/ioctl.h> +#include <sys/syscall.h> + +#define die_errno(format, ...) \ + do { \ + fprintf(stderr, "%m | %s: %d: %s: " format "\n", __FILE__, \ + __LINE__, __func__, ##__VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +struct statmount { + __u32 size; /* Total size, including strings */ + __u32 mnt_opts; /* [str] Options (comma separated, escaped) */ + __u64 mask; /* What results were written */ + __u32 sb_dev_major; /* Device ID */ + __u32 sb_dev_minor; + __u64 sb_magic; /* ..._SUPER_MAGIC */ + __u32 sb_flags; /* SB_{RDONLY,SYNCHRONOUS,DIRSYNC,LAZYTIME} */ + __u32 fs_type; /* [str] Filesystem type */ + __u64 mnt_id; /* Unique ID of mount */ + __u64 mnt_parent_id; /* Unique ID of parent (for root == mnt_id) */ + __u32 mnt_id_old; /* Reused IDs used in proc/.../mountinfo */ + __u32 mnt_parent_id_old; + __u64 mnt_attr; /* MOUNT_ATTR_... 
*/ + __u64 mnt_propagation; /* MS_{SHARED,SLAVE,PRIVATE,UNBINDABLE} */ + __u64 mnt_peer_group; /* ID of shared peer group */ + __u64 mnt_master; /* Mount receives propagation from this ID */ + __u64 propagate_from; /* Propagation from in current namespace */ + __u32 mnt_root; /* [str] Root of mount relative to root of fs */ + __u32 mnt_point; /* [str] Mountpoint relative to current root */ + __u64 mnt_ns_id; /* ID of the mount namespace */ + __u32 fs_subtype; /* [str] Subtype of fs_type (if any) */ + __u32 sb_source; /* [str] Source string of the mount */ + __u32 opt_num; /* Number of fs options */ + __u32 opt_array; /* [str] Array of nul terminated fs options */ + __u32 opt_sec_num; /* Number of security options */ + __u32 opt_sec_array; /* [str] Array of nul terminated security options */ + __u64 __spare2[46]; + char str[]; /* Variable size part containing strings */ +}; + +struct mnt_id_req { + __u32 size; + __u32 spare; + __u64 mnt_id; + __u64 param; + __u64 mnt_ns_id; +}; + +#ifndef MNT_ID_REQ_SIZE_VER0 +#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */ +#endif + +#ifndef MNT_ID_REQ_SIZE_VER1 +#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */ +#endif + +/* Get the id for a mount namespace */ +#ifndef NS_GET_MNTNS_ID +#define NS_GET_MNTNS_ID _IO(0xb7, 0x5) +#endif + +struct mnt_ns_info { + __u32 size; + __u32 nr_mounts; + __u64 mnt_ns_id; +}; + +#ifndef MNT_NS_INFO_SIZE_VER0 +#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */ +#endif + +#ifndef NS_MNT_GET_INFO +#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_NEXT +#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info) +#endif + +#ifndef NS_MNT_GET_PREV +#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info) +#endif + +#ifndef PIDFD_GET_MNT_NAMESPACE +#define PIDFD_GET_MNT_NAMESPACE _IO(0xFF, 3) +#endif + +#ifndef __NR_listmount +#define __NR_listmount 458 +#endif + +#ifndef __NR_statmount +#define 
__NR_statmount 457 +#endif + +#ifndef LSMT_ROOT +#define LSMT_ROOT 0xffffffffffffffff /* root mount */ +#endif + +/* @mask bits for statmount(2) */ +#ifndef STATMOUNT_SB_BASIC +#define STATMOUNT_SB_BASIC 0x00000001U /* Want/got sb_... */ +#endif + +#ifndef STATMOUNT_MNT_BASIC +#define STATMOUNT_MNT_BASIC 0x00000002U /* Want/got mnt_... */ +#endif + +#ifndef STATMOUNT_PROPAGATE_FROM +#define STATMOUNT_PROPAGATE_FROM 0x00000004U /* Want/got propagate_from */ +#endif + +#ifndef STATMOUNT_MNT_ROOT +#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */ +#endif + +#ifndef STATMOUNT_MNT_POINT +#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */ +#endif + +#ifndef STATMOUNT_FS_TYPE +#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */ +#endif + +#ifndef STATMOUNT_MNT_NS_ID +#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */ +#endif + +#ifndef STATMOUNT_MNT_OPTS +#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */ +#endif + +#ifndef STATMOUNT_FS_SUBTYPE +#define STATMOUNT_FS_SUBTYPE 0x00000100U /* Want/got fs_subtype */ +#endif + +#ifndef STATMOUNT_SB_SOURCE +#define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ +#endif + +#ifndef STATMOUNT_OPT_ARRAY +#define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ +#endif + +#ifndef STATMOUNT_OPT_SEC_ARRAY +#define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... 
*/ +#endif + +#ifndef STATX_MNT_ID_UNIQUE +#define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ +#endif + +#ifndef MOUNT_ATTR_RDONLY +#define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ +#endif + +#ifndef MOUNT_ATTR_NOSUID +#define MOUNT_ATTR_NOSUID 0x00000002 /* Ignore suid and sgid bits */ +#endif + +#ifndef MOUNT_ATTR_NODEV +#define MOUNT_ATTR_NODEV 0x00000004 /* Disallow access to device special files */ +#endif + +#ifndef MOUNT_ATTR_NOEXEC +#define MOUNT_ATTR_NOEXEC 0x00000008 /* Disallow program execution */ +#endif + +#ifndef MOUNT_ATTR__ATIME +#define MOUNT_ATTR__ATIME 0x00000070 /* Setting on how atime should be updated */ +#endif + +#ifndef MOUNT_ATTR_RELATIME +#define MOUNT_ATTR_RELATIME 0x00000000 /* - Update atime relative to mtime/ctime. */ +#endif + +#ifndef MOUNT_ATTR_NOATIME +#define MOUNT_ATTR_NOATIME 0x00000010 /* - Do not update access times. */ +#endif + +#ifndef MOUNT_ATTR_STRICTATIME +#define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */ +#endif + +#ifndef MOUNT_ATTR_NODIRATIME +#define MOUNT_ATTR_NODIRATIME 0x00000080 /* Do not update directory access times */ +#endif + +#ifndef MOUNT_ATTR_IDMAP +#define MOUNT_ATTR_IDMAP 0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. 
*/
+#endif
+
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000 /* Do not follow symlinks */
+#endif
+
+#ifndef MS_RDONLY
+#define MS_RDONLY 1 /* Mount read-only */
+#endif
+
+#ifndef MS_SYNCHRONOUS
+#define MS_SYNCHRONOUS 16 /* Writes are synced at once */
+#endif
+
+#ifndef MS_MANDLOCK
+#define MS_MANDLOCK 64 /* Allow mandatory locks on an FS */
+#endif
+
+#ifndef MS_DIRSYNC
+#define MS_DIRSYNC 128 /* Directory modifications are synchronous */
+#endif
+
+#ifndef MS_UNBINDABLE
+#define MS_UNBINDABLE (1<<17) /* change to unbindable */
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18) /* change to private */
+#endif
+
+#ifndef MS_SLAVE
+#define MS_SLAVE (1<<19) /* change to slave */
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1<<20) /* change to shared */
+#endif
+
+#ifndef MS_LAZYTIME
+#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */
+#endif
+
+#endif /* __SAMPLES_VFS_H */
diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c
new file mode 100644
index 000000000000..1a02ea4593e3
--- /dev/null
+++ b/samples/vfs/test-list-all-mounts.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <inttypes.h>
+#include <stdio.h>
+
+#include "../../tools/testing/selftests/pidfd/pidfd.h"
+#include "samples-vfs.h"
+
+/* Thin statmount(2) wrapper: returns the raw syscall() result. */
+static int __statmount(__u64 mnt_id, __u64 mnt_ns_id, __u64 mask,
+		       struct statmount *stmnt, size_t bufsize,
+		       unsigned int flags)
+{
+	struct mnt_id_req req = {
+		.size = MNT_ID_REQ_SIZE_VER1,
+		.mnt_id = mnt_id,
+		.param = mask,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_statmount, &req, stmnt, bufsize, flags);
+}
+
+/*
+ * Call statmount into a heap buffer, doubling it whenever the call fails
+ * with EOVERFLOW. Returns the buffer (release it with free()) or NULL.
+ */
+static struct statmount *sys_statmount(__u64 mnt_id, __u64 mnt_ns_id,
+				       __u64 mask, unsigned int flags)
+{
+	size_t bufsize = 1 << 15;
+	struct statmount *stmnt = NULL, *tmp = NULL;
+	int ret;
+
+	for (;;) {
+		tmp = realloc(stmnt, bufsize);
+		if (!tmp)
+			goto out;
+
+		stmnt = tmp;
+		ret = __statmount(mnt_id, mnt_ns_id, mask, stmnt, bufsize, flags);
+		if (!ret)
+			return stmnt;
+
+		if (errno != EOVERFLOW)
+			goto out;
+
+		bufsize <<= 1;
+		if (bufsize >= UINT_MAX / 2)
+			goto out;
+	}
+
+out:
+	free(stmnt);
+	return NULL;
+}
+
+/* Thin listmount(2) wrapper: returns the raw syscall() result. */
+static ssize_t sys_listmount(__u64 mnt_id, __u64 last_mnt_id, __u64 mnt_ns_id,
+			     __u64 list[], size_t num, unsigned int flags)
+{
+	struct mnt_id_req req = {
+		.size = MNT_ID_REQ_SIZE_VER1,
+		.mnt_id = mnt_id,
+		.param = last_mnt_id,
+		.mnt_ns_id = mnt_ns_id,
+	};
+
+	return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+/* Print the mounts of the caller's mount namespace, then of every namespace NS_MNT_GET_NEXT yields. */
+int main(int argc, char *argv[])
+{
+#define LISTMNT_BUFFER 10
+	__u64 list[LISTMNT_BUFFER], last_mnt_id = 0;
+	int ret, pidfd, fd_mntns;
+	struct mnt_ns_info info = {};
+
+	pidfd = sys_pidfd_open(getpid(), 0);
+	if (pidfd < 0)
+		die_errno("pidfd_open failed");
+
+	fd_mntns = ioctl(pidfd, PIDFD_GET_MNT_NAMESPACE, 0);
+	if (fd_mntns < 0)
+		die_errno("ioctl(PIDFD_GET_MNT_NAMESPACE) failed");
+
+	ret = ioctl(fd_mntns, NS_MNT_GET_INFO, &info);
+	if (ret < 0)
+		die_errno("ioctl(NS_MNT_GET_INFO) failed");
+
+	printf("Listing %u mounts for mount namespace %" PRIu64 "\n",
+	       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+	for (;;) {
+		ssize_t nr_mounts;
+next:
+		nr_mounts = sys_listmount(LSMT_ROOT, last_mnt_id,
+					  info.mnt_ns_id, list, LISTMNT_BUFFER,
+					  0);
+		/* Nothing (more) to list, or listmount failed: go to the next namespace. */
+		if (nr_mounts <= 0) {
+			int fd_mntns_next;
+
+			printf("Finished listing %u mounts for mount namespace %" PRIu64 "\n\n",
+			       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+			fd_mntns_next = ioctl(fd_mntns, NS_MNT_GET_NEXT, &info);
+			if (fd_mntns_next < 0) {
+				if (errno == ENOENT) {
+					printf("Finished listing all mount namespaces\n");
+					exit(0);
+				}
+				die_errno("ioctl(NS_MNT_GET_NEXT) failed");
+			}
+			close(fd_mntns);
+			fd_mntns = fd_mntns_next;
+			last_mnt_id = 0;
+			printf("Listing %u mounts for mount namespace %" PRIu64 "\n",
+			       info.nr_mounts, (uint64_t)info.mnt_ns_id);
+			goto next;
+		}
+
+		for (ssize_t cur = 0; cur < nr_mounts; cur++) {
+			struct statmount *stmnt;
+
+			last_mnt_id = list[cur];
+
+			stmnt = sys_statmount(last_mnt_id, info.mnt_ns_id,
+					      STATMOUNT_SB_BASIC |
+					      STATMOUNT_MNT_BASIC |
+					      STATMOUNT_MNT_ROOT |
+					      STATMOUNT_MNT_POINT |
+					      STATMOUNT_MNT_NS_ID |
+					      STATMOUNT_MNT_OPTS |
+					      STATMOUNT_FS_TYPE, 0);
+			if (!stmnt) {
+				printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n",
+				       (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id);
+				continue;
+			}
+
+			printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n",
+			       (uint64_t)stmnt->mnt_id,
+			       (uint64_t)stmnt->mnt_parent_id,
+			       stmnt->str + stmnt->fs_type,
+			       stmnt->str + stmnt->mnt_root,
+			       stmnt->str + stmnt->mnt_point,
+			       stmnt->str + stmnt->mnt_opts);
+			free(stmnt);
+		}
+	}
+
+	exit(0);
+}
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/filesystems/nsfs/.gitignore
index ed79ebdf286e..92a8249006d1 100644
--- a/tools/testing/selftests/nsfs/.gitignore
+++ b/tools/testing/selftests/filesystems/nsfs/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 owner
 pidns
+iterate_mntns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/filesystems/nsfs/Makefile
index dd9bd50b7b93..231aaa7dfd95 100644
--- a/tools/testing/selftests/nsfs/Makefile
+++ b/tools/testing/selftests/filesystems/nsfs/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := owner pidns
+TEST_GEN_PROGS := owner pidns iterate_mntns
 
 CFLAGS := -Wall -Werror
 
-include ../lib.mk
+include ../../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/filesystems/nsfs/config
index 598d0a225fc9..598d0a225fc9 100644
--- a/tools/testing/selftests/nsfs/config
+++ b/tools/testing/selftests/filesystems/nsfs/config
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
new file mode 100644
index 000000000000..457cf76f3c5f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define MNT_NS_COUNT 11
+#define MNT_NS_LAST_INDEX 10
+
+struct mnt_ns_info {
+	__u32 size;
+	__u32 nr_mounts;
+	__u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
+
+/* Fds and ids of the mount namespaces created in setup, in creation order. */
+FIXTURE(iterate_mount_namespaces) {
+	int fd_mnt_ns[MNT_NS_COUNT];
+	__u64 mnt_ns_id[MNT_NS_COUNT];
+};
+
+/* Create MNT_NS_COUNT mount namespaces and record an fd and the id of each. */
+FIXTURE_SETUP(iterate_mount_namespaces)
+{
+	for (int i = 0; i < MNT_NS_COUNT; i++)
+		self->fd_mnt_ns[i] = -EBADF;
+
+	/*
+	 * Creating a new user namespace lets us guarantee that we only see
+	 * mount namespaces that we did actually create.
+	 */
+	ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+
+	for (int i = 0; i < MNT_NS_COUNT; i++) {
+		struct mnt_ns_info info = {};
+
+		ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+		self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+		ASSERT_GE(self->fd_mnt_ns[i], 0);
+		ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0);
+		self->mnt_ns_id[i] = info.mnt_ns_id;
+	}
+}
+
+/* Close every namespace fd that setup opened. */
+FIXTURE_TEARDOWN(iterate_mount_namespaces)
+{
+	for (int i = 0; i < MNT_NS_COUNT; i++) {
+		if (self->fd_mnt_ns[i] < 0)
+			continue;
+		ASSERT_EQ(close(self->fd_mnt_ns[i]), 0);
+	}
+}
+
+/* From the first namespace, NS_MNT_GET_NEXT must succeed MNT_NS_LAST_INDEX times before ENOENT. */
+TEST_F(iterate_mount_namespaces, iterate_all_forward)
+{
+	int fd_mnt_ns_cur, count = 0;
+
+	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC, 0);
+	ASSERT_GE(fd_mnt_ns_cur, 0);
+
+	for (;; count++) {
+		struct mnt_ns_info info = {};
+		int fd_mnt_ns_next;
+
+		fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+		if (fd_mnt_ns_next < 0 && errno == ENOENT)
+			break;
+		ASSERT_GE(fd_mnt_ns_next, 0);
+		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+		fd_mnt_ns_cur = fd_mnt_ns_next;
+	}
+	ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+}
+
+/* From the last namespace, NS_MNT_GET_PREV must succeed MNT_NS_LAST_INDEX times before ENOENT. */
+TEST_F(iterate_mount_namespaces, iterate_all_backwards)
+{
+	int fd_mnt_ns_cur, count = 0;
+
+	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC, 0);
+	ASSERT_GE(fd_mnt_ns_cur, 0);
+
+	for (;; count++) {
+		struct mnt_ns_info info = {};
+		int fd_mnt_ns_prev;
+
+		fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+		if (fd_mnt_ns_prev < 0 && errno == ENOENT)
+			break;
+		ASSERT_GE(fd_mnt_ns_prev, 0);
+		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+		fd_mnt_ns_cur = fd_mnt_ns_prev;
+	}
+	ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+}
+
+/* Each NS_MNT_GET_NEXT step must report the id recorded for the next namespace. */
+TEST_F(iterate_mount_namespaces, iterate_forward)
+{
+	int fd_mnt_ns_cur;
+
+	ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0);
+
+	/* Walk a duplicate: fd_mnt_ns[0] itself stays owned by the fixture. */
+	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC, 0);
+	ASSERT_GE(fd_mnt_ns_cur, 0);
+	for (int i = 1; i < MNT_NS_COUNT; i++) {
+		struct mnt_ns_info info = {};
+		int fd_mnt_ns_next;
+
+		fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+		ASSERT_GE(fd_mnt_ns_next, 0);
+		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+		fd_mnt_ns_cur = fd_mnt_ns_next;
+		ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+	}
+	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+}
+
+/* Each NS_MNT_GET_PREV step must report the id recorded for the previous namespace. */
+TEST_F(iterate_mount_namespaces, iterate_backward)
+{
+	int fd_mnt_ns_cur;
+
+	ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0);
+
+	/* Walk a duplicate: fd_mnt_ns[MNT_NS_LAST_INDEX] stays owned by the fixture. */
+	fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC, 0);
+	ASSERT_GE(fd_mnt_ns_cur, 0);
+	for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) {
+		struct mnt_ns_info info = {};
+		int fd_mnt_ns_prev;
+
+		fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+		ASSERT_GE(fd_mnt_ns_prev, 0);
+		ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+		fd_mnt_ns_cur = fd_mnt_ns_prev;
+		ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+	}
+	ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/filesystems/nsfs/owner.c
index 96a976c74550..96a976c74550 100644
--- a/tools/testing/selftests/nsfs/owner.c
+++ b/tools/testing/selftests/filesystems/nsfs/owner.c
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/filesystems/nsfs/pidns.c
index e3c772c6a7c7..e3c772c6a7c7 100644
--- a/tools/testing/selftests/nsfs/pidns.c
+++ b/tools/testing/selftests/filesystems/nsfs/pidns.c
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 3af3136e35a4..14ee91a41650 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 
 CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := statmount_test statmount_test_ns
+TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c
new file mode 100644
index 000000000000..15f0834f7557
--- /dev/null
+++
b/tools/testing/selftests/filesystems/statmount/listmount_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "statmount.h"
+#include "../../kselftest_harness.h"
+
+#ifndef LISTMOUNT_REVERSE
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+#endif
+
+#define LISTMNT_BUFFER 10
+
+/* Check that all mount ids are in increasing order. */
+TEST(listmount_forward)
+{
+	uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+	for (;;) {
+		ssize_t nr_mounts;
+
+		nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+				      list, LISTMNT_BUFFER, 0);
+		ASSERT_GE(nr_mounts, 0);
+		if (nr_mounts == 0)
+			break;
+
+		for (ssize_t cur = 0; cur < nr_mounts; cur++) {
+			/* last_mnt_id carries over, so batch boundaries are checked too. */
+			ASSERT_LT(last_mnt_id, list[cur]);
+			last_mnt_id = list[cur];
+		}
+	}
+}
+
+/* Check that all mount ids are in decreasing order. */
+TEST(listmount_backward)
+{
+	uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+	for (;;) {
+		ssize_t nr_mounts;
+
+		nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+				      list, LISTMNT_BUFFER, LISTMOUNT_REVERSE);
+		ASSERT_GE(nr_mounts, 0);
+		if (nr_mounts == 0)
+			break;
+
+		for (ssize_t cur = 0; cur < nr_mounts; cur++) {
+			if (last_mnt_id) /* zero: nothing listed yet */
+				ASSERT_GT(last_mnt_id, list[cur]);
+			last_mnt_id = list[cur];
+		}
+	}
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 28a471c88c51..0b96ac4b8ce5 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -12,7 +12,6 @@
 #include <stdlib.h>
 #include <string.h>
 #include <syscall.h>
-#include <sys/mount.h>
 #include <sys/types.h>
 #include <sys/wait.h>
 
|