diff options
52 files changed, 1567 insertions, 175 deletions
diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index c59d53d6d3f3..2dd6340de6b4 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -506,3 +506,4 @@ 574 common getxattrat sys_getxattrat 575 common listxattrat sys_listxattrat 576 common removexattrat sys_removexattrat +577 common open_tree_attr sys_open_tree_attr diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 49eeb2ad8dbd..27c1d5ebcd91 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/arm64/tools/syscall_32.tbl b/arch/arm64/tools/syscall_32.tbl index 69a829912a05..0765b3a8d6d6 100644 --- a/arch/arm64/tools/syscall_32.tbl +++ b/arch/arm64/tools/syscall_32.tbl @@ -478,3 +478,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f5ed71f1910d..9fe47112c586 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -466,3 +466,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 680f568b77f2..7b6e97828e55 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -472,3 +472,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 0b9b7e25b69a..aa70e371bb54 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -405,3 +405,4 @@ 464 n32 getxattrat sys_getxattrat 465 n32 listxattrat sys_listxattrat 466 n32 removexattrat sys_removexattrat +467 n32 open_tree_attr sys_open_tree_attr diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c844cd5cda62..1e8c44c7b614 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 349b8aad1159..114a5a1a6230 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -454,3 +454,4 @@ 464 o32 getxattrat sys_getxattrat 465 o32 listxattrat sys_listxattrat 466 o32 removexattrat sys_removexattrat +467 o32 open_tree_attr sys_open_tree_attr diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index d9fc94c86965..94df3cb957e9 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -465,3 +465,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index d8b4ab78bef0..9a084bdb8926 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index c8cad33bf250..52a7652fcff6 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 727f99d333b3..83e45eb6c095 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e2..3f0ec87d5db4 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c7..cfb5ca41e30d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 37effc1b134e..f657a77314f8 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 77c7991d89aa..23cea74f9933 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -218,6 +218,8 @@ void autofs_clean_ino(struct autofs_info *); static inline int autofs_check_pipe(struct file *pipe) { + if (pipe->f_mode & FMODE_PATH) + return -EINVAL; if (!(pipe->f_mode & FMODE_CAN_WRITE)) return -EINVAL; if (!S_ISFIFO(file_inode(pipe)->i_mode)) diff --git a/fs/fsopen.c b/fs/fsopen.c index 094a7f510edf..1aaf4cb2afb2 100644 --- a/fs/fsopen.c +++ b/fs/fsopen.c @@ -453,7 +453,7 @@ SYSCALL_DEFINE5(fsconfig, case FSCONFIG_SET_FD: param.type = fs_value_is_file; ret = -EBADF; - param.file = fget(aux); + param.file = fget_raw(aux); if (!param.file) goto out_key; param.dirfd = aux; diff --git a/fs/internal.h b/fs/internal.h index 82127c69e641..4c19d15ef08a 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path) } void file_f_owner_release(struct file *file); bool file_seek_cur_needs_f_lock(struct file *file); +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 7b1df8cc2821..a37991fdb194 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -6,6 +6,7 @@ #include <linux/mnt_idmapping.h> #include <linux/slab.h> #include <linux/user_namespace.h> +#include <linux/seq_file.h> #include "internal.h" @@ -334,3 +335,53 @@ void mnt_idmap_put(struct mnt_idmap *idmap) free_mnt_idmap(idmap); } EXPORT_SYMBOL_GPL(mnt_idmap_put); + +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) +{ + struct uid_gid_map *map, *map_up; + u32 idx, nr_mappings; + + if (!is_valid_mnt_idmap(idmap)) + return 0; + + /* + * Idmappings are shown relative to the caller's idmapping. + * This is both the most intuitive and most useful solution. + */ + if (uid_map) { + map = &idmap->uid_map; + map_up = ¤t_user_ns()->uid_map; + } else { + map = &idmap->gid_map; + map_up = ¤t_user_ns()->gid_map; + } + + for (idx = 0, nr_mappings = 0; idx < map->nr_extents; idx++) { + uid_t lower; + struct uid_gid_extent *extent; + + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) + extent = &map->extent[idx]; + else + extent = &map->forward[idx]; + + /* + * Verify that the whole range of the mapping can be + * resolved in the caller's idmapping. If it cannot be + * resolved skip the mapping. + */ + lower = map_id_range_up(map_up, extent->lower_first, extent->count); + if (lower == (uid_t) -1) + continue; + + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); + + seq->count++; /* mappings are separated by \0 */ + if (seq_has_overflowed(seq)) + return -EAGAIN; + + nr_mappings++; + } + + return nr_mappings; +} diff --git a/fs/mount.h b/fs/mount.h index ffb613cdfeee..946dc8b792d7 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -5,6 +5,8 @@ #include <linux/ns_common.h> #include <linux/fs_pin.h> +extern struct list_head notify_list; + struct mnt_namespace { struct ns_common ns; struct mount * root; @@ -21,6 +23,10 @@ struct mnt_namespace { struct rcu_head mnt_ns_rcu; }; u64 event; +#ifdef CONFIG_FSNOTIFY + __u32 n_fsnotify_mask; + struct fsnotify_mark_connector __rcu *n_fsnotify_marks; +#endif unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */ @@ -76,6 +82,8 @@ struct mount { #ifdef CONFIG_FSNOTIFY struct fsnotify_mark_connector __rcu *mnt_fsnotify_marks; __u32 mnt_fsnotify_mask; + struct list_head to_notify; /* need to queue notification */ + struct mnt_namespace *prev_ns; /* previous namespace (NULL if none) */ #endif int mnt_id; /* mount identifier, reused */ u64 mnt_id_unique; /* mount ID unique until reboot */ @@ -177,3 +185,21 @@ static inline struct mnt_namespace *to_mnt_ns(struct ns_common *ns) { return container_of(ns, struct mnt_namespace, ns); } + +#ifdef CONFIG_FSNOTIFY +static inline void mnt_notify_add(struct mount *m) +{ + /* Optimize the case where there are no watches */ + if ((m->mnt_ns && m->mnt_ns->n_fsnotify_marks) || + (m->prev_ns && m->prev_ns->n_fsnotify_marks)) + list_add_tail(&m->to_notify, ¬ify_list); + else + m->prev_ns = m->mnt_ns; +} +#else +static inline void mnt_notify_add(struct mount *m) +{ +} +#endif + +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry); diff --git a/fs/namespace.c b/fs/namespace.c index 8f1000f9f3df..4c8bd48c8a62 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -81,15 +81,23 @@ static HLIST_HEAD(unmounted); /* protected by namespace_sem */ static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */ static DEFINE_SEQLOCK(mnt_ns_tree_lock); +#ifdef CONFIG_FSNOTIFY +LIST_HEAD(notify_list); /* protected by namespace_sem */ +#endif static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */ static LIST_HEAD(mnt_ns_list); /* protected by mnt_ns_tree_lock */ +enum mount_kattr_flags_t { + MOUNT_KATTR_RECURSE = (1 << 0), + MOUNT_KATTR_IDMAP_REPLACE = (1 << 1), +}; + struct mount_kattr { unsigned int attr_set; unsigned int attr_clr; unsigned int propagation; unsigned int lookup_flags; - bool recurse; + enum mount_kattr_flags_t kflags; struct user_namespace *mnt_userns; struct mnt_idmap *mnt_idmap; }; @@ -163,6 +171,7 @@ static void mnt_ns_release(struct mnt_namespace *ns) { /* keep alive for {list,stat}mount() */ if (refcount_dec_and_test(&ns->passive)) { + fsnotify_mntns_delete(ns); put_user_ns(ns->user_ns); kfree(ns); } @@ -1176,6 +1185,8 @@ static void mnt_add_to_ns(struct mnt_namespace *ns, struct mount *mnt) ns->mnt_first_node = &mnt->mnt_node; rb_link_node(&mnt->mnt_node, parent, link); rb_insert_color(&mnt->mnt_node, &ns->mounts); + + mnt_notify_add(mnt); } /* @@ -1723,6 +1734,50 @@ int may_umount(struct vfsmount *mnt) EXPORT_SYMBOL(may_umount); +#ifdef CONFIG_FSNOTIFY +static void mnt_notify(struct mount *p) +{ + if (!p->prev_ns && p->mnt_ns) { + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); + } else if (p->prev_ns && !p->mnt_ns) { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + } else if (p->prev_ns == p->mnt_ns) { + fsnotify_mnt_move(p->mnt_ns, &p->mnt); + } else { + fsnotify_mnt_detach(p->prev_ns, &p->mnt); + fsnotify_mnt_attach(p->mnt_ns, &p->mnt); + } + p->prev_ns = p->mnt_ns; +} + +static void notify_mnt_list(void) +{ + struct mount *m, *tmp; + /* + * Notify about mounts that were added/reparented/detached/remain + * connected after unmount. + */ + list_for_each_entry_safe(m, tmp, ¬ify_list, to_notify) { + mnt_notify(m); + list_del_init(&m->to_notify); + } +} + +static bool need_notify_mnt_list(void) +{ + return !list_empty(¬ify_list); +} +#else +static void notify_mnt_list(void) +{ +} + +static bool need_notify_mnt_list(void) +{ + return false; +} +#endif + static void namespace_unlock(void) { struct hlist_head head; @@ -1733,7 +1788,18 @@ static void namespace_unlock(void) hlist_move_list(&unmounted, &head); list_splice_init(&ex_mountpoints, &list); - up_write(&namespace_sem); + if (need_notify_mnt_list()) { + /* + * No point blocking out concurrent readers while notifications + * are sent. This will also allow statmount()/listmount() to run + * concurrently. + */ + downgrade_write(&namespace_sem); + notify_mnt_list(); + up_read(&namespace_sem); + } else { + up_write(&namespace_sem); + } shrink_dentry_list(&list); @@ -1846,6 +1912,19 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) change_mnt_propagation(p, MS_PRIVATE); if (disconnect) hlist_add_head(&p->mnt_umount, &unmounted); + + /* + * At this point p->mnt_ns is NULL, notification will be queued + * only if + * + * - p->prev_ns is non-NULL *and* + * - p->prev_ns->n_fsnotify_marks is non-NULL + * + * This will preclude queuing the mount if this is a cleanup + * after a failed copy_tree() or destruction of an anonymous + * namespace, etc. + */ + mnt_notify_add(p); } } @@ -2026,6 +2105,7 @@ static void warn_mandlock(void) static int can_umount(const struct path *path, int flags) { struct mount *mnt = real_mount(path->mnt); + struct super_block *sb = path->dentry->d_sb; if (!may_mount()) return -EPERM; @@ -2035,7 +2115,7 @@ static int can_umount(const struct path *path, int flags) return -EINVAL; if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */ return -EINVAL; - if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN)) + if (flags & MNT_FORCE && !ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) return -EPERM; return 0; } @@ -2145,16 +2225,24 @@ struct mnt_namespace *get_sequential_mnt_ns(struct mnt_namespace *mntns, bool pr } } +struct mnt_namespace *mnt_ns_from_dentry(struct dentry *dentry) +{ + if (!is_mnt_ns_file(dentry)) + return NULL; + + return to_mnt_ns(get_proc_ns(dentry->d_inode)); +} + static bool mnt_ns_loop(struct dentry *dentry) { /* Could bind mounting the mount namespace inode cause a * mount namespace loop? */ - struct mnt_namespace *mnt_ns; - if (!is_mnt_ns_file(dentry)) + struct mnt_namespace *mnt_ns = mnt_ns_from_dentry(dentry); + + if (!mnt_ns) return false; - mnt_ns = to_mnt_ns(get_proc_ns(dentry->d_inode)); return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; } @@ -2287,6 +2375,28 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) return false; } +/* + * Check that there aren't references to earlier/same mount namespaces in the + * specified subtree. Such references can act as pins for mount namespaces + * that aren't checked by the mount-cycle checking code, thereby allowing + * cycles to be made. + */ +static bool check_for_nsfs_mounts(struct mount *subtree) +{ + struct mount *p; + bool ret = false; + + lock_mount_hash(); + for (p = subtree; p; p = next_mnt(p, subtree)) + if (mnt_ns_loop(p->mnt.mnt_root)) + goto out; + + ret = true; +out: + unlock_mount_hash(); + return ret; +} + /** * clone_private_mount - create a private clone of a path * @path: path to clone @@ -2295,6 +2405,8 @@ bool has_locked_children(struct mount *mnt, struct dentry *dentry) * will not be attached anywhere in the namespace and will be private (i.e. * changes to the originating mount won't be propagated into this). * + * This assumes caller has called or done the equivalent of may_mount(). + * * Release with mntput(). */ struct vfsmount *clone_private_mount(const struct path *path) @@ -2302,30 +2414,36 @@ struct vfsmount *clone_private_mount(const struct path *path) struct mount *old_mnt = real_mount(path->mnt); struct mount *new_mnt; - down_read(&namespace_sem); + scoped_guard(rwsem_read, &namespace_sem) if (IS_MNT_UNBINDABLE(old_mnt)) - goto invalid; + return ERR_PTR(-EINVAL); + + if (mnt_has_parent(old_mnt)) { + if (!check_mnt(old_mnt)) + return ERR_PTR(-EINVAL); + } else { + if (!is_mounted(&old_mnt->mnt)) + return ERR_PTR(-EINVAL); - if (!check_mnt(old_mnt)) - goto invalid; + /* Make sure this isn't something purely kernel internal. */ + if (!is_anon_ns(old_mnt->mnt_ns)) + return ERR_PTR(-EINVAL); + + /* Make sure we don't create mount namespace loops. */ + if (!check_for_nsfs_mounts(old_mnt)) + return ERR_PTR(-EINVAL); + } if (has_locked_children(old_mnt, path->dentry)) - goto invalid; + return ERR_PTR(-EINVAL); new_mnt = clone_mnt(old_mnt, path->dentry, CL_PRIVATE); - up_read(&namespace_sem); - if (IS_ERR(new_mnt)) - return ERR_CAST(new_mnt); + return ERR_PTR(-EINVAL); /* Longterm mount to be removed by kern_unmount*() */ new_mnt->mnt_ns = MNT_NS_INTERNAL; - return &new_mnt->mnt; - -invalid: - up_read(&namespace_sem); - return ERR_PTR(-EINVAL); } EXPORT_SYMBOL_GPL(clone_private_mount); @@ -2547,6 +2665,7 @@ static int attach_recursive_mnt(struct mount *source_mnt, dest_mp = smp; unhash_mnt(source_mnt); attach_mnt(source_mnt, top_mnt, dest_mp, beneath); + mnt_notify_add(source_mnt); touch_mnt_namespace(source_mnt->mnt_ns); } else { if (source_mnt->mnt_ns) { @@ -2889,24 +3008,22 @@ static struct file *open_detached_copy(struct path *path, bool recursive) return file; } -SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) +static struct file *vfs_open_tree(int dfd, const char __user *filename, unsigned int flags) { - struct file *file; - struct path path; + int ret; + struct path path __free(path_put) = {}; int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; bool detached = flags & OPEN_TREE_CLONE; - int error; - int fd; BUILD_BUG_ON(OPEN_TREE_CLOEXEC != O_CLOEXEC); if (flags & ~(AT_EMPTY_PATH | AT_NO_AUTOMOUNT | AT_RECURSIVE | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC)) - return -EINVAL; + return ERR_PTR(-EINVAL); if ((flags & (AT_RECURSIVE | OPEN_TREE_CLONE)) == AT_RECURSIVE) - return -EINVAL; + return ERR_PTR(-EINVAL); if (flags & AT_NO_AUTOMOUNT) lookup_flags &= ~LOOKUP_AUTOMOUNT; @@ -2916,27 +3033,32 @@ SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, fl lookup_flags |= LOOKUP_EMPTY; if (detached && !may_mount()) - return -EPERM; + return ERR_PTR(-EPERM); + + ret = user_path_at(dfd, filename, lookup_flags, &path); + if (unlikely(ret)) + return ERR_PTR(ret); + + if (detached) + return open_detached_copy(&path, flags & AT_RECURSIVE); + + return dentry_open(&path, O_PATH, current_cred()); +} + +SYSCALL_DEFINE3(open_tree, int, dfd, const char __user *, filename, unsigned, flags) +{ + int fd; + struct file *file __free(fput) = NULL; + + file = vfs_open_tree(dfd, filename, flags); + if (IS_ERR(file)) + return PTR_ERR(file); fd = get_unused_fd_flags(flags & O_CLOEXEC); if (fd < 0) return fd; - error = user_path_at(dfd, filename, lookup_flags, &path); - if (unlikely(error)) { - file = ERR_PTR(error); - } else { - if (detached) - file = open_detached_copy(&path, flags & AT_RECURSIVE); - else - file = dentry_open(&path, O_PATH, current_cred()); - path_put(&path); - } - if (IS_ERR(file)) { - put_unused_fd(fd); - return PTR_ERR(file); - } - fd_install(fd, file); + fd_install(fd, no_free_ptr(file)); return fd; } @@ -3123,28 +3245,6 @@ static inline int tree_contains_unbindable(struct mount *mnt) return 0; } -/* - * Check that there aren't references to earlier/same mount namespaces in the - * specified subtree. Such references can act as pins for mount namespaces - * that aren't checked by the mount-cycle checking code, thereby allowing - * cycles to be made. - */ -static bool check_for_nsfs_mounts(struct mount *subtree) -{ - struct mount *p; - bool ret = false; - - lock_mount_hash(); - for (p = subtree; p; p = next_mnt(p, subtree)) - if (mnt_ns_loop(p->mnt.mnt_root)) - goto out; - - ret = true; -out: - unlock_mount_hash(); - return ret; -} - static int do_set_group(struct path *from_path, struct path *to_path) { struct mount *from, *to; @@ -4468,6 +4568,8 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, list_del_init(&new_mnt->mnt_expire); put_mountpoint(root_mp); unlock_mount_hash(); + mnt_notify_add(root_mnt); + mnt_notify_add(new_mnt); chroot_fs_refs(&root, &new); error = 0; out4: @@ -4512,11 +4614,10 @@ static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) return -EINVAL; /* - * Once a mount has been idmapped we don't allow it to change its - * mapping. It makes things simpler and callers can just create - * another bind-mount they can idmap if they want to. + * We only allow an mount to change it's idmapping if it has + * never been accessible to userspace. */ - if (is_idmapped_mnt(m)) + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE) && is_idmapped_mnt(m)) return -EPERM; /* The underlying filesystem doesn't support idmapped mounts yet. */ @@ -4576,7 +4677,7 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) break; } - if (!kattr->recurse) + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) return 0; } @@ -4606,18 +4707,16 @@ static int mount_setattr_prepare(struct mount_kattr *kattr, struct mount *mnt) static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt) { + struct mnt_idmap *old_idmap; + if (!kattr->mnt_idmap) return; - /* - * Pairs with smp_load_acquire() in mnt_idmap(). - * - * Since we only allow a mount to change the idmapping once and - * verified this in can_idmap_mount() we know that the mount has - * @nop_mnt_idmap attached to it. So there's no need to drop any - * references. - */ + old_idmap = mnt_idmap(&mnt->mnt); + + /* Pairs with smp_load_acquire() in mnt_idmap(). */ smp_store_release(&mnt->mnt.mnt_idmap, mnt_idmap_get(kattr->mnt_idmap)); + mnt_idmap_put(old_idmap); } static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) @@ -4637,7 +4736,7 @@ static void mount_setattr_commit(struct mount_kattr *kattr, struct mount *mnt) if (kattr->propagation) change_mnt_propagation(m, kattr->propagation); - if (!kattr->recurse) + if (!(kattr->kflags & MOUNT_KATTR_RECURSE)) break; } touch_mnt_namespace(mnt->mnt_ns); @@ -4667,7 +4766,7 @@ static int do_mount_setattr(struct path *path, struct mount_kattr *kattr) */ namespace_lock(); if (kattr->propagation == MS_SHARED) { - err = invent_group_ids(mnt, kattr->recurse); + err = invent_group_ids(mnt, kattr->kflags & MOUNT_KATTR_RECURSE); if (err) { namespace_unlock(); return err; @@ -4718,7 +4817,7 @@ out: } static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, - struct mount_kattr *kattr, unsigned int flags) + struct mount_kattr *kattr) { struct ns_common *ns; struct user_namespace *mnt_userns; @@ -4726,13 +4825,23 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP)) return 0; - /* - * We currently do not support clearing an idmapped mount. If this ever - * is a use-case we can revisit this but for now let's keep it simple - * and not allow it. - */ - if (attr->attr_clr & MOUNT_ATTR_IDMAP) - return -EINVAL; + if (attr->attr_clr & MOUNT_ATTR_IDMAP) { + /* + * We can only remove an idmapping if it's never been + * exposed to userspace. + */ + if (!(kattr->kflags & MOUNT_KATTR_IDMAP_REPLACE)) + return -EINVAL; + + /* + * Removal of idmappings is equivalent to setting + * nop_mnt_idmap. + */ + if (!(attr->attr_set & MOUNT_ATTR_IDMAP)) { + kattr->mnt_idmap = &nop_mnt_idmap; + return 0; + } + } if (attr->userns_fd > INT_MAX) return -EINVAL; @@ -4769,22 +4878,8 @@ static int build_mount_idmapped(const struct mount_attr *attr, size_t usize, } static int build_mount_kattr(const struct mount_attr *attr, size_t usize, - struct mount_kattr *kattr, unsigned int flags) + struct mount_kattr *kattr) { - unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; - - if (flags & AT_NO_AUTOMOUNT) - lookup_flags &= ~LOOKUP_AUTOMOUNT; - if (flags & AT_SYMLINK_NOFOLLOW) - lookup_flags &= ~LOOKUP_FOLLOW; - if (flags & AT_EMPTY_PATH) - lookup_flags |= LOOKUP_EMPTY; - - *kattr = (struct mount_kattr) { - .lookup_flags = lookup_flags, - .recurse = !!(flags & AT_RECURSIVE), - }; - if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS) return -EINVAL; if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1) @@ -4832,35 +4927,28 @@ static int build_mount_kattr(const struct mount_attr *attr, size_t usize, return -EINVAL; } - return build_mount_idmapped(attr, usize, kattr, flags); + return build_mount_idmapped(attr, usize, kattr); } static void finish_mount_kattr(struct mount_kattr *kattr) { - put_user_ns(kattr->mnt_userns); - kattr->mnt_userns = NULL; + if (kattr->mnt_userns) { + put_user_ns(kattr->mnt_userns); + kattr->mnt_userns = NULL; + } if (kattr->mnt_idmap) mnt_idmap_put(kattr->mnt_idmap); } -SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, - unsigned int, flags, struct mount_attr __user *, uattr, - size_t, usize) +static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, + struct mount_kattr *kattr) { - int err; - struct path target; + int ret; struct mount_attr attr; - struct mount_kattr kattr; BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0); - if (flags & ~(AT_EMPTY_PATH | - AT_RECURSIVE | - AT_SYMLINK_NOFOLLOW | - AT_NO_AUTOMOUNT)) - return -EINVAL; - if (unlikely(usize > PAGE_SIZE)) return -E2BIG; if (unlikely(usize < MOUNT_ATTR_SIZE_VER0)) @@ -4869,9 +4957,9 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, if (!may_mount()) return -EPERM; - err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); - if (err) - return err; + ret = copy_struct_from_user(&attr, sizeof(attr), uattr, usize); + if (ret) + return ret; /* Don't bother walking through the mounts if this is a nop. */ if (attr.attr_set == 0 && @@ -4879,7 +4967,39 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, attr.propagation == 0) return 0; - err = build_mount_kattr(&attr, usize, &kattr, flags); + return build_mount_kattr(&attr, usize, kattr); +} + +SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, + unsigned int, flags, struct mount_attr __user *, uattr, + size_t, usize) +{ + int err; + struct path target; + struct mount_kattr kattr; + unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW; + + if (flags & ~(AT_EMPTY_PATH | + AT_RECURSIVE | + AT_SYMLINK_NOFOLLOW | + AT_NO_AUTOMOUNT)) + return -EINVAL; + + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_EMPTY_PATH) + lookup_flags |= LOOKUP_EMPTY; + + kattr = (struct mount_kattr) { + .lookup_flags = lookup_flags, + }; + + if (flags & AT_RECURSIVE) + kattr.kflags |= MOUNT_KATTR_RECURSE; + + err = copy_mount_setattr(uattr, usize, &kattr); if (err) return err; @@ -4892,6 +5012,47 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, return err; } +SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, + unsigned, flags, struct mount_attr __user *, uattr, + size_t, usize) +{ + struct file __free(fput) *file = NULL; + int fd; + + if (!uattr && usize) + return -EINVAL; + + file = vfs_open_tree(dfd, filename, flags); + if (IS_ERR(file)) + return PTR_ERR(file); + + if (uattr) { + int ret; + struct mount_kattr kattr = {}; + + kattr.kflags = MOUNT_KATTR_IDMAP_REPLACE; + if (flags & AT_RECURSIVE) + kattr.kflags |= MOUNT_KATTR_RECURSE; + + ret = copy_mount_setattr(uattr, usize, &kattr); + if (ret) + return ret; + + ret = do_mount_setattr(&file->f_path, &kattr); + if (ret) + return ret; + + finish_mount_kattr(&kattr); + } + + fd = get_unused_fd_flags(flags & O_CLOEXEC); + if (fd < 0) + return fd; + + fd_install(fd, no_free_ptr(file)); + return fd; +} + int show_path(struct seq_file *m, struct dentry *root) { if (root->d_sb->s_op->show_path) @@ -4915,6 +5076,7 @@ struct kstatmount { struct statmount __user *buf; size_t bufsize; struct vfsmount *mnt; + struct mnt_idmap *idmap; u64 mask; struct path root; struct statmount sm; @@ -5184,6 +5346,46 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq) return 0; } +static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, true); + if (ret < 0) + return ret; + + s->sm.mnt_uidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_UIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. + */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_UIDMAP; + return 0; +} + +static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) +{ + int ret; + + ret = statmount_mnt_idmap(s->idmap, seq, false); + if (ret < 0) + return ret; + + s->sm.mnt_gidmap_num = ret; + /* + * Always raise STATMOUNT_MNT_GIDMAP even if there are no valid + * mappings. This allows userspace to distinguish between a + * non-idmapped mount and an idmapped mount where none of the + * individual mappings are valid in the caller's idmapping. + */ + if (is_valid_mnt_idmap(s->idmap)) + s->sm.mask |= STATMOUNT_MNT_GIDMAP; + return 0; +} + static int statmount_string(struct kstatmount *s, u64 flag) { int ret = 0; @@ -5231,6 +5433,14 @@ static int statmount_string(struct kstatmount *s, u64 flag) offp = &sm->sb_source; ret = statmount_sb_source(s, seq); break; + case STATMOUNT_MNT_UIDMAP: + sm->mnt_uidmap = start; + ret = statmount_mnt_uidmap(s, seq); + break; + case STATMOUNT_MNT_GIDMAP: + sm->mnt_gidmap = start; + ret = statmount_mnt_gidmap(s, seq); + break; default: WARN_ON_ONCE(true); return -EINVAL; @@ -5323,6 +5533,21 @@ static int grab_requested_root(struct mnt_namespace *ns, struct path *root) return 0; } +/* This must be updated whenever a new flag is added */ +#define STATMOUNT_SUPPORTED (STATMOUNT_SB_BASIC | \ + STATMOUNT_MNT_BASIC | \ + STATMOUNT_PROPAGATE_FROM | \ + STATMOUNT_MNT_ROOT | \ + STATMOUNT_MNT_POINT | \ + STATMOUNT_FS_TYPE | \ + STATMOUNT_MNT_NS_ID | \ + STATMOUNT_MNT_OPTS | \ + STATMOUNT_FS_SUBTYPE | \ + STATMOUNT_SB_SOURCE | \ + STATMOUNT_OPT_ARRAY | \ + STATMOUNT_OPT_SEC_ARRAY | \ + STATMOUNT_SUPPORTED_MASK) + static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, struct mnt_namespace *ns) { @@ -5356,6 +5581,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, return err; s->root = root; + s->idmap = mnt_idmap(s->mnt); if (s->mask & STATMOUNT_SB_BASIC) statmount_sb_basic(s); @@ -5389,12 +5615,26 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, if (!err && s->mask & STATMOUNT_SB_SOURCE) err = statmount_string(s, STATMOUNT_SB_SOURCE); + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); + + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); + if (!err && s->mask & STATMOUNT_MNT_NS_ID) statmount_mnt_ns_id(s, ns); + if (!err && s->mask & STATMOUNT_SUPPORTED_MASK) { + s->sm.mask |= STATMOUNT_SUPPORTED_MASK; + s->sm.supported_mask = STATMOUNT_SUPPORTED; + } + if (err) return err; + /* Are there bits in the return mask not present in STATMOUNT_SUPPORTED? */ + WARN_ON_ONCE(~STATMOUNT_SUPPORTED & s->sm.mask); + return 0; } @@ -5412,7 +5652,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, struct statmount __user *buf, size_t bufsize, diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index 95646f7c46ca..6d386080faf2 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -166,6 +166,8 @@ static bool fanotify_should_merge(struct fanotify_event *old, case FANOTIFY_EVENT_TYPE_FS_ERROR: return fanotify_error_event_equal(FANOTIFY_EE(old), FANOTIFY_EE(new)); + case FANOTIFY_EVENT_TYPE_MNT: + return false; default: WARN_ON_ONCE(1); } @@ -312,7 +314,10 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group, pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n", __func__, iter_info->report_mask, event_mask, data, data_type); - if (!fid_mode) { + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (data_type != FSNOTIFY_EVENT_MNT) + return 0; + } else if (!fid_mode) { /* Do we have path to open a file descriptor? */ if (!path) return 0; @@ -557,6 +562,20 @@ static struct fanotify_event *fanotify_alloc_path_event(const struct path *path, return &pevent->fae; } +static struct fanotify_event *fanotify_alloc_mnt_event(u64 mnt_id, gfp_t gfp) +{ + struct fanotify_mnt_event *pevent; + + pevent = kmem_cache_alloc(fanotify_mnt_event_cachep, gfp); + if (!pevent) + return NULL; + + pevent->fae.type = FANOTIFY_EVENT_TYPE_MNT; + pevent->mnt_id = mnt_id; + + return &pevent->fae; +} + static struct fanotify_event *fanotify_alloc_perm_event(const void *data, int data_type, gfp_t gfp) @@ -731,6 +750,7 @@ static struct fanotify_event *fanotify_alloc_event( fid_mode); struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir); const struct path *path = fsnotify_data_path(data, data_type); + u64 mnt_id = fsnotify_data_mnt_id(data, data_type); struct mem_cgroup *old_memcg; struct dentry *moved = NULL; struct inode *child = NULL; @@ -826,8 +846,12 @@ static struct fanotify_event *fanotify_alloc_event( moved, &hash, gfp); } else if (fid_mode) { event = fanotify_alloc_fid_event(id, fsid, &hash, gfp); - } else { + } else if (path) { event = fanotify_alloc_path_event(path, &hash, gfp); + } else if (mnt_id) { + event = fanotify_alloc_mnt_event(mnt_id, gfp); + } else { + WARN_ON_ONCE(1); } if (!event) @@ -927,7 +951,7 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask, BUILD_BUG_ON(FAN_RENAME != FS_RENAME); BUILD_BUG_ON(FAN_PRE_ACCESS != FS_PRE_ACCESS); - BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 22); + BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 24); mask = fanotify_group_event_mask(group, iter_info, &match_mask, mask, data, data_type, dir); @@ -1028,6 +1052,11 @@ static void fanotify_free_error_event(struct fsnotify_group *group, mempool_free(fee, &group->fanotify_data.error_events_pool); } +static void fanotify_free_mnt_event(struct fanotify_event *event) +{ + kmem_cache_free(fanotify_mnt_event_cachep, FANOTIFY_ME(event)); +} + static void fanotify_free_event(struct fsnotify_group *group, struct fsnotify_event *fsn_event) { @@ -1054,6 +1083,9 @@ static void fanotify_free_event(struct fsnotify_group *group, case FANOTIFY_EVENT_TYPE_FS_ERROR: fanotify_free_error_event(group, event); break; + case FANOTIFY_EVENT_TYPE_MNT: + fanotify_free_mnt_event(event); + break; default: WARN_ON_ONCE(1); } diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h index c12cbc270539..b44e70e44be6 100644 --- a/fs/notify/fanotify/fanotify.h +++ b/fs/notify/fanotify/fanotify.h @@ -9,6 +9,7 @@ extern struct kmem_cache *fanotify_mark_cache; extern struct kmem_cache *fanotify_fid_event_cachep; extern struct kmem_cache *fanotify_path_event_cachep; extern struct kmem_cache *fanotify_perm_event_cachep; +extern struct kmem_cache *fanotify_mnt_event_cachep; /* Possible states of the permission event */ enum { @@ -244,6 +245,7 @@ enum fanotify_event_type { FANOTIFY_EVENT_TYPE_PATH_PERM, FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */ FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */ + FANOTIFY_EVENT_TYPE_MNT, __FANOTIFY_EVENT_TYPE_NUM }; @@ -409,12 +411,23 @@ struct fanotify_path_event { struct path path; }; +struct fanotify_mnt_event { + struct fanotify_event fae; + u64 mnt_id; +}; + static inline struct fanotify_path_event * FANOTIFY_PE(struct fanotify_event *event) { return container_of(event, struct fanotify_path_event, fae); } +static inline struct fanotify_mnt_event * +FANOTIFY_ME(struct fanotify_event *event) +{ + return container_of(event, struct fanotify_mnt_event, fae); +} + /* * Structure for permission fanotify events. It gets allocated and freed in * fanotify_handle_event() since we wait there for user response. When the @@ -466,6 +479,11 @@ static inline bool fanotify_is_error_event(u32 mask) return mask & FAN_FS_ERROR; } +static inline bool fanotify_is_mnt_event(u32 mask) +{ + return mask & (FAN_MNT_ATTACH | FAN_MNT_DETACH); +} + static inline const struct path *fanotify_event_path(struct fanotify_event *event) { if (event->type == FANOTIFY_EVENT_TYPE_PATH) diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index ba3e2d09eb44..f2d840ae4ded 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -113,6 +113,7 @@ struct kmem_cache *fanotify_mark_cache __ro_after_init; struct kmem_cache *fanotify_fid_event_cachep __ro_after_init; struct kmem_cache *fanotify_path_event_cachep __ro_after_init; struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; +struct kmem_cache *fanotify_mnt_event_cachep __ro_after_init; #define FANOTIFY_EVENT_ALIGN 4 #define FANOTIFY_FID_INFO_HDR_LEN \ @@ -123,6 +124,8 @@ struct kmem_cache *fanotify_perm_event_cachep __ro_after_init; (sizeof(struct fanotify_event_info_error)) #define FANOTIFY_RANGE_INFO_LEN \ (sizeof(struct fanotify_event_info_range)) +#define FANOTIFY_MNT_INFO_LEN \ + (sizeof(struct fanotify_event_info_mnt)) static int fanotify_fid_info_len(int fh_len, int name_len) { @@ -178,6 +181,8 @@ static size_t fanotify_event_len(unsigned int info_mode, fh_len = fanotify_event_object_fh_len(event); event_len += fanotify_fid_info_len(fh_len, dot_len); } + if (fanotify_is_mnt_event(event->mask)) + event_len += FANOTIFY_MNT_INFO_LEN; if (info_mode & FAN_REPORT_PIDFD) event_len += FANOTIFY_PIDFD_INFO_LEN; @@ -405,6 +410,25 @@ static int process_access_response(struct fsnotify_group *group, return -ENOENT; } +static size_t copy_mnt_info_to_user(struct fanotify_event *event, + char __user *buf, int count) +{ + struct fanotify_event_info_mnt info = { }; + + info.hdr.info_type = FAN_EVENT_INFO_TYPE_MNT; + info.hdr.len = FANOTIFY_MNT_INFO_LEN; + + if (WARN_ON(count < info.hdr.len)) + return -EFAULT; + + info.mnt_id = FANOTIFY_ME(event)->mnt_id; + + if (copy_to_user(buf, &info, sizeof(info))) + return -EFAULT; + + return info.hdr.len; +} + static size_t copy_error_info_to_user(struct fanotify_event *event, char __user *buf, int count) { @@ -700,6 +724,15 @@ static int copy_info_records_to_user(struct fanotify_event *event, total_bytes += ret; } + if (fanotify_is_mnt_event(event->mask)) { + ret = copy_mnt_info_to_user(event, buf, count); + if (ret < 0) + return ret; + buf += ret; + count -= ret; + total_bytes += ret; + } + return total_bytes; } @@ -1508,6 +1541,14 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, unsigned int, event_f_flags) if ((flags & FAN_REPORT_PIDFD) && (flags & FAN_REPORT_TID)) return -EINVAL; + /* Don't allow mixing mnt events with inode events for now */ + if (flags & FAN_REPORT_MNT) { + if (class != FAN_CLASS_NOTIF) + return -EINVAL; + if (flags & (FANOTIFY_FID_BITS | FAN_REPORT_FD_ERROR)) + return -EINVAL; + } + if (event_f_flags & ~FANOTIFY_INIT_ALL_EVENT_F_BITS) return -EINVAL; @@ -1767,7 +1808,6 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, int dfd, const char __user *pathname) { struct inode *inode = NULL; - struct vfsmount *mnt = NULL; struct fsnotify_group *group; struct path path; struct fan_fsid __fsid, *fsid = NULL; @@ -1776,7 +1816,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, unsigned int mark_cmd = flags & FANOTIFY_MARK_CMD_BITS; unsigned int ignore = flags & FANOTIFY_MARK_IGNORE_BITS; unsigned int obj_type, fid_mode; - void *obj; + void *obj = NULL; u32 umask = 0; int ret; @@ -1800,6 +1840,9 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, case FAN_MARK_FILESYSTEM: obj_type = FSNOTIFY_OBJ_TYPE_SB; break; + case FAN_MARK_MNTNS: + obj_type = FSNOTIFY_OBJ_TYPE_MNTNS; + break; default: return -EINVAL; } @@ -1847,6 +1890,19 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, return -EINVAL; group = fd_file(f)->private_data; + /* Only report mount events on mnt namespace */ + if (FAN_GROUP_FLAG(group, FAN_REPORT_MNT)) { + if (mask & ~FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type != FAN_MARK_MNTNS) + return -EINVAL; + } else { + if (mask & FANOTIFY_MOUNT_EVENTS) + return -EINVAL; + if (mark_type == FAN_MARK_MNTNS) + return -EINVAL; + } + /* * An unprivileged user is not allowed to setup mount nor filesystem * marks. This also includes setting up such marks by a group that @@ -1888,7 +1944,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, * point. */ fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS); - if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_EVENT_FLAGS) && + if (mask & ~(FANOTIFY_FD_EVENTS|FANOTIFY_MOUNT_EVENTS|FANOTIFY_EVENT_FLAGS) && (!fid_mode || mark_type == FAN_MARK_MOUNT)) return -EINVAL; @@ -1938,17 +1994,21 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* inode held in place by reference to path; group by fget on fd */ - if (mark_type == FAN_MARK_INODE) { + if (obj_type == FSNOTIFY_OBJ_TYPE_INODE) { inode = path.dentry->d_inode; obj = inode; - } else { - mnt = path.mnt; - if (mark_type == FAN_MARK_MOUNT) - obj = mnt; - else - obj = mnt->mnt_sb; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_VFSMOUNT) { + obj = path.mnt; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_SB) { + obj = path.mnt->mnt_sb; + } else if (obj_type == FSNOTIFY_OBJ_TYPE_MNTNS) { + obj = mnt_ns_from_dentry(path.dentry); } + ret = -EINVAL; + if (!obj) + goto path_put_and_out; + /* * If some other task has this inode open for write we should not add * an ignore mask, unless that ignore mask is supposed to survive @@ -1956,10 +2016,10 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, */ if (mark_cmd == FAN_MARK_ADD && (flags & FANOTIFY_MARK_IGNORE_BITS) && !(flags & FAN_MARK_IGNORED_SURV_MODIFY)) { - ret = mnt ? -EINVAL : -EISDIR; + ret = !inode ? -EINVAL : -EISDIR; /* FAN_MARK_IGNORE requires SURV_MODIFY for sb/mount/dir marks */ if (ignore == FAN_MARK_IGNORE && - (mnt || S_ISDIR(inode->i_mode))) + (!inode || S_ISDIR(inode->i_mode))) goto path_put_and_out; ret = 0; @@ -1968,7 +2028,7 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask, } /* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */ - if (mnt || !S_ISDIR(inode->i_mode)) { + if (!inode || !S_ISDIR(inode->i_mode)) { mask &= ~FAN_EVENT_ON_CHILD; umask = FAN_EVENT_ON_CHILD; /* @@ -2042,7 +2102,7 @@ static int __init fanotify_user_setup(void) FANOTIFY_DEFAULT_MAX_USER_MARKS); BUILD_BUG_ON(FANOTIFY_INIT_FLAGS & FANOTIFY_INTERNAL_GROUP_FLAGS); - BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 13); + BUILD_BUG_ON(HWEIGHT32(FANOTIFY_INIT_FLAGS) != 14); BUILD_BUG_ON(HWEIGHT32(FANOTIFY_MARK_FLAGS) != 11); fanotify_mark_cache = KMEM_CACHE(fanotify_mark, @@ -2055,6 +2115,7 @@ static int __init fanotify_user_setup(void) fanotify_perm_event_cachep = KMEM_CACHE(fanotify_perm_event, SLAB_PANIC); } + fanotify_mnt_event_cachep = KMEM_CACHE(fanotify_mnt_event, SLAB_PANIC); fanotify_max_queued_events = FANOTIFY_DEFAULT_MAX_EVENTS; init_user_ns.ucount_max[UCOUNT_FANOTIFY_GROUPS] = diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c index e933f9c65d90..1161eabf11ee 100644 --- a/fs/notify/fdinfo.c +++ b/fs/notify/fdinfo.c @@ -121,6 +121,11 @@ static void fanotify_fdinfo(struct seq_file *m, struct fsnotify_mark *mark) seq_printf(m, "fanotify sdev:%x mflags:%x mask:%x ignored_mask:%x\n", sb->s_dev, mflags, mark->mask, mark->ignore_mask); + } else if (mark->connector->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + struct mnt_namespace *mnt_ns = fsnotify_conn_mntns(mark->connector); + + seq_printf(m, "fanotify mnt_ns:%u mflags:%x mask:%x ignored_mask:%x\n", + mnt_ns->ns.inum, mflags, mark->mask, mark->ignore_mask); } } diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c index fae1b6d397ea..e2b4f17a48bb 100644 --- a/fs/notify/fsnotify.c +++ b/fs/notify/fsnotify.c @@ -28,6 +28,11 @@ void __fsnotify_vfsmount_delete(struct vfsmount *mnt) fsnotify_clear_marks_by_mount(mnt); } +void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + fsnotify_clear_marks_by_mntns(mntns); +} + /** * fsnotify_unmount_inodes - an sb is unmounting. handle any watched inodes. * @sb: superblock being unmounted. @@ -420,7 +425,7 @@ static int send_to_group(__u32 mask, const void *data, int data_type, file_name, cookie, iter_info); } -static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector **connp) +static struct fsnotify_mark *fsnotify_first_mark(struct fsnotify_mark_connector *const *connp) { struct fsnotify_mark_connector *conn; struct hlist_node *node = NULL; @@ -538,14 +543,15 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, { const struct path *path = fsnotify_data_path(data, data_type); struct super_block *sb = fsnotify_data_sb(data, data_type); - struct fsnotify_sb_info *sbinfo = fsnotify_sb_info(sb); + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + struct fsnotify_sb_info *sbinfo = sb ? fsnotify_sb_info(sb) : NULL; struct fsnotify_iter_info iter_info = {}; struct mount *mnt = NULL; struct inode *inode2 = NULL; struct dentry *moved; int inode2_type; int ret = 0; - __u32 test_mask, marks_mask; + __u32 test_mask, marks_mask = 0; if (path) mnt = real_mount(path->mnt); @@ -578,17 +584,20 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, if ((!sbinfo || !sbinfo->sb_marks) && (!mnt || !mnt->mnt_fsnotify_marks) && (!inode || !inode->i_fsnotify_marks) && - (!inode2 || !inode2->i_fsnotify_marks)) + (!inode2 || !inode2->i_fsnotify_marks) && + (!mnt_data || !mnt_data->ns->n_fsnotify_marks)) return 0; - marks_mask = READ_ONCE(sb->s_fsnotify_mask); + if (sb) + marks_mask |= READ_ONCE(sb->s_fsnotify_mask); if (mnt) marks_mask |= READ_ONCE(mnt->mnt_fsnotify_mask); if (inode) marks_mask |= READ_ONCE(inode->i_fsnotify_mask); if (inode2) marks_mask |= READ_ONCE(inode2->i_fsnotify_mask); - + if (mnt_data) + marks_mask |= READ_ONCE(mnt_data->ns->n_fsnotify_mask); /* * If this is a modify event we may need to clear some ignore masks. @@ -618,6 +627,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir, iter_info.marks[inode2_type] = fsnotify_first_mark(&inode2->i_fsnotify_marks); } + if (mnt_data) { + iter_info.marks[FSNOTIFY_ITER_TYPE_MNTNS] = + fsnotify_first_mark(&mnt_data->ns->n_fsnotify_marks); + } /* * We need to merge inode/vfsmount/sb mark lists so that e.g. inode mark @@ -708,11 +721,31 @@ void file_set_fsnotify_mode_from_watchers(struct file *file) } #endif +void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{ + struct fsnotify_mnt data = { + .ns = ns, + .mnt_id = real_mount(mnt)->mnt_id_unique, + }; + + if (WARN_ON_ONCE(!ns)) + return; + + /* + * This is an optimization as well as making sure fsnotify_init() has + * been called. + */ + if (!ns->n_fsnotify_marks) + return; + + fsnotify(mask, &data, FSNOTIFY_EVENT_MNT, NULL, NULL, NULL, 0); +} + static __init int fsnotify_init(void) { int ret; - BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 24); + BUILD_BUG_ON(HWEIGHT32(ALL_FSNOTIFY_BITS) != 26); ret = init_srcu_struct(&fsnotify_mark_srcu); if (ret) diff --git a/fs/notify/fsnotify.h b/fs/notify/fsnotify.h index 663759ed6fbc..5950c7a67f41 100644 --- a/fs/notify/fsnotify.h +++ b/fs/notify/fsnotify.h @@ -33,6 +33,12 @@ static inline struct super_block *fsnotify_conn_sb( return conn->obj; } +static inline struct mnt_namespace *fsnotify_conn_mntns( + struct fsnotify_mark_connector *conn) +{ + return conn->obj; +} + static inline struct super_block *fsnotify_object_sb(void *obj, enum fsnotify_obj_type obj_type) { @@ -89,6 +95,11 @@ static inline void fsnotify_clear_marks_by_sb(struct super_block *sb) fsnotify_destroy_marks(fsnotify_sb_marks(sb)); } +static inline void fsnotify_clear_marks_by_mntns(struct mnt_namespace *mntns) +{ + fsnotify_destroy_marks(&mntns->n_fsnotify_marks); +} + /* * update the dentry->d_flags of all of inode's children to indicate if inode cares * about events that happen to its children. diff --git a/fs/notify/mark.c b/fs/notify/mark.c index 4981439e6209..798340db69d7 100644 --- a/fs/notify/mark.c +++ b/fs/notify/mark.c @@ -107,6 +107,8 @@ static fsnotify_connp_t *fsnotify_object_connp(void *obj, return &real_mount(obj)->mnt_fsnotify_marks; case FSNOTIFY_OBJ_TYPE_SB: return fsnotify_sb_marks(obj); + case FSNOTIFY_OBJ_TYPE_MNTNS: + return &((struct mnt_namespace *)obj)->n_fsnotify_marks; default: return NULL; } @@ -120,6 +122,8 @@ static __u32 *fsnotify_conn_mask_p(struct fsnotify_mark_connector *conn) return &fsnotify_conn_mount(conn)->mnt_fsnotify_mask; else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) return &fsnotify_conn_sb(conn)->s_fsnotify_mask; + else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) + return &fsnotify_conn_mntns(conn)->n_fsnotify_mask; return NULL; } @@ -346,12 +350,15 @@ static void *fsnotify_detach_connector_from_object( fsnotify_conn_mount(conn)->mnt_fsnotify_mask = 0; } else if (conn->type == FSNOTIFY_OBJ_TYPE_SB) { fsnotify_conn_sb(conn)->s_fsnotify_mask = 0; + } else if (conn->type == FSNOTIFY_OBJ_TYPE_MNTNS) { + fsnotify_conn_mntns(conn)->n_fsnotify_mask = 0; } rcu_assign_pointer(*connp, NULL); conn->obj = NULL; conn->type = FSNOTIFY_OBJ_TYPE_DETACHED; - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); return inode; } @@ -724,7 +731,7 @@ static int fsnotify_add_mark_list(struct fsnotify_mark *mark, void *obj, * Attach the sb info before attaching a connector to any object on sb. * The sb info will remain attached as long as sb lives. */ - if (!fsnotify_sb_info(sb)) { + if (sb && !fsnotify_sb_info(sb)) { err = fsnotify_attach_info_to_sb(sb); if (err) return err; @@ -770,7 +777,8 @@ restart: /* mark should be the last entry. last is the current last entry */ hlist_add_behind_rcu(&mark->obj_list, &last->obj_list); added: - fsnotify_update_sb_watchers(sb, conn); + if (sb) + fsnotify_update_sb_watchers(sb, conn); /* * Since connector is attached to object using cmpxchg() we are * guaranteed that connector initialization is fully visible by anyone diff --git a/fs/pnode.c b/fs/pnode.c index ef048f008bdd..82d809c785ec 100644 --- a/fs/pnode.c +++ b/fs/pnode.c @@ -549,8 +549,10 @@ static void restore_mounts(struct list_head *to_restore) mp = parent->mnt_mp; parent = parent->mnt_parent; } - if (parent != mnt->mnt_parent) + if (parent != mnt->mnt_parent) { mnt_change_mountpoint(parent, mp, mnt); + mnt_notify_add(mnt); + } } } diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h index 78f660ebc318..3c817dc6292e 100644 --- a/include/linux/fanotify.h +++ b/include/linux/fanotify.h @@ -25,7 +25,7 @@ #define FANOTIFY_FID_BITS (FAN_REPORT_DFID_NAME_TARGET) -#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD) +#define FANOTIFY_INFO_MODES (FANOTIFY_FID_BITS | FAN_REPORT_PIDFD | FAN_REPORT_MNT) /* * fanotify_init() flags that require CAP_SYS_ADMIN. @@ -38,7 +38,8 @@ FAN_REPORT_PIDFD | \ FAN_REPORT_FD_ERROR | \ FAN_UNLIMITED_QUEUE | \ - FAN_UNLIMITED_MARKS) + FAN_UNLIMITED_MARKS | \ + FAN_REPORT_MNT) /* * fanotify_init() flags that are allowed for user without CAP_SYS_ADMIN. @@ -58,7 +59,7 @@ #define FANOTIFY_INTERNAL_GROUP_FLAGS (FANOTIFY_UNPRIV) #define FANOTIFY_MARK_TYPE_BITS (FAN_MARK_INODE | FAN_MARK_MOUNT | \ - FAN_MARK_FILESYSTEM) + FAN_MARK_FILESYSTEM | FAN_MARK_MNTNS) #define FANOTIFY_MARK_CMD_BITS (FAN_MARK_ADD | FAN_MARK_REMOVE | \ FAN_MARK_FLUSH) @@ -109,10 +110,13 @@ /* Events that can only be reported with data type FSNOTIFY_EVENT_ERROR */ #define FANOTIFY_ERROR_EVENTS (FAN_FS_ERROR) +#define FANOTIFY_MOUNT_EVENTS (FAN_MNT_ATTACH | FAN_MNT_DETACH) + /* Events that user can request to be notified on */ #define FANOTIFY_EVENTS (FANOTIFY_PATH_EVENTS | \ FANOTIFY_INODE_EVENTS | \ - FANOTIFY_ERROR_EVENTS) + FANOTIFY_ERROR_EVENTS | \ + FANOTIFY_MOUNT_EVENTS) /* Extra flags that may be reported with event or control handling of events */ #define FANOTIFY_EVENT_FLAGS (FAN_EVENT_ON_CHILD | FAN_ONDIR) diff --git a/include/linux/fsnotify.h b/include/linux/fsnotify.h index 83d3ac97f826..454d8e466958 100644 --- a/include/linux/fsnotify.h +++ b/include/linux/fsnotify.h @@ -320,6 +320,11 @@ static inline void fsnotify_vfsmount_delete(struct vfsmount *mnt) __fsnotify_vfsmount_delete(mnt); } +static inline void fsnotify_mntns_delete(struct mnt_namespace *mntns) +{ + __fsnotify_mntns_delete(mntns); +} + /* * fsnotify_inoderemove - an inode is going away */ @@ -528,4 +533,19 @@ static inline int fsnotify_sb_error(struct super_block *sb, struct inode *inode, NULL, NULL, NULL, 0); } +static inline void fsnotify_mnt_attach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_ATTACH, ns, mnt); +} + +static inline void fsnotify_mnt_detach(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_DETACH, ns, mnt); +} + +static inline void fsnotify_mnt_move(struct mnt_namespace *ns, struct vfsmount *mnt) +{ + fsnotify_mnt(FS_MNT_MOVE, ns, mnt); +} + #endif /* _LINUX_FS_NOTIFY_H */ diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 0d24a21a8e60..6cd8d1d28b8b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -59,6 +59,10 @@ #define FS_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FS_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FS_MNT_DETACH 0x02000000 /* Mount was detached */ +#define FS_MNT_MOVE (FS_MNT_ATTACH | FS_MNT_DETACH) + /* * Set on inode mark that cares about things that happen to its children. * Always set for dnotify and inotify. @@ -80,6 +84,9 @@ */ #define ALL_FSNOTIFY_DIRENT_EVENTS (FS_CREATE | FS_DELETE | FS_MOVE | FS_RENAME) +/* Mount namespace events */ +#define FSNOTIFY_MNT_EVENTS (FS_MNT_ATTACH | FS_MNT_DETACH) + /* Content events can be used to inspect file content */ #define FSNOTIFY_CONTENT_PERM_EVENTS (FS_OPEN_PERM | FS_OPEN_EXEC_PERM | \ FS_ACCESS_PERM) @@ -108,6 +115,7 @@ /* Events that can be reported to backends */ #define ALL_FSNOTIFY_EVENTS (ALL_FSNOTIFY_DIRENT_EVENTS | \ + FSNOTIFY_MNT_EVENTS | \ FS_EVENTS_POSS_ON_CHILD | \ FS_DELETE_SELF | FS_MOVE_SELF | \ FS_UNMOUNT | FS_Q_OVERFLOW | FS_IN_IGNORED | \ @@ -298,6 +306,7 @@ enum fsnotify_data_type { FSNOTIFY_EVENT_PATH, FSNOTIFY_EVENT_INODE, FSNOTIFY_EVENT_DENTRY, + FSNOTIFY_EVENT_MNT, FSNOTIFY_EVENT_ERROR, }; @@ -318,6 +327,11 @@ static inline const struct path *file_range_path(const struct file_range *range) return range->path; } +struct fsnotify_mnt { + const struct mnt_namespace *ns; + u64 mnt_id; +}; + static inline struct inode *fsnotify_data_inode(const void *data, int data_type) { switch (data_type) { @@ -383,6 +397,24 @@ static inline struct super_block *fsnotify_data_sb(const void *data, } } +static inline const struct fsnotify_mnt *fsnotify_data_mnt(const void *data, + int data_type) +{ + switch (data_type) { + case FSNOTIFY_EVENT_MNT: + return data; + default: + return NULL; + } +} + +static inline u64 fsnotify_data_mnt_id(const void *data, int data_type) +{ + const struct fsnotify_mnt *mnt_data = fsnotify_data_mnt(data, data_type); + + return mnt_data ? mnt_data->mnt_id : 0; +} + static inline struct fs_error_report *fsnotify_data_error_report( const void *data, int data_type) @@ -420,6 +452,7 @@ enum fsnotify_iter_type { FSNOTIFY_ITER_TYPE_SB, FSNOTIFY_ITER_TYPE_PARENT, FSNOTIFY_ITER_TYPE_INODE2, + FSNOTIFY_ITER_TYPE_MNTNS, FSNOTIFY_ITER_TYPE_COUNT }; @@ -429,6 +462,7 @@ enum fsnotify_obj_type { FSNOTIFY_OBJ_TYPE_INODE, FSNOTIFY_OBJ_TYPE_VFSMOUNT, FSNOTIFY_OBJ_TYPE_SB, + FSNOTIFY_OBJ_TYPE_MNTNS, FSNOTIFY_OBJ_TYPE_COUNT, FSNOTIFY_OBJ_TYPE_DETACHED = FSNOTIFY_OBJ_TYPE_COUNT }; @@ -613,8 +647,10 @@ extern int __fsnotify_parent(struct dentry *dentry, __u32 mask, const void *data extern void __fsnotify_inode_delete(struct inode *inode); extern void __fsnotify_vfsmount_delete(struct vfsmount *mnt); extern void fsnotify_sb_delete(struct super_block *sb); +extern void __fsnotify_mntns_delete(struct mnt_namespace *mntns); extern void fsnotify_sb_free(struct super_block *sb); extern u32 fsnotify_get_cookie(void); +extern void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt); static inline __u32 fsnotify_parent_needed_mask(__u32 mask) { @@ -928,6 +964,9 @@ static inline void __fsnotify_vfsmount_delete(struct vfsmount *mnt) static inline void fsnotify_sb_delete(struct super_block *sb) {} +static inline void __fsnotify_mntns_delete(struct mnt_namespace *mntns) +{} + static inline void fsnotify_sb_free(struct super_block *sb) {} @@ -942,6 +981,9 @@ static inline u32 fsnotify_get_cookie(void) static inline void fsnotify_unmount_inodes(struct super_block *sb) {} +static inline void fsnotify_mnt(__u32 mask, struct mnt_namespace *ns, struct vfsmount *mnt) +{} + #endif /* CONFIG_FSNOTIFY */ #endif /* __KERNEL __ */ diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h index b1b219bc3422..e71a6070a8f8 100644 --- a/include/linux/mnt_idmapping.h +++ b/include/linux/mnt_idmapping.h @@ -25,6 +25,11 @@ static_assert(sizeof(vfsgid_t) == sizeof(kgid_t)); static_assert(offsetof(vfsuid_t, val) == offsetof(kuid_t, val)); static_assert(offsetof(vfsgid_t, val) == offsetof(kgid_t, val)); +static inline bool is_valid_mnt_idmap(const struct mnt_idmap *idmap) +{ + return idmap != &nop_mnt_idmap && idmap != &invalid_mnt_idmap; +} + #ifdef CONFIG_MULTIUSER static inline uid_t __vfsuid_val(vfsuid_t uid) { diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index bae4490c1dda..e5603cc91963 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -951,6 +951,10 @@ asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, asmlinkage long sys_rseq(struct rseq __user *rseq, uint32_t rseq_len, int flags, uint32_t sig); asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags); +asmlinkage long sys_open_tree_attr(int dfd, const char __user *path, + unsigned flags, + struct mount_attr __user *uattr, + size_t usize); asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path, int to_dfd, const char __user *to_path, unsigned int ms_flags); diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h index f85ec5613721..2dc767e08f54 100644 --- a/include/linux/uidgid.h +++ b/include/linux/uidgid.h @@ -132,6 +132,7 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid) u32 map_id_down(struct uid_gid_map *map, u32 id); u32 map_id_up(struct uid_gid_map *map, u32 id); +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count); #else @@ -186,6 +187,11 @@ static inline u32 map_id_down(struct uid_gid_map *map, u32 id) return id; } +static inline u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) +{ + return id; +} + static inline u32 map_id_up(struct uid_gid_map *map, u32 id) { return id; diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h index bd8167979707..e710967c7c26 100644 --- a/include/uapi/linux/fanotify.h +++ b/include/uapi/linux/fanotify.h @@ -28,6 +28,8 @@ /* #define FAN_DIR_MODIFY 0x00080000 */ /* Deprecated (reserved) */ #define FAN_PRE_ACCESS 0x00100000 /* Pre-content access hook */ +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ #define FAN_EVENT_ON_CHILD 0x08000000 /* Interested in child events */ @@ -64,6 +66,7 @@ #define FAN_REPORT_NAME 0x00000800 /* Report events with name */ #define FAN_REPORT_TARGET_FID 0x00001000 /* Report dirent target id */ #define FAN_REPORT_FD_ERROR 0x00002000 /* event->fd can report error */ +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ /* Convenience macro - FAN_REPORT_NAME requires FAN_REPORT_DIR_FID */ #define FAN_REPORT_DFID_NAME (FAN_REPORT_DIR_FID | FAN_REPORT_NAME) @@ -94,6 +97,7 @@ #define FAN_MARK_INODE 0x00000000 #define FAN_MARK_MOUNT 0x00000010 #define FAN_MARK_FILESYSTEM 0x00000100 +#define FAN_MARK_MNTNS 0x00000110 /* * Convenience macro - FAN_MARK_IGNORE requires FAN_MARK_IGNORED_SURV_MODIFY @@ -147,6 +151,7 @@ struct fanotify_event_metadata { #define FAN_EVENT_INFO_TYPE_PIDFD 4 #define FAN_EVENT_INFO_TYPE_ERROR 5 #define FAN_EVENT_INFO_TYPE_RANGE 6 +#define FAN_EVENT_INFO_TYPE_MNT 7 /* Special info types for FAN_RENAME */ #define FAN_EVENT_INFO_TYPE_OLD_DFID_NAME 10 @@ -200,6 +205,11 @@ struct fanotify_event_info_range { __u64 count; }; +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; + /* * User space may need to record additional information about its decision. * The extra information type records what kind of information is included. diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h index c07008816aca..7fa67c2031a5 100644 --- a/include/uapi/linux/mount.h +++ b/include/uapi/linux/mount.h @@ -179,7 +179,12 @@ struct statmount { __u32 opt_array; /* [str] Array of nul terminated fs options */ __u32 opt_sec_num; /* Number of security options */ __u32 opt_sec_array; /* [str] Array of nul terminated security options */ - __u64 __spare2[46]; + __u64 supported_mask; /* Mask flags that this kernel supports */ + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ + __u64 __spare2[43]; char str[]; /* Variable size part containing strings */ }; @@ -217,6 +222,9 @@ struct mnt_id_req { #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... */ +#define STATMOUNT_SUPPORTED_MASK 0x00001000U /* Want/got supported mask flags */ +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ /* * Special @mnt_id values that can be passed to listmount diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index aa0b2e47f2f2..682f40d5632d 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -238,7 +238,7 @@ EXPORT_SYMBOL(__put_user_ns); struct idmap_key { bool map_up; /* true -> id from kid; false -> kid from id */ u32 id; /* id to find */ - u32 count; /* == 0 unless used with map_id_range_down() */ + u32 count; }; /* @@ -343,16 +343,19 @@ u32 map_id_down(struct uid_gid_map *map, u32 id) * UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * -map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) +map_id_range_up_base(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { unsigned idx; - u32 first, last; + u32 first, last, id2; + + id2 = id + count - 1; /* Find the matching extent */ for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; - if (id >= first && id <= last) + if (id >= first && id <= last && + (id2 >= first && id2 <= last)) return &map->extent[idx]; } return NULL; @@ -363,28 +366,28 @@ map_id_up_base(unsigned extents, struct uid_gid_map *map, u32 id) * Can only be called if number of mappings exceeds UID_GID_MAP_MAX_BASE_EXTENTS. */ static struct uid_gid_extent * -map_id_up_max(unsigned extents, struct uid_gid_map *map, u32 id) +map_id_range_up_max(unsigned extents, struct uid_gid_map *map, u32 id, u32 count) { struct idmap_key key; key.map_up = true; - key.count = 1; + key.count = count; key.id = id; return bsearch(&key, map->reverse, extents, sizeof(struct uid_gid_extent), cmp_map_id); } -u32 map_id_up(struct uid_gid_map *map, u32 id) +u32 map_id_range_up(struct uid_gid_map *map, u32 id, u32 count) { struct uid_gid_extent *extent; unsigned extents = map->nr_extents; smp_rmb(); if (extents <= UID_GID_MAP_MAX_BASE_EXTENTS) - extent = map_id_up_base(extents, map, id); + extent = map_id_range_up_base(extents, map, id, count); else - extent = map_id_up_max(extents, map, id); + extent = map_id_range_up_max(extents, map, id, count); /* Map the id or note failure */ if (extent) @@ -395,6 +398,11 @@ u32 map_id_up(struct uid_gid_map *map, u32 id) return id; } +u32 map_id_up(struct uid_gid_map *map, u32 id) +{ + return map_id_range_up(map, id, 1); +} + /** * make_kuid - Map a user-namespace uid pair into a kuid. * @ns: User namespace that the uid is in diff --git a/samples/vfs/samples-vfs.h b/samples/vfs/samples-vfs.h index 103e1e7c4cec..498baf581b56 100644 --- a/samples/vfs/samples-vfs.h +++ b/samples/vfs/samples-vfs.h @@ -42,7 +42,11 @@ struct statmount { __u32 opt_array; /* [str] Array of nul terminated fs options */ __u32 opt_sec_num; /* Number of security options */ __u32 opt_sec_array; /* [str] Array of nul terminated security options */ - __u64 __spare2[46]; + __u32 mnt_uidmap_num; /* Number of uid mappings */ + __u32 mnt_uidmap; /* [str] Array of uid mappings */ + __u32 mnt_gidmap_num; /* Number of gid mappings */ + __u32 mnt_gidmap; /* [str] Array of gid mappings */ + __u64 __spare2[44]; char str[]; /* Variable size part containing strings */ }; @@ -158,6 +162,14 @@ struct mnt_ns_info { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #endif +#ifndef STATMOUNT_MNT_UIDMAP +#define STATMOUNT_MNT_UIDMAP 0x00002000U /* Want/got uidmap... */ +#endif + +#ifndef STATMOUNT_MNT_GIDMAP +#define STATMOUNT_MNT_GIDMAP 0x00004000U /* Want/got gidmap... */ +#endif + #ifndef MOUNT_ATTR_RDONLY #define MOUNT_ATTR_RDONLY 0x00000001 /* Mount read-only */ #endif diff --git a/samples/vfs/test-list-all-mounts.c b/samples/vfs/test-list-all-mounts.c index 1a02ea4593e3..713c174626aa 100644 --- a/samples/vfs/test-list-all-mounts.c +++ b/samples/vfs/test-list-all-mounts.c @@ -128,20 +128,43 @@ next: STATMOUNT_MNT_POINT | STATMOUNT_MNT_NS_ID | STATMOUNT_MNT_OPTS | - STATMOUNT_FS_TYPE, 0); + STATMOUNT_FS_TYPE | + STATMOUNT_MNT_UIDMAP | + STATMOUNT_MNT_GIDMAP, 0); if (!stmnt) { printf("Failed to statmount(%" PRIu64 ") in mount namespace(%" PRIu64 ")\n", (uint64_t)last_mnt_id, (uint64_t)info.mnt_ns_id); continue; } - printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n\n", + printf("mnt_id:\t\t%" PRIu64 "\nmnt_parent_id:\t%" PRIu64 "\nfs_type:\t%s\nmnt_root:\t%s\nmnt_point:\t%s\nmnt_opts:\t%s\n", (uint64_t)stmnt->mnt_id, (uint64_t)stmnt->mnt_parent_id, - stmnt->str + stmnt->fs_type, - stmnt->str + stmnt->mnt_root, - stmnt->str + stmnt->mnt_point, - stmnt->str + stmnt->mnt_opts); + (stmnt->mask & STATMOUNT_FS_TYPE) ? stmnt->str + stmnt->fs_type : "", + (stmnt->mask & STATMOUNT_MNT_ROOT) ? stmnt->str + stmnt->mnt_root : "", + (stmnt->mask & STATMOUNT_MNT_POINT) ? stmnt->str + stmnt->mnt_point : "", + (stmnt->mask & STATMOUNT_MNT_OPTS) ? stmnt->str + stmnt->mnt_opts : ""); + + if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_uidmap; + + for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { + printf("mnt_uidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + if (stmnt->mask & STATMOUNT_MNT_GIDMAP) { + const char *idmap = stmnt->str + stmnt->mnt_gidmap; + + for (size_t idx = 0; idx < stmnt->mnt_gidmap_num; idx++) { + printf("mnt_gidmap[%zu]:\t%s\n", idx, idmap); + idmap += strlen(idmap) + 1; + } + } + + printf("\n"); + free(stmnt); } } diff --git a/scripts/syscall.tbl b/scripts/syscall.tbl index ebbdb3c42e9f..580b4e246aec 100644 --- a/scripts/syscall.tbl +++ b/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 7b867dfec88b..212cdead2b52 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3395,6 +3395,9 @@ static int selinux_path_notify(const struct path *path, u64 mask, case FSNOTIFY_OBJ_TYPE_INODE: perm = FILE__WATCH; break; + case FSNOTIFY_OBJ_TYPE_MNTNS: + perm = FILE__WATCH_MOUNTNS; + break; default: return -EINVAL; } diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h index 03e82477dce9..f9b5ca92a825 100644 --- a/security/selinux/include/classmap.h +++ b/security/selinux/include/classmap.h @@ -8,7 +8,7 @@ COMMON_FILE_SOCK_PERMS, "unlink", "link", "rename", "execute", \ "quotaon", "mounton", "audit_access", "open", "execmod", \ "watch", "watch_mount", "watch_sb", "watch_with_perm", \ - "watch_reads" + "watch_reads", "watch_mountns" #define COMMON_SOCK_PERMS \ COMMON_FILE_SOCK_PERMS, "bind", "connect", "listen", "accept", \ diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 8daac70c2f9d..2ebaf5e6942e 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -35,6 +35,7 @@ TARGETS += filesystems/epoll TARGETS += filesystems/fat TARGETS += filesystems/overlayfs TARGETS += filesystems/statmount +TARGETS += filesystems/mount-notify TARGETS += firmware TARGETS += fpu TARGETS += ftrace diff --git a/tools/testing/selftests/filesystems/mount-notify/.gitignore b/tools/testing/selftests/filesystems/mount-notify/.gitignore new file mode 100644 index 000000000000..82a4846cbc4b --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/*_test diff --git a/tools/testing/selftests/filesystems/mount-notify/Makefile b/tools/testing/selftests/filesystems/mount-notify/Makefile new file mode 100644 index 000000000000..10be0227b5ae --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES) +TEST_GEN_PROGS := mount-notify_test + +include ../../lib.mk diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c new file mode 100644 index 000000000000..4a2d5c454fd1 --- /dev/null +++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c @@ -0,0 +1,516 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu> + +#define _GNU_SOURCE +#include <fcntl.h> +#include <sched.h> +#include <stdio.h> +#include <string.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <linux/fanotify.h> +#include <unistd.h> +#include <sys/fanotify.h> +#include <sys/syscall.h> + +#include "../../kselftest_harness.h" +#include "../statmount/statmount.h" + +#ifndef FAN_MNT_ATTACH +struct fanotify_event_info_mnt { + struct fanotify_event_info_header hdr; + __u64 mnt_id; +}; +#define FAN_MNT_ATTACH 0x01000000 /* Mount was attached */ +#endif + +#ifndef FAN_MNT_DETACH +#define FAN_MNT_DETACH 0x02000000 /* Mount was detached */ +#endif + +#ifndef FAN_REPORT_MNT +#define FAN_REPORT_MNT 0x00004000 /* Report mount events */ +#endif + +#ifndef FAN_MARK_MNTNS +#define FAN_MARK_MNTNS 0x00000110 +#endif + +static uint64_t get_mnt_id(struct __test_metadata *const _metadata, + const char *path) +{ + struct statx sx; + + ASSERT_EQ(statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx), 0); + ASSERT_TRUE(!!(sx.stx_mask & STATX_MNT_ID_UNIQUE)); + return sx.stx_mnt_id; +} + +static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX"; + +FIXTURE(fanotify) { + int fan_fd; + char buf[256]; + unsigned int rem; + void *next; + char root_mntpoint[sizeof(root_mntpoint_templ)]; + int orig_root; + int ns_fd; + uint64_t root_id; +}; + +FIXTURE_SETUP(fanotify) +{ + int ret; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + + self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY); + ASSERT_GE(self->ns_fd, 0); + + ASSERT_EQ(mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL), 0); + + strcpy(self->root_mntpoint, root_mntpoint_templ); + ASSERT_NE(mkdtemp(self->root_mntpoint), NULL); + + self->orig_root = open("/", O_PATH | O_CLOEXEC); + ASSERT_GE(self->orig_root, 0); + + ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0); + + ASSERT_EQ(chroot(self->root_mntpoint), 0); + + ASSERT_EQ(chdir("/"), 0); + + ASSERT_EQ(mkdir("a", 0700), 0); + + ASSERT_EQ(mkdir("b", 0700), 0); + + self->root_id = get_mnt_id(_metadata, "/"); + ASSERT_NE(self->root_id, 0); + + self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0); + ASSERT_GE(self->fan_fd, 0); + + ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS, + FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL); + ASSERT_EQ(ret, 0); + + self->rem = 0; +} + +FIXTURE_TEARDOWN(fanotify) +{ + ASSERT_EQ(self->rem, 0); + close(self->fan_fd); + + ASSERT_EQ(fchdir(self->orig_root), 0); + + ASSERT_EQ(chroot("."), 0); + + EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0); + EXPECT_EQ(chdir(self->root_mntpoint), 0); + EXPECT_EQ(chdir("/"), 0); + EXPECT_EQ(rmdir(self->root_mntpoint), 0); +} + +static uint64_t expect_notify(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t *mask) +{ + struct fanotify_event_metadata *meta; + struct fanotify_event_info_mnt *mnt; + unsigned int thislen; + + if (!self->rem) { + ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf)); + ASSERT_GT(len, 0); + + self->rem = len; + self->next = (void *) self->buf; + } + + meta = self->next; + ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem)); + + thislen = meta->event_len; + self->rem -= thislen; + self->next += thislen; + + *mask = meta->mask; + thislen -= sizeof(*meta); + + mnt = ((void *) meta) + meta->event_len - thislen; + + ASSERT_EQ(thislen, sizeof(*mnt)); + + return mnt->mnt_id; +} + +static void expect_notify_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + unsigned int n, uint64_t mask[], uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify(_metadata, self, &mask[i]); +} + +static uint64_t expect_notify_mask(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t expect_mask) +{ + uint64_t mntid, mask; + + mntid = expect_notify(_metadata, self, &mask); + ASSERT_EQ(expect_mask, mask); + + return mntid; +} + + +static void expect_notify_mask_n(struct __test_metadata *const _metadata, + FIXTURE_DATA(fanotify) *self, + uint64_t mask, unsigned int n, uint64_t mnts[]) +{ + unsigned int i; + + for (i = 0; i < n; i++) + mnts[i] = expect_notify_mask(_metadata, self, mask); +} + +static void verify_mount_ids(struct __test_metadata *const _metadata, + const uint64_t list1[], const uint64_t list2[], + size_t num) +{ + unsigned int i, j; + + // Check that neither list has any duplicates + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (i != j) { + ASSERT_NE(list1[i], list1[j]); + ASSERT_NE(list2[i], list2[j]); + } + } + } + // Check that all list1 memebers can be found in list2. Together with + // the above it means that the list1 and list2 represent the same sets. + for (i = 0; i < num; i++) { + for (j = 0; j < num; j++) { + if (list1[i] == list2[j]) + break; + } + ASSERT_NE(j, num); + } +} + +static void check_mounted(struct __test_metadata *const _metadata, + const uint64_t mnts[], size_t num) +{ + ssize_t ret; + uint64_t *list; + + list = malloc((num + 1) * sizeof(list[0])); + ASSERT_NE(list, NULL); + + ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0); + ASSERT_EQ(ret, num); + + verify_mount_ids(_metadata, mnts, list, num); + + free(list); +} + +static void setup_mount_tree(struct __test_metadata *const _metadata, + int log2_num) +{ + int ret, i; + + ret = mount("", "/", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + for (i = 0; i < log2_num; i++) { + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + } +} + +TEST_F(fanotify, bind) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + + ret = mount("/", "/", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, move) +{ + int ret; + uint64_t mnts[2] = { self->root_id }; + uint64_t move_id; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0); + ASSERT_EQ(ret, 0); + + move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH); + ASSERT_EQ(move_id, mnts[1]); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, propagate) +{ + const unsigned int log2_num = 4; + const unsigned int num = (1 << log2_num); + uint64_t mnts[num]; + + setup_mount_tree(_metadata, log2_num); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1); + + mnts[0] = self->root_id; + check_mounted(_metadata, mnts, num); + + // Cleanup + int ret; + uint64_t mnts2[num]; + ret = umount2("/", MNT_DETACH); + ASSERT_EQ(ret, 0); + + ret = mount("", "/", NULL, MS_PRIVATE, NULL); + ASSERT_EQ(ret, 0); + + mnts2[0] = self->root_id; + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1); + verify_mount_ids(_metadata, mnts, mnts2, num); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, fsmount) +{ + int ret, fs, mnt; + uint64_t mnts[2] = { self->root_id }; + + fs = fsopen("tmpfs", 0); + ASSERT_GE(fs, 0); + + ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0); + ASSERT_EQ(ret, 0); + + mnt = fsmount(fs, 0, 0); + ASSERT_GE(mnt, 0); + + close(fs); + + ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH); + ASSERT_EQ(ret, 0); + + close(mnt); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + ASSERT_NE(mnts[0], mnts[1]); + + check_mounted(_metadata, mnts, 2); + + // Cleanup + uint64_t detach_id; + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH); + ASSERT_EQ(detach_id, mnts[1]); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, reparent) +{ + uint64_t mnts[6] = { self->root_id }; + uint64_t dmnts[3]; + uint64_t masks[3]; + unsigned int i; + int ret; + + // Create setup with a[1] -> b[2] propagation + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/a", NULL, MS_SHARED, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("", "/b", NULL, MS_SLAVE, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + // Mount on a[3], which is propagated to b[4] + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3); + + check_mounted(_metadata, mnts, 5); + + // Mount on b[5], not propagated + ret = mount("/", "/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + check_mounted(_metadata, mnts, 6); + + // Umount a[3], which is propagated to b[4], but not b[5] + // This will result in b[5] "falling" on b[2] + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + expect_notify_n(_metadata, self, 3, masks, dmnts); + verify_mount_ids(_metadata, mnts + 3, dmnts, 3); + + for (i = 0; i < 3; i++) { + if (dmnts[i] == mnts[5]) { + ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH); + } else { + ASSERT_EQ(masks[i], FAN_MNT_DETACH); + } + } + + mnts[3] = mnts[5]; + check_mounted(_metadata, mnts, 4); + + // Cleanup + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + ret = umount("/b"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts); + verify_mount_ids(_metadata, mnts + 1, dmnts, 3); + + check_mounted(_metadata, mnts, 1); +} + +TEST_F(fanotify, rmdir) +{ + uint64_t mnts[3] = { self->root_id }; + int ret; + + ret = mount("/", "/a", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + ret = mount("/", "/a/b", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1); + + check_mounted(_metadata, mnts, 3); + + ret = chdir("/a"); + ASSERT_EQ(ret, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret == 0) { + chdir("/"); + unshare(CLONE_NEWNS); + mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL); + umount2("/a", MNT_DETACH); + // This triggers a detach in the other namespace + rmdir("/a"); + exit(0); + } + wait(NULL); + + expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1); + check_mounted(_metadata, mnts, 1); + + // Cleanup + ret = chdir("/"); + ASSERT_EQ(ret, 0); +} + +TEST_F(fanotify, pivot_root) +{ + uint64_t mnts[3] = { self->root_id }; + uint64_t mnts2[3]; + int ret; + + ret = mount("tmpfs", "/a", "tmpfs", 0, NULL); + ASSERT_EQ(ret, 0); + + mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + + ret = mkdir("/a/new", 0700); + ASSERT_EQ(ret, 0); + + ret = mkdir("/a/old", 0700); + ASSERT_EQ(ret, 0); + + ret = mount("/a", "/a/new", NULL, MS_BIND, NULL); + ASSERT_EQ(ret, 0); + + mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH); + check_mounted(_metadata, mnts, 3); + + ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old"); + ASSERT_EQ(ret, 0); + + expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2); + verify_mount_ids(_metadata, mnts, mnts2, 2); + check_mounted(_metadata, mnts, 3); + + // Cleanup + ret = syscall(SYS_pivot_root, "/old", "/old/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a/new"); + ASSERT_EQ(ret, 0); + + ret = umount("/a"); + ASSERT_EQ(ret, 0); + + check_mounted(_metadata, mnts, 1); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c index 1d0ae785a667..e65d95d97846 100644 --- a/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c +++ b/tools/testing/selftests/filesystems/overlayfs/set_layers_via_fds.c @@ -20,12 +20,16 @@ FIXTURE(set_layers_via_fds) { FIXTURE_SETUP(set_layers_via_fds) { ASSERT_EQ(mkdir("/set_layers_via_fds", 0755), 0); + ASSERT_EQ(mkdir("/set_layers_via_fds_tmpfs", 0755), 0); } FIXTURE_TEARDOWN(set_layers_via_fds) { umount2("/set_layers_via_fds", 0); ASSERT_EQ(rmdir("/set_layers_via_fds"), 0); + + umount2("/set_layers_via_fds_tmpfs", 0); + ASSERT_EQ(rmdir("/set_layers_via_fds_tmpfs"), 0); } TEST_F(set_layers_via_fds, set_layers_via_fds) @@ -214,4 +218,195 @@ TEST_F(set_layers_via_fds, set_500_layers_via_fds) ASSERT_EQ(close(fd_overlay), 0); } +TEST_F(set_layers_via_fds, set_500_layers_via_opath_fds) +{ + int fd_context, fd_tmpfs, fd_overlay, fd_work, fd_upper, fd_lower; + int layer_fds[500] = { [0 ... 499] = -EBADF }; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + char path[100]; + + sprintf(path, "l%d", i); + ASSERT_EQ(mkdirat(fd_tmpfs, path, 0755), 0); + layer_fds[i] = openat(fd_tmpfs, path, O_DIRECTORY | O_PATH); + ASSERT_GE(layer_fds[i], 0); + } + + ASSERT_EQ(mkdirat(fd_tmpfs, "w", 0755), 0); + fd_work = openat(fd_tmpfs, "w", O_DIRECTORY | O_PATH); + ASSERT_GE(fd_work, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + fd_upper = openat(fd_tmpfs, "u", O_DIRECTORY | O_PATH); + ASSERT_GE(fd_upper, 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "l501", 0755), 0); + fd_lower = openat(fd_tmpfs, "l501", O_DIRECTORY | O_PATH); + ASSERT_GE(fd_lower, 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/tmp", MOVE_MOUNT_F_EMPTY_PATH), 0); + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, fd_work), 0); + ASSERT_EQ(close(fd_work), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, fd_upper), 0); + ASSERT_EQ(close(fd_upper), 0); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[i]), 0); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, fd_lower), 0); + ASSERT_EQ(close(fd_lower), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); +} + +TEST_F(set_layers_via_fds, set_layers_via_detached_mount_fds) +{ + int fd_context, fd_tmpfs, fd_overlay, fd_tmp; + int layer_fds[] = { [0 ... 8] = -EBADF }; + bool layers_found[] = { [0 ... 8] = false }; + size_t len = 0; + char *line = NULL; + FILE *f_mountinfo; + + ASSERT_EQ(unshare(CLONE_NEWNS), 0); + ASSERT_EQ(sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL), 0); + + fd_context = sys_fsopen("tmpfs", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + fd_tmpfs = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_tmpfs, 0); + ASSERT_EQ(close(fd_context), 0); + + ASSERT_EQ(mkdirat(fd_tmpfs, "u", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "u/upper", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "u/work", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l3", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "l4", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d1", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d2", 0755), 0); + ASSERT_EQ(mkdirat(fd_tmpfs, "d3", 0755), 0); + + ASSERT_EQ(sys_move_mount(fd_tmpfs, "", -EBADF, "/set_layers_via_fds_tmpfs", MOVE_MOUNT_F_EMPTY_PATH), 0); + + fd_tmp = open_tree(fd_tmpfs, "u", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(fd_tmp, 0); + + layer_fds[0] = openat(fd_tmp, "upper", O_CLOEXEC | O_DIRECTORY | O_PATH); + ASSERT_GE(layer_fds[0], 0); + + layer_fds[1] = openat(fd_tmp, "work", O_CLOEXEC | O_DIRECTORY | O_PATH); + ASSERT_GE(layer_fds[1], 0); + + layer_fds[2] = open_tree(fd_tmpfs, "l1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[2], 0); + + layer_fds[3] = open_tree(fd_tmpfs, "l2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[3], 0); + + layer_fds[4] = open_tree(fd_tmpfs, "l3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[4], 0); + + layer_fds[5] = open_tree(fd_tmpfs, "l4", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[5], 0); + + layer_fds[6] = open_tree(fd_tmpfs, "d1", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[6], 0); + + layer_fds[7] = open_tree(fd_tmpfs, "d2", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[7], 0); + + layer_fds[8] = open_tree(fd_tmpfs, "d3", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC); + ASSERT_GE(layer_fds[8], 0); + + ASSERT_EQ(close(fd_tmpfs), 0); + + fd_context = sys_fsopen("overlay", 0); + ASSERT_GE(fd_context, 0); + + ASSERT_NE(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir", NULL, layer_fds[2]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "upperdir", NULL, layer_fds[0]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "workdir", NULL, layer_fds[1]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[2]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[3]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[4]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "lowerdir+", NULL, layer_fds[5]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[6]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[7]), 0); + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_FD, "datadir+", NULL, layer_fds[8]), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_SET_STRING, "metacopy", "on", 0), 0); + + ASSERT_EQ(sys_fsconfig(fd_context, FSCONFIG_CMD_CREATE, NULL, NULL, 0), 0); + + fd_overlay = sys_fsmount(fd_context, 0, 0); + ASSERT_GE(fd_overlay, 0); + + ASSERT_EQ(sys_move_mount(fd_overlay, "", -EBADF, "/set_layers_via_fds", MOVE_MOUNT_F_EMPTY_PATH), 0); + + f_mountinfo = fopen("/proc/self/mountinfo", "r"); + ASSERT_NE(f_mountinfo, NULL); + + while (getline(&line, &len, f_mountinfo) != -1) { + char *haystack = line; + + if (strstr(haystack, "workdir=/tmp/w")) + layers_found[0] = true; + if (strstr(haystack, "upperdir=/tmp/u")) + layers_found[1] = true; + if (strstr(haystack, "lowerdir+=/tmp/l1")) + layers_found[2] = true; + if (strstr(haystack, "lowerdir+=/tmp/l2")) + layers_found[3] = true; + if (strstr(haystack, "lowerdir+=/tmp/l3")) + layers_found[4] = true; + if (strstr(haystack, "lowerdir+=/tmp/l4")) + layers_found[5] = true; + if (strstr(haystack, "datadir+=/tmp/d1")) + layers_found[6] = true; + if (strstr(haystack, "datadir+=/tmp/d2")) + layers_found[7] = true; + if (strstr(haystack, "datadir+=/tmp/d3")) + layers_found[8] = true; + } + free(line); + + for (int i = 0; i < ARRAY_SIZE(layer_fds); i++) { + ASSERT_EQ(layers_found[i], true); + ASSERT_EQ(close(layer_fds[i]), 0); + } + + ASSERT_EQ(close(fd_context), 0); + ASSERT_EQ(close(fd_overlay), 0); + ASSERT_EQ(fclose(f_mountinfo), 0); +} + TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/filesystems/overlayfs/wrappers.h b/tools/testing/selftests/filesystems/overlayfs/wrappers.h index 071b95fd2ac0..c38bc48e0cfa 100644 --- a/tools/testing/selftests/filesystems/overlayfs/wrappers.h +++ b/tools/testing/selftests/filesystems/overlayfs/wrappers.h @@ -44,4 +44,21 @@ static inline int sys_move_mount(int from_dfd, const char *from_pathname, to_pathname, flags); } +#ifndef OPEN_TREE_CLONE +#define OPEN_TREE_CLONE 1 +#endif + +#ifndef OPEN_TREE_CLOEXEC +#define OPEN_TREE_CLOEXEC O_CLOEXEC +#endif + +#ifndef AT_RECURSIVE +#define AT_RECURSIVE 0x8000 +#endif + +static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags) +{ + return syscall(__NR_open_tree, dfd, filename, flags); +} + #endif diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h index f4294bab9d73..a7a5289ddae9 100644 --- a/tools/testing/selftests/filesystems/statmount/statmount.h +++ b/tools/testing/selftests/filesystems/statmount/statmount.h @@ -25,7 +25,7 @@ static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask, return syscall(__NR_statmount, &req, buf, bufsize, flags); } -static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, +static inline ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t last_mnt_id, uint64_t list[], size_t num, unsigned int flags) { |