diff options
Diffstat (limited to 'fs/namei.c')
| -rw-r--r-- | fs/namei.c | 1059 |
1 files changed, 869 insertions, 190 deletions
diff --git a/fs/namei.c b/fs/namei.c index 7377020a2cba..bf0f66f0e9b9 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -282,7 +282,7 @@ void putname(struct filename *name) return; refcnt = atomic_read(&name->refcnt); - if (refcnt != 1) { + if (unlikely(refcnt != 1)) { if (WARN_ON_ONCE(!refcnt)) return; @@ -290,7 +290,7 @@ void putname(struct filename *name) return; } - if (name->name != name->iname) { + if (unlikely(name->name != name->iname)) { __putname(name->name); kfree(name); } else @@ -540,10 +540,13 @@ static inline int do_inode_permission(struct mnt_idmap *idmap, * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. + * + * Note: lookup_inode_permission_may_exec() does not call here. If you add + * MAY_EXEC checks, adjust it. */ static int sb_permission(struct super_block *sb, struct inode *inode, int mask) { - if (unlikely(mask & MAY_WRITE)) { + if (mask & MAY_WRITE) { umode_t mode = inode->i_mode; /* Nobody gets write access to a read-only fs. */ @@ -574,7 +577,7 @@ int inode_permission(struct mnt_idmap *idmap, if (unlikely(retval)) return retval; - if (unlikely(mask & MAY_WRITE)) { + if (mask & MAY_WRITE) { /* * Nobody gets write access to an immutable file. */ @@ -602,6 +605,42 @@ int inode_permission(struct mnt_idmap *idmap, } EXPORT_SYMBOL(inode_permission); +/* + * lookup_inode_permission_may_exec - Check traversal right for given inode + * + * This is a special case routine for may_lookup() making assumptions specific + * to path traversal. Use inode_permission() if you are doing something else. + * + * Work is shaved off compared to inode_permission() as follows: + * - we know for a fact there is no MAY_WRITE to worry about + * - it is an invariant the inode is a directory + * + * Since majority of real-world traversal happens on inodes which grant it for + * everyone, we check it upfront and only resort to more expensive work if it + * fails. + * + * Filesystems which have their own ->permission hook and consequently miss out + * on IOP_FASTPERM can still get the optimization if they set IOP_FASTPERM_MAY_EXEC + * on their directory inodes. + */ +static __always_inline int lookup_inode_permission_may_exec(struct mnt_idmap *idmap, + struct inode *inode, int mask) +{ + /* Lookup already checked this to return -ENOTDIR */ + VFS_BUG_ON_INODE(!S_ISDIR(inode->i_mode), inode); + VFS_BUG_ON((mask & ~MAY_NOT_BLOCK) != 0); + + mask |= MAY_EXEC; + + if (unlikely(!(inode->i_opflags & (IOP_FASTPERM | IOP_FASTPERM_MAY_EXEC)))) + return inode_permission(idmap, inode, mask); + + if (unlikely(((inode->i_mode & 0111) != 0111) || !no_acl_inode(inode))) + return inode_permission(idmap, inode, mask); + + return security_inode_permission(inode, mask); +} + /** * path_get - get a reference to a path * @path: path to get the reference to @@ -746,7 +785,8 @@ static void leave_rcu(struct nameidata *nd) static void terminate_walk(struct nameidata *nd) { - drop_links(nd); + if (unlikely(nd->depth)) + drop_links(nd); if (!(nd->flags & LOOKUP_RCU)) { int i; path_put(&nd->path); @@ -843,7 +883,7 @@ static bool try_to_unlazy(struct nameidata *nd) BUG_ON(!(nd->flags & LOOKUP_RCU)); - if (unlikely(!legitimize_links(nd))) + if (unlikely(nd->depth && !legitimize_links(nd))) goto out1; if (unlikely(!legitimize_path(nd, &nd->path, nd->seq))) goto out; @@ -878,7 +918,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry) int res; BUG_ON(!(nd->flags & LOOKUP_RCU)); - if (unlikely(!legitimize_links(nd))) + if (unlikely(nd->depth && !legitimize_links(nd))) goto out2; res = __legitimize_mnt(nd->path.mnt, nd->m_seq); if (unlikely(res)) { @@ -951,8 +991,8 @@ static int complete_walk(struct nameidata *nd) * We don't want to zero nd->root for scoped-lookups or * externally-managed nd->root. */ - if (!(nd->state & ND_ROOT_PRESET)) - if (!(nd->flags & LOOKUP_IS_SCOPED)) + if (likely(!(nd->state & ND_ROOT_PRESET))) + if (likely(!(nd->flags & LOOKUP_IS_SCOPED))) nd->root.mnt = NULL; nd->flags &= ~LOOKUP_CACHED; if (!try_to_unlazy(nd)) @@ -1034,7 +1074,7 @@ static int nd_jump_root(struct nameidata *nd) } if (!nd->root.mnt) { int error = set_root(nd); - if (error) + if (unlikely(error)) return error; } if (nd->flags & LOOKUP_RCU) { @@ -1632,13 +1672,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry, path->dentry = dentry; if (nd->flags & LOOKUP_RCU) { unsigned int seq = nd->next_seq; + if (likely(!d_managed(dentry))) + return 0; if (likely(__follow_mount_rcu(nd, path))) return 0; // *path and nd->next_seq might've been clobbered path->mnt = nd->path.mnt; path->dentry = dentry; nd->next_seq = seq; - if (!try_to_unlazy_next(nd, dentry)) + if (unlikely(!try_to_unlazy_next(nd, dentry))) return -ECHILD; } ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags); @@ -1823,7 +1865,7 @@ again: return dentry; } -static struct dentry *lookup_slow(const struct qstr *name, +static noinline struct dentry *lookup_slow(const struct qstr *name, struct dentry *dir, unsigned int flags) { @@ -1855,7 +1897,7 @@ static inline int may_lookup(struct mnt_idmap *idmap, int err, mask; mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0; - err = inode_permission(idmap, nd->inode, mask | MAY_EXEC); + err = lookup_inode_permission_may_exec(idmap, nd->inode, mask); if (likely(!err)) return 0; @@ -1870,7 +1912,7 @@ static inline int may_lookup(struct mnt_idmap *idmap, if (err != -ECHILD) // hard error return err; - return inode_permission(idmap, nd->inode, MAY_EXEC); + return lookup_inode_permission_may_exec(idmap, nd->inode, 0); } static int reserve_stack(struct nameidata *nd, struct path *link) @@ -1901,13 +1943,23 @@ static int reserve_stack(struct nameidata *nd, struct path *link) enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4}; -static const char *pick_link(struct nameidata *nd, struct path *link, +static noinline const char *pick_link(struct nameidata *nd, struct path *link, struct inode *inode, int flags) { struct saved *last; const char *res; - int error = reserve_stack(nd, link); + int error; + if (nd->flags & LOOKUP_RCU) { + /* make sure that d_is_symlink from step_into_slowpath() matches the inode */ + if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq)) + return ERR_PTR(-ECHILD); + } else { + if (link->mnt == nd->path.mnt) + mntget(link->mnt); + } + + error = reserve_stack(nd, link); if (unlikely(error)) { if (!(nd->flags & LOOKUP_RCU)) path_put(link); @@ -1981,14 +2033,15 @@ all_done: // pure jump * * NOTE: dentry must be what nd->next_seq had been sampled from. */ -static const char *step_into(struct nameidata *nd, int flags, +static noinline const char *step_into_slowpath(struct nameidata *nd, int flags, struct dentry *dentry) { struct path path; struct inode *inode; - int err = handle_mounts(nd, dentry, &path); + int err; - if (err < 0) + err = handle_mounts(nd, dentry, &path); + if (unlikely(err < 0)) return ERR_PTR(err); inode = path.dentry->d_inode; if (likely(!d_is_symlink(path.dentry)) || @@ -2010,15 +2063,32 @@ static const char *step_into(struct nameidata *nd, int flags, nd->seq = nd->next_seq; return NULL; } - if (nd->flags & LOOKUP_RCU) { - /* make sure that d_is_symlink above matches inode */ - if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq)) + return pick_link(nd, &path, inode, flags); +} + +static __always_inline const char *step_into(struct nameidata *nd, int flags, + struct dentry *dentry) +{ + /* + * In the common case we are in rcu-walk and traversing over a non-mounted on + * directory (as opposed to e.g., a symlink). + * + * We can handle that and negative entries with the checks below. + */ + if (likely((nd->flags & LOOKUP_RCU) && + !d_managed(dentry) && !d_is_symlink(dentry))) { + struct inode *inode = dentry->d_inode; + if (read_seqcount_retry(&dentry->d_seq, nd->next_seq)) return ERR_PTR(-ECHILD); - } else { - if (path.mnt == nd->path.mnt) - mntget(path.mnt); + if (unlikely(!inode)) + return ERR_PTR(-ENOENT); + nd->path.dentry = dentry; + /* nd->path.mnt is retained on purpose */ + nd->inode = inode; + nd->seq = nd->next_seq; + return NULL; } - return pick_link(nd, &path, inode, flags); + return step_into_slowpath(nd, flags, dentry); } static struct dentry *follow_dotdot_rcu(struct nameidata *nd) @@ -2101,7 +2171,7 @@ static const char *handle_dots(struct nameidata *nd, int type) if (!nd->root.mnt) { error = ERR_PTR(set_root(nd)); - if (error) + if (unlikely(error)) return error; } if (nd->flags & LOOKUP_RCU) @@ -2131,7 +2201,7 @@ static const char *handle_dots(struct nameidata *nd, int type) return NULL; } -static const char *walk_component(struct nameidata *nd, int flags) +static __always_inline const char *walk_component(struct nameidata *nd, int flags) { struct dentry *dentry; /* @@ -2140,7 +2210,7 @@ static const char *walk_component(struct nameidata *nd, int flags) * parent relationships. */ if (unlikely(nd->last_type != LAST_NORM)) { - if (!(flags & WALK_MORE) && nd->depth) + if (unlikely(nd->depth) && !(flags & WALK_MORE)) put_link(nd); return handle_dots(nd, nd->last_type); } @@ -2152,7 +2222,7 @@ static const char *walk_component(struct nameidata *nd, int flags) if (IS_ERR(dentry)) return ERR_CAST(dentry); } - if (!(flags & WALK_MORE) && nd->depth) + if (unlikely(nd->depth) && !(flags & WALK_MORE)) put_link(nd); return step_into(nd, flags, dentry); } @@ -2505,7 +2575,7 @@ static int link_path_walk(const char *name, struct nameidata *nd) if (unlikely(!*name)) { OK: /* pathname or trailing symlink, done */ - if (!depth) { + if (likely(!depth)) { nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode); nd->dir_mode = nd->inode->i_mode; nd->flags &= ~LOOKUP_PARENT; @@ -2543,10 +2613,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags) const char *s = nd->pathname; /* LOOKUP_CACHED requires RCU, ask caller to retry */ - if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED) + if (unlikely((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)) return ERR_PTR(-EAGAIN); - if (!*s) + if (unlikely(!*s)) flags &= ~LOOKUP_RCU; if (flags & LOOKUP_RCU) rcu_read_lock(); @@ -2560,7 +2630,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount); smp_rmb(); - if (nd->state & ND_ROOT_PRESET) { + if (unlikely(nd->state & ND_ROOT_PRESET)) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; if (*s && unlikely(!d_can_lookup(root))) @@ -2579,7 +2649,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->root.mnt = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */ - if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { + if (*s == '/' && likely(!(flags & LOOKUP_IN_ROOT))) { error = nd_jump_root(nd); if (unlikely(error)) return ERR_PTR(error); @@ -2632,7 +2702,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } /* For scoped-lookups we need to set the root to the dirfd as well. */ - if (flags & LOOKUP_IS_SCOPED) { + if (unlikely(flags & LOOKUP_IS_SCOPED)) { nd->root = nd->path; if (flags & LOOKUP_RCU) { nd->root_seq = nd->seq; @@ -2765,6 +2835,62 @@ static int filename_parentat(int dfd, struct filename *name, return __filename_parentat(dfd, name, flags, parent, last, type, NULL); } +/** + * start_dirop - begin a create or remove dirop, performing locking and lookup + * @parent: the dentry of the parent in which the operation will occur + * @name: a qstr holding the name within that parent + * @lookup_flags: intent and other lookup flags. + * + * The lookup is performed and necessary locks are taken so that, on success, + * the returned dentry can be operated on safely. + * The qstr must already have the hash value calculated. + * + * Returns: a locked dentry, or an error. + * + */ +static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags, + unsigned int state) +{ + struct dentry *dentry; + struct inode *dir = d_inode(parent); + + if (state == TASK_KILLABLE) { + int ret = down_write_killable_nested(&dir->i_rwsem, + I_MUTEX_PARENT); + if (ret) + return ERR_PTR(ret); + } else { + inode_lock_nested(dir, I_MUTEX_PARENT); + } + dentry = lookup_one_qstr_excl(name, parent, lookup_flags); + if (IS_ERR(dentry)) + inode_unlock(dir); + return dentry; +} + +struct dentry *start_dirop(struct dentry *parent, struct qstr *name, + unsigned int lookup_flags) +{ + return __start_dirop(parent, name, lookup_flags, TASK_NORMAL); +} + +/** + * end_dirop - signal completion of a dirop + * @de: the dentry which was returned by start_dirop or similar. + * + * If the de is an error, nothing happens. Otherwise any lock taken to + * protect the dentry is dropped and the dentry itself is release (dput()). + */ +void end_dirop(struct dentry *de) +{ + if (!IS_ERR(de)) { + inode_unlock(de->d_parent->d_inode); + dput(de); + } +} +EXPORT_SYMBOL(end_dirop); + /* does lookup, returns the object with parent locked */ static struct dentry *__start_removing_path(int dfd, struct filename *name, struct path *path) @@ -2781,10 +2907,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name, return ERR_PTR(-EINVAL); /* don't fail immediately if it's r/o, at least try to report other errors */ error = mnt_want_write(parent_path.mnt); - inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT); - d = lookup_one_qstr_excl(&last, parent_path.dentry, 0); + d = start_dirop(parent_path.dentry, &last, 0); if (IS_ERR(d)) - goto unlock; + goto drop; if (error) goto fail; path->dentry = no_free_ptr(parent_path.dentry); @@ -2792,10 +2917,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name, return d; fail: - dput(d); + end_dirop(d); d = ERR_PTR(error); -unlock: - inode_unlock(parent_path.dentry->d_inode); +drop: if (!error) mnt_drop_write(parent_path.mnt); return d; @@ -2910,7 +3034,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt, } EXPORT_SYMBOL(vfs_path_lookup); -static int lookup_noperm_common(struct qstr *qname, struct dentry *base) +int lookup_noperm_common(struct qstr *qname, struct dentry *base) { const char *name = qname->name; u32 len = qname->len; @@ -3181,6 +3305,234 @@ struct dentry *lookup_noperm_positive_unlocked(struct qstr *name, } EXPORT_SYMBOL(lookup_noperm_positive_unlocked); +/** + * start_creating - prepare to create a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup is performed prior to creating + * an object in a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name already exists, a positive dentry is returned, so + * behaviour is similar to O_CREAT without O_EXCL, which doesn't fail + * with -EEXIST. + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, LOOKUP_CREATE); +} +EXPORT_SYMBOL(start_creating); + +/** + * start_removing - prepare to remove a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, 0); +} +EXPORT_SYMBOL(start_removing); + +/** + * start_creating_killable - prepare to create a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup in performed prior to creating + * an object in a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name already exists, a positive dentry is returned. + * + * If a signal is received or was already pending, the function aborts + * with -EINTR; + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE); +} +EXPORT_SYMBOL(start_creating_killable); + +/** + * start_removing_killable - prepare to remove a given name with permission checking + * @idmap: idmap of the mount + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. Permission checking (MAY_EXEC) is performed + * against @idmap. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * If a signal is received or was already pending, the function aborts + * with -EINTR; + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing_killable(struct mnt_idmap *idmap, + struct dentry *parent, + struct qstr *name) +{ + int err = lookup_one_common(idmap, name, parent); + + if (err) + return ERR_PTR(err); + return __start_dirop(parent, name, 0, TASK_KILLABLE); +} +EXPORT_SYMBOL(start_removing_killable); + +/** + * start_creating_noperm - prepare to create a given name without permission checking + * @parent: directory in which to prepare to create the name + * @name: the name to be created + * + * Locks are taken and a lookup in performed prior to creating + * an object in a directory. + * + * If the name already exists, a positive dentry is returned. + * + * Returns: a negative or positive dentry, or an error. + */ +struct dentry *start_creating_noperm(struct dentry *parent, + struct qstr *name) +{ + int err = lookup_noperm_common(name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, LOOKUP_CREATE); +} +EXPORT_SYMBOL(start_creating_noperm); + +/** + * start_removing_noperm - prepare to remove a given name without permission checking + * @parent: directory in which to find the name + * @name: the name to be removed + * + * Locks are taken and a lookup in performed prior to removing + * an object from a directory. + * + * If the name doesn't exist, an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: a positive dentry, or an error. + */ +struct dentry *start_removing_noperm(struct dentry *parent, + struct qstr *name) +{ + int err = lookup_noperm_common(name, parent); + + if (err) + return ERR_PTR(err); + return start_dirop(parent, name, 0); +} +EXPORT_SYMBOL(start_removing_noperm); + +/** + * start_creating_dentry - prepare to create a given dentry + * @parent: directory from which dentry should be removed + * @child: the dentry to be removed + * + * A lock is taken to protect the dentry again other dirops and + * the validity of the dentry is checked: correct parent and still hashed. + * + * If the dentry is valid and negative a reference is taken and + * returned. If not an error is returned. + * + * end_creating() should be called when creation is complete, or aborted. + * + * Returns: the valid dentry, or an error. + */ +struct dentry *start_creating_dentry(struct dentry *parent, + struct dentry *child) +{ + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + if (unlikely(IS_DEADDIR(parent->d_inode) || + child->d_parent != parent || + d_unhashed(child))) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EINVAL); + } + if (d_is_positive(child)) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EEXIST); + } + return dget(child); +} +EXPORT_SYMBOL(start_creating_dentry); + +/** + * start_removing_dentry - prepare to remove a given dentry + * @parent: directory from which dentry should be removed + * @child: the dentry to be removed + * + * A lock is taken to protect the dentry again other dirops and + * the validity of the dentry is checked: correct parent and still hashed. + * + * If the dentry is valid and positive, a reference is taken and + * returned. If not an error is returned. + * + * end_removing() should be called when removal is complete, or aborted. + * + * Returns: the valid dentry, or an error. + */ +struct dentry *start_removing_dentry(struct dentry *parent, + struct dentry *child) +{ + inode_lock_nested(parent->d_inode, I_MUTEX_PARENT); + if (unlikely(IS_DEADDIR(parent->d_inode) || + child->d_parent != parent || + d_unhashed(child))) { + inode_unlock(parent->d_inode); + return ERR_PTR(-EINVAL); + } + if (d_is_negative(child)) { + inode_unlock(parent->d_inode); + return ERR_PTR(-ENOENT); + } + return dget(child); +} +EXPORT_SYMBOL(start_removing_dentry); + #ifdef CONFIG_UNIX98_PTYS int path_pts(struct path *path) { @@ -3419,6 +3771,290 @@ void unlock_rename(struct dentry *p1, struct dentry *p2) EXPORT_SYMBOL(unlock_rename); /** + * __start_renaming - lookup and lock names for rename + * @rd: rename data containing parents and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_last: name of object in @rd.old_parent + * @new_last: name of object in @rd.new_parent + * + * Look up two names and ensure locks are in place for + * rename. + * + * On success the found dentries are stored in @rd.old_dentry, + * @rd.new_dentry and an extra ref is taken on @rd.old_parent. + * These references and the lock are dropped by end_renaming(). + * + * The passed in qstrs must have the hash calculated, and no permission + * checking is performed. + * + * Returns: zero or an error. + */ +static int +__start_renaming(struct renamedata *rd, int lookup_flags, + struct qstr *old_last, struct qstr *new_last) +{ + struct dentry *trap; + struct dentry *d1, *d2; + int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + int err; + + if (rd->flags & RENAME_EXCHANGE) + target_flags = 0; + if (rd->flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; + + trap = lock_rename(rd->old_parent, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + + d1 = lookup_one_qstr_excl(old_last, rd->old_parent, + lookup_flags); + err = PTR_ERR(d1); + if (IS_ERR(d1)) + goto out_unlock; + + d2 = lookup_one_qstr_excl(new_last, rd->new_parent, + lookup_flags | target_flags); + err = PTR_ERR(d2); + if (IS_ERR(d2)) + goto out_dput_d1; + + if (d1 == trap) { + /* source is an ancestor of target */ + err = -EINVAL; + goto out_dput_d2; + } + + if (d2 == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_dput_d2; + } + + rd->old_dentry = d1; + rd->new_dentry = d2; + dget(rd->old_parent); + return 0; + +out_dput_d2: + dput(d2); +out_dput_d1: + dput(d1); +out_unlock: + unlock_rename(rd->old_parent, rd->new_parent); + return err; +} + +/** + * start_renaming - lookup and lock names for rename with permission checking + * @rd: rename data containing parents and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_last: name of object in @rd.old_parent + * @new_last: name of object in @rd.new_parent + * + * Look up two names and ensure locks are in place for + * rename. + * + * On success the found dentries are stored in @rd.old_dentry, + * @rd.new_dentry. Also the refcount on @rd->old_parent is increased. + * These references and the lock are dropped by end_renaming(). + * + * The passed in qstrs need not have the hash calculated, and basic + * eXecute permission checking is performed against @rd.mnt_idmap. + * + * Returns: zero or an error. + */ +int start_renaming(struct renamedata *rd, int lookup_flags, + struct qstr *old_last, struct qstr *new_last) +{ + int err; + + err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent); + if (err) + return err; + err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent); + if (err) + return err; + return __start_renaming(rd, lookup_flags, old_last, new_last); +} +EXPORT_SYMBOL(start_renaming); + +static int +__start_renaming_dentry(struct renamedata *rd, int lookup_flags, + struct dentry *old_dentry, struct qstr *new_last) +{ + struct dentry *trap; + struct dentry *d2; + int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + int err; + + if (rd->flags & RENAME_EXCHANGE) + target_flags = 0; + if (rd->flags & RENAME_NOREPLACE) + target_flags |= LOOKUP_EXCL; + + /* Already have the dentry - need to be sure to lock the correct parent */ + trap = lock_rename_child(old_dentry, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + if (d_unhashed(old_dentry) || + (rd->old_parent && rd->old_parent != old_dentry->d_parent)) { + /* dentry was removed, or moved and explicit parent requested */ + err = -EINVAL; + goto out_unlock; + } + + d2 = lookup_one_qstr_excl(new_last, rd->new_parent, + lookup_flags | target_flags); + err = PTR_ERR(d2); + if (IS_ERR(d2)) + goto out_unlock; + + if (old_dentry == trap) { + /* source is an ancestor of target */ + err = -EINVAL; + goto out_dput_d2; + } + + if (d2 == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_dput_d2; + } + + rd->old_dentry = dget(old_dentry); + rd->new_dentry = d2; + rd->old_parent = dget(old_dentry->d_parent); + return 0; + +out_dput_d2: + dput(d2); +out_unlock: + unlock_rename(old_dentry->d_parent, rd->new_parent); + return err; +} + +/** + * start_renaming_dentry - lookup and lock name for rename with permission checking + * @rd: rename data containing parents and flags, and + * for receiving found dentries + * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL, + * LOOKUP_NO_SYMLINKS etc). + * @old_dentry: dentry of name to move + * @new_last: name of target in @rd.new_parent + * + * Look up target name and ensure locks are in place for + * rename. + * + * On success the found dentry is stored in @rd.new_dentry and + * @rd.old_parent is confirmed to be the parent of @old_dentry. If it + * was originally %NULL, it is set. In either case a reference is taken + * so that end_renaming() can have a stable reference to unlock. + * + * References and the lock can be dropped with end_renaming() + * + * The passed in qstr need not have the hash calculated, and basic + * eXecute permission checking is performed against @rd.mnt_idmap. + * + * Returns: zero or an error. + */ +int start_renaming_dentry(struct renamedata *rd, int lookup_flags, + struct dentry *old_dentry, struct qstr *new_last) +{ + int err; + + err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent); + if (err) + return err; + return __start_renaming_dentry(rd, lookup_flags, old_dentry, new_last); +} +EXPORT_SYMBOL(start_renaming_dentry); + +/** + * start_renaming_two_dentries - Lock to dentries in given parents for rename + * @rd: rename data containing parent + * @old_dentry: dentry of name to move + * @new_dentry: dentry to move to + * + * Ensure locks are in place for rename and check parentage is still correct. + * + * On success the two dentries are stored in @rd.old_dentry and + * @rd.new_dentry and @rd.old_parent and @rd.new_parent are confirmed to + * be the parents of the dentries. + * + * References and the lock can be dropped with end_renaming() + * + * Returns: zero or an error. + */ +int +start_renaming_two_dentries(struct renamedata *rd, + struct dentry *old_dentry, struct dentry *new_dentry) +{ + struct dentry *trap; + int err; + + /* Already have the dentry - need to be sure to lock the correct parent */ + trap = lock_rename_child(old_dentry, rd->new_parent); + if (IS_ERR(trap)) + return PTR_ERR(trap); + err = -EINVAL; + if (d_unhashed(old_dentry) || + (rd->old_parent && rd->old_parent != old_dentry->d_parent)) + /* old_dentry was removed, or moved and explicit parent requested */ + goto out_unlock; + if (d_unhashed(new_dentry) || + rd->new_parent != new_dentry->d_parent) + /* new_dentry was removed or moved */ + goto out_unlock; + + if (old_dentry == trap) + /* source is an ancestor of target */ + goto out_unlock; + + if (new_dentry == trap) { + /* target is an ancestor of source */ + if (rd->flags & RENAME_EXCHANGE) + err = -EINVAL; + else + err = -ENOTEMPTY; + goto out_unlock; + } + + err = -EEXIST; + if (d_is_positive(new_dentry) && (rd->flags & RENAME_NOREPLACE)) + goto out_unlock; + + rd->old_dentry = dget(old_dentry); + rd->new_dentry = dget(new_dentry); + rd->old_parent = dget(old_dentry->d_parent); + return 0; + +out_unlock: + unlock_rename(old_dentry->d_parent, rd->new_parent); + return err; +} +EXPORT_SYMBOL(start_renaming_two_dentries); + +void end_renaming(struct renamedata *rd) +{ + unlock_rename(rd->old_parent, rd->new_parent); + dput(rd->old_dentry); + dput(rd->new_dentry); + dput(rd->old_parent); +} +EXPORT_SYMBOL(end_renaming); + +/** * vfs_prepare_mode - prepare the mode to be used for a new inode * @idmap: idmap of the mount the inode was found from * @dir: parent directory of the new inode @@ -3461,10 +4097,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, /** * vfs_create - create new file * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory * @dentry: dentry of the child file * @mode: mode of the child file - * @want_excl: whether the file must not yet exist + * @di: returns parent inode, if the inode is delegated. * * Create a new file. * @@ -3474,9 +4109,10 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap, * On non-idmapped mounts or if permission checking is to be performed on the * raw inode simply pass @nop_mnt_idmap. */ -int vfs_create(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, bool want_excl) +int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode, + struct delegated_inode *di) { + struct inode *dir = d_inode(dentry->d_parent); int error; error = may_create(idmap, dir, dentry); @@ -3490,7 +4126,10 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir, error = security_inode_create(dir, dentry, mode); if (error) return error; - error = dir->i_op->create(idmap, dir, dentry, mode, want_excl); + error = try_break_deleg(dir, di); + if (error) + return error; + error = dir->i_op->create(idmap, dir, dentry, mode, true); if (!error) fsnotify_create(dir, dentry); return error; @@ -3697,7 +4336,7 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry, */ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, const struct open_flags *op, - bool got_write) + bool got_write, struct delegated_inode *delegated_inode) { struct mnt_idmap *idmap; struct dentry *dir = nd->path.dentry; @@ -3786,6 +4425,11 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, /* Negative dentry, just create the file */ if (!dentry->d_inode && (open_flag & O_CREAT)) { + /* but break the directory lease first! */ + error = try_break_deleg(dir_inode, delegated_inode); + if (error) + goto out_dput; + file->f_mode |= FMODE_CREATED; audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE); if (!dir_inode->i_op->create) { @@ -3848,6 +4492,7 @@ static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { + struct delegated_inode delegated_inode = { }; struct dentry *dir = nd->path.dentry; int open_flag = op->open_flag; bool got_write = false; @@ -3879,7 +4524,7 @@ static const char *open_last_lookups(struct nameidata *nd, return ERR_PTR(-ECHILD); } } - +retry: if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { got_write = !mnt_want_write(nd->path.mnt); /* @@ -3892,7 +4537,7 @@ static const char *open_last_lookups(struct nameidata *nd, inode_lock(dir->d_inode); else inode_lock_shared(dir->d_inode); - dentry = lookup_open(nd, file, op, got_write); + dentry = lookup_open(nd, file, op, got_write, &delegated_inode); if (!IS_ERR(dentry)) { if (file->f_mode & FMODE_CREATED) fsnotify_create(dir->d_inode, dentry); @@ -3907,8 +4552,16 @@ static const char *open_last_lookups(struct nameidata *nd, if (got_write) mnt_drop_write(nd->path.mnt); - if (IS_ERR(dentry)) + if (IS_ERR(dentry)) { + if (is_delegated(&delegated_inode)) { + int error = break_deleg_wait(&delegated_inode); + + if (!error) + goto retry; + return ERR_PTR(error); + } return ERR_CAST(dentry); + } if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) { dput(nd->path.dentry); @@ -4036,7 +4689,7 @@ int vfs_tmpfile(struct mnt_idmap *idmap, inode = file_inode(file); if (!(open_flag & O_EXCL)) { spin_lock(&inode->i_lock); - inode->i_state |= I_LINKABLE; + inode_state_set(inode, I_LINKABLE); spin_unlock(&inode->i_lock); } security_inode_post_create_tmpfile(idmap, inode); @@ -4223,21 +4876,18 @@ static struct dentry *filename_create(int dfd, struct filename *name, */ if (last.name[last.len] && !want_dir) create_flags &= ~LOOKUP_CREATE; - inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path->dentry, - reval_flag | create_flags); + dentry = start_dirop(path->dentry, &last, reval_flag | create_flags); if (IS_ERR(dentry)) - goto unlock; + goto out_drop_write; if (unlikely(error)) goto fail; return dentry; fail: - dput(dentry); + end_dirop(dentry); dentry = ERR_PTR(error); -unlock: - inode_unlock(path->dentry->d_inode); +out_drop_write: if (!error) mnt_drop_write(path->mnt); out: @@ -4256,11 +4906,20 @@ struct dentry *start_creating_path(int dfd, const char *pathname, } EXPORT_SYMBOL(start_creating_path); +/** + * end_creating_path - finish a code section started by start_creating_path() + * @path: the path instantiated by start_creating_path() + * @dentry: the dentry returned by start_creating_path() + * + * end_creating_path() will unlock and locks taken by start_creating_path() + * and drop an references that were taken. It should only be called + * if start_creating_path() returned a non-error. + * If vfs_mkdir() was called and it returned an error, that error *should* + * be passed to end_creating_path() together with the path. + */ void end_creating_path(const struct path *path, struct dentry *dentry) { - if (!IS_ERR(dentry)) - dput(dentry); - inode_unlock(path->dentry->d_inode); + end_creating(dentry); mnt_drop_write(path->mnt); path_put(path); } @@ -4278,13 +4937,15 @@ inline struct dentry *start_creating_user_path( } EXPORT_SYMBOL(start_creating_user_path); + /** * vfs_mknod - create device node or file - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child device node - * @mode: mode of the child device node - * @dev: device number of device to create + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child device node + * @mode: mode of the child device node + * @dev: device number of device to create + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a device node or file. * @@ -4295,7 +4956,8 @@ EXPORT_SYMBOL(start_creating_user_path); * raw inode simply pass @nop_mnt_idmap. */ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode, dev_t dev) + struct dentry *dentry, umode_t mode, dev_t dev, + struct delegated_inode *delegated_inode) { bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV; int error = may_create(idmap, dir, dentry); @@ -4319,6 +4981,10 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->mknod(idmap, dir, dentry, mode, dev); if (!error) fsnotify_create(dir, dentry); @@ -4346,6 +5012,7 @@ static int may_mknod(umode_t mode) static int do_mknodat(int dfd, struct filename *name, umode_t mode, unsigned int dev) { + struct delegated_inode di = { }; struct mnt_idmap *idmap; struct dentry *dentry; struct path path; @@ -4369,22 +5036,26 @@ retry: idmap = mnt_idmap(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: - error = vfs_create(idmap, path.dentry->d_inode, - dentry, mode, true); + error = vfs_create(idmap, dentry, mode, &di); if (!error) security_path_post_mknod(idmap, dentry); break; case S_IFCHR: case S_IFBLK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, new_decode_dev(dev)); + dentry, mode, new_decode_dev(dev), &di); break; case S_IFIFO: case S_IFSOCK: error = vfs_mknod(idmap, path.dentry->d_inode, - dentry, mode, 0); + dentry, mode, 0, &di); break; } out2: end_creating_path(&path, dentry); + if (is_delegated(&di)) { + error = break_deleg_wait(&di); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; @@ -4407,10 +5078,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d /** * vfs_mkdir - create directory returning correct dentry if possible - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory - * @mode: mode of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @mode: mode of the child directory + * @delegated_inode: returns parent inode, if the inode is delegated. * * Create a directory. * @@ -4427,7 +5099,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d * In case of an error the dentry is dput() and an ERR_PTR() is returned. */ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, umode_t mode) + struct dentry *dentry, umode_t mode, + struct delegated_inode *delegated_inode) { int error; unsigned max_links = dir->i_sb->s_max_links; @@ -4450,6 +5123,10 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, if (max_links && dir->i_nlink >= max_links) goto err; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto err; + de = dir->i_op->mkdir(idmap, dir, dentry, mode); error = PTR_ERR(de); if (IS_ERR(de)) @@ -4462,7 +5139,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, return dentry; err: - dput(dentry); + end_creating(dentry); return ERR_PTR(error); } EXPORT_SYMBOL(vfs_mkdir); @@ -4473,6 +5150,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode) struct path path; int error; unsigned int lookup_flags = LOOKUP_DIRECTORY; + struct delegated_inode delegated_inode = { }; retry: dentry = filename_create(dfd, name, &path, lookup_flags); @@ -4484,11 +5162,16 @@ retry: mode_strip_umask(path.dentry->d_inode, mode)); if (!error) { dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, mode); + dentry, mode, &delegated_inode); if (IS_ERR(dentry)) error = PTR_ERR(dentry); } end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; @@ -4510,9 +5193,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) /** * vfs_rmdir - remove directory - * @idmap: idmap of the mount the inode was found from - * @dir: inode of the parent directory - * @dentry: dentry of the child directory + * @idmap: idmap of the mount the inode was found from + * @dir: inode of the parent directory + * @dentry: dentry of the child directory + * @delegated_inode: returns parent inode, if it's delegated. * * Remove a directory. * @@ -4523,7 +5207,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode) * raw inode simply pass @nop_mnt_idmap. */ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry) + struct dentry *dentry, struct delegated_inode *delegated_inode) { int error = may_delete(idmap, dir, dentry, 1); @@ -4545,6 +5229,10 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir, if (error) goto out; + error = try_break_deleg(dir, delegated_inode); + if (error) + goto out; + error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -4571,6 +5259,7 @@ int do_rmdir(int dfd, struct filename *name) struct qstr last; int type; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) @@ -4592,22 +5281,26 @@ retry: if (error) goto exit2; - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry = start_dirop(path.dentry, &last, lookup_flags); error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto exit3; error = security_path_rmdir(&path, dentry); if (error) goto exit4; - error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry); + error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); exit4: - dput(dentry); + end_dirop(dentry); exit3: - inode_unlock(path.dentry->d_inode); mnt_drop_write(path.mnt); exit2: path_put(&path); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; @@ -4648,7 +5341,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, struct inode **delegated_inode) + struct dentry *dentry, struct delegated_inode *delegated_inode) { struct inode *target = dentry->d_inode; int error = may_delete(idmap, dir, dentry, 0); @@ -4667,6 +5360,9 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir, else { error = security_inode_unlink(dir, dentry); if (!error) { + error = try_break_deleg(dir, delegated_inode); + if (error) + goto out; error = try_break_deleg(target, delegated_inode); if (error) goto out; @@ -4705,67 +5401,62 @@ int do_unlinkat(int dfd, struct filename *name) struct path path; struct qstr last; int type; - struct inode *inode = NULL; - struct inode *delegated_inode = NULL; + struct inode *inode; + struct delegated_inode delegated_inode = { }; unsigned int lookup_flags = 0; retry: error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type); if (error) - goto exit1; + goto exit_putname; error = -EISDIR; if (type != LAST_NORM) - goto exit2; + goto exit_path_put; error = mnt_want_write(path.mnt); if (error) - goto exit2; + goto exit_path_put; retry_deleg: - inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT); - dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags); + dentry = start_dirop(path.dentry, &last, lookup_flags); error = PTR_ERR(dentry); - if (!IS_ERR(dentry)) { + if (IS_ERR(dentry)) + goto exit_drop_write; - /* Why not before? Because we want correct error value */ - if (last.name[last.len]) - goto slashes; - inode = dentry->d_inode; - ihold(inode); - error = security_path_unlink(&path, dentry); - if (error) - goto exit3; - error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, &delegated_inode); -exit3: - dput(dentry); + /* Why not before? Because we want correct error value */ + if (unlikely(last.name[last.len])) { + if (d_is_dir(dentry)) + error = -EISDIR; + else + error = -ENOTDIR; + end_dirop(dentry); + goto exit_drop_write; } - inode_unlock(path.dentry->d_inode); - if (inode) - iput(inode); /* truncate the inode here */ - inode = NULL; - if (delegated_inode) { + inode = dentry->d_inode; + ihold(inode); + error = security_path_unlink(&path, dentry); + if (error) + goto exit_end_dirop; + error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode, + dentry, &delegated_inode); +exit_end_dirop: + end_dirop(dentry); + iput(inode); /* truncate the inode here */ + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; } +exit_drop_write: mnt_drop_write(path.mnt); -exit2: +exit_path_put: path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; - inode = NULL; goto retry; } -exit1: +exit_putname: putname(name); return error; - -slashes: - if (d_is_dir(dentry)) - error = -EISDIR; - else - error = -ENOTDIR; - goto exit3; } SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) @@ -4789,6 +5480,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * @dir: inode of the parent directory * @dentry: dentry of the child symlink file * @oldname: name of the file to link to + * @delegated_inode: returns victim inode, if the inode is delegated. * * Create a symlink. * @@ -4799,7 +5491,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname) * raw inode simply pass @nop_mnt_idmap. */ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, - struct dentry *dentry, const char *oldname) + struct dentry *dentry, const char *oldname, + struct delegated_inode *delegated_inode) { int error; @@ -4814,6 +5507,10 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir, if (error) return error; + error = try_break_deleg(dir, delegated_inode); + if (error) + return error; + error = dir->i_op->symlink(idmap, dir, dentry, oldname); if (!error) fsnotify_create(dir, dentry); @@ -4827,6 +5524,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to) struct dentry *dentry; struct path path; unsigned int lookup_flags = 0; + struct delegated_inode delegated_inode = { }; if (IS_ERR(from)) { error = PTR_ERR(from); @@ -4841,8 +5539,13 @@ retry: error = security_path_symlink(&path, dentry, from->name); if (!error) error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode, - dentry, from->name); + dentry, from->name, &delegated_inode); end_creating_path(&path, dentry); + if (is_delegated(&delegated_inode)) { + error = break_deleg_wait(&delegated_inode); + if (!error) + goto retry; + } if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; goto retry; @@ -4892,7 +5595,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn */ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, struct inode *dir, struct dentry *new_dentry, - struct inode **delegated_inode) + struct delegated_inode *delegated_inode) { struct inode *inode = old_dentry->d_inode; unsigned max_links = dir->i_sb->s_max_links; @@ -4931,19 +5634,21 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap, inode_lock(inode); /* Make sure we don't allow creating hardlink to an unlinked file */ - if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) + if (inode->i_nlink == 0 && !(inode_state_read_once(inode) & I_LINKABLE)) error = -ENOENT; else if (max_links && inode->i_nlink >= max_links) error = -EMLINK; else { - error = try_break_deleg(inode, delegated_inode); + error = try_break_deleg(dir, delegated_inode); + if (!error) + error = try_break_deleg(inode, delegated_inode); if (!error) error = dir->i_op->link(old_dentry, dir, new_dentry); } - if (!error && (inode->i_state & I_LINKABLE)) { + if (!error && (inode_state_read_once(inode) & I_LINKABLE)) { spin_lock(&inode->i_lock); - inode->i_state &= ~I_LINKABLE; + inode_state_clear(inode, I_LINKABLE); spin_unlock(&inode->i_lock); } inode_unlock(inode); @@ -4968,7 +5673,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd, struct mnt_idmap *idmap; struct dentry *new_dentry; struct path old_path, new_path; - struct inode *delegated_inode = NULL; + struct delegated_inode delegated_inode = { }; int how = 0; int error; @@ -5012,7 +5717,7 @@ retry: new_dentry, &delegated_inode); out_dput: end_creating_path(&new_path, new_dentry); - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) { path_put(&old_path); @@ -5098,7 +5803,7 @@ int vfs_rename(struct renamedata *rd) struct inode *new_dir = d_inode(rd->new_parent); struct dentry *old_dentry = rd->old_dentry; struct dentry *new_dentry = rd->new_dentry; - struct inode **delegated_inode = rd->delegated_inode; + struct delegated_inode *delegated_inode = rd->delegated_inode; unsigned int flags = rd->flags; bool is_dir = d_is_dir(old_dentry); struct inode *source = old_dentry->d_inode; @@ -5203,6 +5908,14 @@ int vfs_rename(struct renamedata *rd) old_dir->i_nlink >= max_links) goto out; } + error = try_break_deleg(old_dir, delegated_inode); + if (error) + goto out; + if (new_dir != old_dir) { + error = try_break_deleg(new_dir, delegated_inode); + if (error) + goto out; + } if (!is_dir) { error = try_break_deleg(source, delegated_inode); if (error) @@ -5256,14 +5969,11 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, struct filename *to, unsigned int flags) { struct renamedata rd; - struct dentry *old_dentry, *new_dentry; - struct dentry *trap; struct path old_path, new_path; struct qstr old_last, new_last; int old_type, new_type; - struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0, target_flags = - LOOKUP_RENAME_TARGET | LOOKUP_CREATE; + struct delegated_inode delegated_inode = { }; + unsigned int lookup_flags = 0; bool should_retry = false; int error = -EINVAL; @@ -5274,11 +5984,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd, (flags & RENAME_EXCHANGE)) goto put_names; - if (flags & RENAME_EXCHANGE) - target_flags = 0; - if (flags & RENAME_NOREPLACE) - target_flags |= LOOKUP_EXCL; - retry: error = filename_parentat(olddfd, from, lookup_flags, &old_path, &old_last, &old_type); @@ -5308,68 +6013,42 @@ retry: goto exit2; retry_deleg: - trap = lock_rename(new_path.dentry, old_path.dentry); - if (IS_ERR(trap)) { - error = PTR_ERR(trap); + rd.old_parent = old_path.dentry; + rd.mnt_idmap = mnt_idmap(old_path.mnt); + rd.new_parent = new_path.dentry; + rd.delegated_inode = &delegated_inode; + rd.flags = flags; + + error = __start_renaming(&rd, lookup_flags, &old_last, &new_last); + if (error) goto exit_lock_rename; - } - old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry, - lookup_flags); - error = PTR_ERR(old_dentry); - if (IS_ERR(old_dentry)) - goto exit3; - new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry, - lookup_flags | target_flags); - error = PTR_ERR(new_dentry); - if (IS_ERR(new_dentry)) - goto exit4; if (flags & RENAME_EXCHANGE) { - if (!d_is_dir(new_dentry)) { + if (!d_is_dir(rd.new_dentry)) { error = -ENOTDIR; if (new_last.name[new_last.len]) - goto exit5; + goto exit_unlock; } } /* unless the source is a directory trailing slashes give -ENOTDIR */ - if (!d_is_dir(old_dentry)) { + if (!d_is_dir(rd.old_dentry)) { error = -ENOTDIR; if (old_last.name[old_last.len]) - goto exit5; + goto exit_unlock; if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len]) - goto exit5; + goto exit_unlock; } - /* source should not be ancestor of target */ - error = -EINVAL; - if (old_dentry == trap) - goto exit5; - /* target should not be an ancestor of source */ - if (!(flags & RENAME_EXCHANGE)) - error = -ENOTEMPTY; - if (new_dentry == trap) - goto exit5; - error = security_path_rename(&old_path, old_dentry, - &new_path, new_dentry, flags); + error = security_path_rename(&old_path, rd.old_dentry, + &new_path, rd.new_dentry, flags); if (error) - goto exit5; + goto exit_unlock; - rd.old_parent = old_path.dentry; - rd.old_dentry = old_dentry; - rd.mnt_idmap = mnt_idmap(old_path.mnt); - rd.new_parent = new_path.dentry; - rd.new_dentry = new_dentry; - rd.delegated_inode = &delegated_inode; - rd.flags = flags; error = vfs_rename(&rd); -exit5: - dput(new_dentry); -exit4: - dput(old_dentry); -exit3: - unlock_rename(new_path.dentry, old_path.dentry); +exit_unlock: + end_renaming(&rd); exit_lock_rename: - if (delegated_inode) { + if (is_delegated(&delegated_inode)) { error = break_deleg_wait(&delegated_inode); if (!error) goto retry_deleg; |
