summary | refs | log | tree | commit | diff
path: root/fs/namei.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/namei.c')
-rw-r--r-- fs/namei.c | 1059
1 files changed, 869 insertions, 190 deletions
diff --git a/fs/namei.c b/fs/namei.c
index 7377020a2cba..bf0f66f0e9b9 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -282,7 +282,7 @@ void putname(struct filename *name)
return;
refcnt = atomic_read(&name->refcnt);
- if (refcnt != 1) {
+ if (unlikely(refcnt != 1)) {
if (WARN_ON_ONCE(!refcnt))
return;
@@ -290,7 +290,7 @@ void putname(struct filename *name)
return;
}
- if (name->name != name->iname) {
+ if (unlikely(name->name != name->iname)) {
__putname(name->name);
kfree(name);
} else
@@ -540,10 +540,13 @@ static inline int do_inode_permission(struct mnt_idmap *idmap,
* @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
*
* Separate out file-system wide checks from inode-specific permission checks.
+ *
+ * Note: lookup_inode_permission_may_exec() does not call here. If you add
+ * MAY_EXEC checks, adjust it.
*/
static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
{
- if (unlikely(mask & MAY_WRITE)) {
+ if (mask & MAY_WRITE) {
umode_t mode = inode->i_mode;
/* Nobody gets write access to a read-only fs. */
@@ -574,7 +577,7 @@ int inode_permission(struct mnt_idmap *idmap,
if (unlikely(retval))
return retval;
- if (unlikely(mask & MAY_WRITE)) {
+ if (mask & MAY_WRITE) {
/*
* Nobody gets write access to an immutable file.
*/
@@ -602,6 +605,42 @@ int inode_permission(struct mnt_idmap *idmap,
}
EXPORT_SYMBOL(inode_permission);
+/*
+ * lookup_inode_permission_may_exec - Check traversal right for given inode
+ *
+ * This is a special case routine for may_lookup() making assumptions specific
+ * to path traversal. Use inode_permission() if you are doing something else.
+ *
+ * Work is shaved off compared to inode_permission() as follows:
+ * - we know for a fact there is no MAY_WRITE to worry about
+ * - it is an invariant the inode is a directory
+ *
+ * Since the majority of real-world traversal happens on inodes which grant it
+ * for everyone, we check it upfront and only resort to more expensive work if
+ * it fails.
+ *
+ * Filesystems which have their own ->permission hook and consequently miss out
+ * on IOP_FASTPERM can still get the optimization if they set IOP_FASTPERM_MAY_EXEC
+ * on their directory inodes.
+ */
+static __always_inline int lookup_inode_permission_may_exec(struct mnt_idmap *idmap,
+ struct inode *inode, int mask)
+{
+ /* Lookup already checked this to return -ENOTDIR */
+ VFS_BUG_ON_INODE(!S_ISDIR(inode->i_mode), inode);
+ VFS_BUG_ON((mask & ~MAY_NOT_BLOCK) != 0);
+
+ mask |= MAY_EXEC;
+
+ if (unlikely(!(inode->i_opflags & (IOP_FASTPERM | IOP_FASTPERM_MAY_EXEC))))
+ return inode_permission(idmap, inode, mask);
+
+ if (unlikely(((inode->i_mode & 0111) != 0111) || !no_acl_inode(inode)))
+ return inode_permission(idmap, inode, mask);
+
+ return security_inode_permission(inode, mask);
+}
+
/**
* path_get - get a reference to a path
* @path: path to get the reference to
@@ -746,7 +785,8 @@ static void leave_rcu(struct nameidata *nd)
static void terminate_walk(struct nameidata *nd)
{
- drop_links(nd);
+ if (unlikely(nd->depth))
+ drop_links(nd);
if (!(nd->flags & LOOKUP_RCU)) {
int i;
path_put(&nd->path);
@@ -843,7 +883,7 @@ static bool try_to_unlazy(struct nameidata *nd)
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (unlikely(!legitimize_links(nd)))
+ if (unlikely(nd->depth && !legitimize_links(nd)))
goto out1;
if (unlikely(!legitimize_path(nd, &nd->path, nd->seq)))
goto out;
@@ -878,7 +918,7 @@ static bool try_to_unlazy_next(struct nameidata *nd, struct dentry *dentry)
int res;
BUG_ON(!(nd->flags & LOOKUP_RCU));
- if (unlikely(!legitimize_links(nd)))
+ if (unlikely(nd->depth && !legitimize_links(nd)))
goto out2;
res = __legitimize_mnt(nd->path.mnt, nd->m_seq);
if (unlikely(res)) {
@@ -951,8 +991,8 @@ static int complete_walk(struct nameidata *nd)
* We don't want to zero nd->root for scoped-lookups or
* externally-managed nd->root.
*/
- if (!(nd->state & ND_ROOT_PRESET))
- if (!(nd->flags & LOOKUP_IS_SCOPED))
+ if (likely(!(nd->state & ND_ROOT_PRESET)))
+ if (likely(!(nd->flags & LOOKUP_IS_SCOPED)))
nd->root.mnt = NULL;
nd->flags &= ~LOOKUP_CACHED;
if (!try_to_unlazy(nd))
@@ -1034,7 +1074,7 @@ static int nd_jump_root(struct nameidata *nd)
}
if (!nd->root.mnt) {
int error = set_root(nd);
- if (error)
+ if (unlikely(error))
return error;
}
if (nd->flags & LOOKUP_RCU) {
@@ -1632,13 +1672,15 @@ static inline int handle_mounts(struct nameidata *nd, struct dentry *dentry,
path->dentry = dentry;
if (nd->flags & LOOKUP_RCU) {
unsigned int seq = nd->next_seq;
+ if (likely(!d_managed(dentry)))
+ return 0;
if (likely(__follow_mount_rcu(nd, path)))
return 0;
// *path and nd->next_seq might've been clobbered
path->mnt = nd->path.mnt;
path->dentry = dentry;
nd->next_seq = seq;
- if (!try_to_unlazy_next(nd, dentry))
+ if (unlikely(!try_to_unlazy_next(nd, dentry)))
return -ECHILD;
}
ret = traverse_mounts(path, &jumped, &nd->total_link_count, nd->flags);
@@ -1823,7 +1865,7 @@ again:
return dentry;
}
-static struct dentry *lookup_slow(const struct qstr *name,
+static noinline struct dentry *lookup_slow(const struct qstr *name,
struct dentry *dir,
unsigned int flags)
{
@@ -1855,7 +1897,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
int err, mask;
mask = nd->flags & LOOKUP_RCU ? MAY_NOT_BLOCK : 0;
- err = inode_permission(idmap, nd->inode, mask | MAY_EXEC);
+ err = lookup_inode_permission_may_exec(idmap, nd->inode, mask);
if (likely(!err))
return 0;
@@ -1870,7 +1912,7 @@ static inline int may_lookup(struct mnt_idmap *idmap,
if (err != -ECHILD) // hard error
return err;
- return inode_permission(idmap, nd->inode, MAY_EXEC);
+ return lookup_inode_permission_may_exec(idmap, nd->inode, 0);
}
static int reserve_stack(struct nameidata *nd, struct path *link)
@@ -1901,13 +1943,23 @@ static int reserve_stack(struct nameidata *nd, struct path *link)
enum {WALK_TRAILING = 1, WALK_MORE = 2, WALK_NOFOLLOW = 4};
-static const char *pick_link(struct nameidata *nd, struct path *link,
+static noinline const char *pick_link(struct nameidata *nd, struct path *link,
struct inode *inode, int flags)
{
struct saved *last;
const char *res;
- int error = reserve_stack(nd, link);
+ int error;
+ if (nd->flags & LOOKUP_RCU) {
+ /* make sure that d_is_symlink from step_into_slowpath() matches the inode */
+ if (read_seqcount_retry(&link->dentry->d_seq, nd->next_seq))
+ return ERR_PTR(-ECHILD);
+ } else {
+ if (link->mnt == nd->path.mnt)
+ mntget(link->mnt);
+ }
+
+ error = reserve_stack(nd, link);
if (unlikely(error)) {
if (!(nd->flags & LOOKUP_RCU))
path_put(link);
@@ -1981,14 +2033,15 @@ all_done: // pure jump
*
* NOTE: dentry must be what nd->next_seq had been sampled from.
*/
-static const char *step_into(struct nameidata *nd, int flags,
+static noinline const char *step_into_slowpath(struct nameidata *nd, int flags,
struct dentry *dentry)
{
struct path path;
struct inode *inode;
- int err = handle_mounts(nd, dentry, &path);
+ int err;
- if (err < 0)
+ err = handle_mounts(nd, dentry, &path);
+ if (unlikely(err < 0))
return ERR_PTR(err);
inode = path.dentry->d_inode;
if (likely(!d_is_symlink(path.dentry)) ||
@@ -2010,15 +2063,32 @@ static const char *step_into(struct nameidata *nd, int flags,
nd->seq = nd->next_seq;
return NULL;
}
- if (nd->flags & LOOKUP_RCU) {
- /* make sure that d_is_symlink above matches inode */
- if (read_seqcount_retry(&path.dentry->d_seq, nd->next_seq))
+ return pick_link(nd, &path, inode, flags);
+}
+
+static __always_inline const char *step_into(struct nameidata *nd, int flags,
+ struct dentry *dentry)
+{
+ /*
+ * In the common case we are in rcu-walk and traversing over a directory
+ * which is not mounted on (as opposed to e.g., a symlink).
+ *
+ * We can handle that and negative entries with the checks below.
+ */
+ if (likely((nd->flags & LOOKUP_RCU) &&
+ !d_managed(dentry) && !d_is_symlink(dentry))) {
+ struct inode *inode = dentry->d_inode;
+ if (read_seqcount_retry(&dentry->d_seq, nd->next_seq))
return ERR_PTR(-ECHILD);
- } else {
- if (path.mnt == nd->path.mnt)
- mntget(path.mnt);
+ if (unlikely(!inode))
+ return ERR_PTR(-ENOENT);
+ nd->path.dentry = dentry;
+ /* nd->path.mnt is retained on purpose */
+ nd->inode = inode;
+ nd->seq = nd->next_seq;
+ return NULL;
}
- return pick_link(nd, &path, inode, flags);
+ return step_into_slowpath(nd, flags, dentry);
}
static struct dentry *follow_dotdot_rcu(struct nameidata *nd)
@@ -2101,7 +2171,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
if (!nd->root.mnt) {
error = ERR_PTR(set_root(nd));
- if (error)
+ if (unlikely(error))
return error;
}
if (nd->flags & LOOKUP_RCU)
@@ -2131,7 +2201,7 @@ static const char *handle_dots(struct nameidata *nd, int type)
return NULL;
}
-static const char *walk_component(struct nameidata *nd, int flags)
+static __always_inline const char *walk_component(struct nameidata *nd, int flags)
{
struct dentry *dentry;
/*
@@ -2140,7 +2210,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
* parent relationships.
*/
if (unlikely(nd->last_type != LAST_NORM)) {
- if (!(flags & WALK_MORE) && nd->depth)
+ if (unlikely(nd->depth) && !(flags & WALK_MORE))
put_link(nd);
return handle_dots(nd, nd->last_type);
}
@@ -2152,7 +2222,7 @@ static const char *walk_component(struct nameidata *nd, int flags)
if (IS_ERR(dentry))
return ERR_CAST(dentry);
}
- if (!(flags & WALK_MORE) && nd->depth)
+ if (unlikely(nd->depth) && !(flags & WALK_MORE))
put_link(nd);
return step_into(nd, flags, dentry);
}
@@ -2505,7 +2575,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
if (unlikely(!*name)) {
OK:
/* pathname or trailing symlink, done */
- if (!depth) {
+ if (likely(!depth)) {
nd->dir_vfsuid = i_uid_into_vfsuid(idmap, nd->inode);
nd->dir_mode = nd->inode->i_mode;
nd->flags &= ~LOOKUP_PARENT;
@@ -2543,10 +2613,10 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
const char *s = nd->pathname;
/* LOOKUP_CACHED requires RCU, ask caller to retry */
- if ((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED)
+ if (unlikely((flags & (LOOKUP_RCU | LOOKUP_CACHED)) == LOOKUP_CACHED))
return ERR_PTR(-EAGAIN);
- if (!*s)
+ if (unlikely(!*s))
flags &= ~LOOKUP_RCU;
if (flags & LOOKUP_RCU)
rcu_read_lock();
@@ -2560,7 +2630,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->r_seq = __read_seqcount_begin(&rename_lock.seqcount);
smp_rmb();
- if (nd->state & ND_ROOT_PRESET) {
+ if (unlikely(nd->state & ND_ROOT_PRESET)) {
struct dentry *root = nd->root.dentry;
struct inode *inode = root->d_inode;
if (*s && unlikely(!d_can_lookup(root)))
@@ -2579,7 +2649,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
nd->root.mnt = NULL;
/* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
- if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
+ if (*s == '/' && likely(!(flags & LOOKUP_IN_ROOT))) {
error = nd_jump_root(nd);
if (unlikely(error))
return ERR_PTR(error);
@@ -2632,7 +2702,7 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
}
/* For scoped-lookups we need to set the root to the dirfd as well. */
- if (flags & LOOKUP_IS_SCOPED) {
+ if (unlikely(flags & LOOKUP_IS_SCOPED)) {
nd->root = nd->path;
if (flags & LOOKUP_RCU) {
nd->root_seq = nd->seq;
@@ -2765,6 +2835,62 @@ static int filename_parentat(int dfd, struct filename *name,
return __filename_parentat(dfd, name, flags, parent, last, type, NULL);
}
+/**
+ * __start_dirop - begin a create or remove dirop, performing locking and lookup
+ * @parent: the dentry of the parent in which the operation will occur
+ * @name: a qstr holding the name within that parent
+ * @lookup_flags: intent and other lookup flags.
+ * @state: TASK_KILLABLE locks the parent killably; otherwise inode_lock_nested()
+ *
+ * The lookup is performed and necessary locks are taken so that, on success,
+ * the returned dentry can be operated on safely.
+ * The qstr must already have the hash value calculated.
+ *
+ * Returns: the dentry with its parent's inode locked, or an ERR_PTR() error.
+ */
+static struct dentry *__start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags,
+ unsigned int state)
+{
+ struct dentry *dentry;
+ struct inode *dir = d_inode(parent);
+
+ if (state == TASK_KILLABLE) {
+ int ret = down_write_killable_nested(&dir->i_rwsem,
+ I_MUTEX_PARENT);
+ if (ret)
+ return ERR_PTR(ret);
+ } else {
+ inode_lock_nested(dir, I_MUTEX_PARENT);
+ }
+ dentry = lookup_one_qstr_excl(name, parent, lookup_flags);
+ if (IS_ERR(dentry))
+ inode_unlock(dir);
+ return dentry;
+}
+
+struct dentry *start_dirop(struct dentry *parent, struct qstr *name,
+ unsigned int lookup_flags)
+{
+ return __start_dirop(parent, name, lookup_flags, TASK_NORMAL);
+}
+
+/**
+ * end_dirop - signal completion of a dirop
+ * @de: the dentry which was returned by start_dirop or similar.
+ *
+ * If the de is an error, nothing happens. Otherwise any lock taken to
+ * protect the dentry is dropped and the dentry itself is released (dput()).
+ */
+void end_dirop(struct dentry *de)
+{
+ if (!IS_ERR(de)) {
+ inode_unlock(de->d_parent->d_inode);
+ dput(de);
+ }
+}
+EXPORT_SYMBOL(end_dirop);
+
/* does lookup, returns the object with parent locked */
static struct dentry *__start_removing_path(int dfd, struct filename *name,
struct path *path)
@@ -2781,10 +2907,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
return ERR_PTR(-EINVAL);
/* don't fail immediately if it's r/o, at least try to report other errors */
error = mnt_want_write(parent_path.mnt);
- inode_lock_nested(parent_path.dentry->d_inode, I_MUTEX_PARENT);
- d = lookup_one_qstr_excl(&last, parent_path.dentry, 0);
+ d = start_dirop(parent_path.dentry, &last, 0);
if (IS_ERR(d))
- goto unlock;
+ goto drop;
if (error)
goto fail;
path->dentry = no_free_ptr(parent_path.dentry);
@@ -2792,10 +2917,9 @@ static struct dentry *__start_removing_path(int dfd, struct filename *name,
return d;
fail:
- dput(d);
+ end_dirop(d);
d = ERR_PTR(error);
-unlock:
- inode_unlock(parent_path.dentry->d_inode);
+drop:
if (!error)
mnt_drop_write(parent_path.mnt);
return d;
@@ -2910,7 +3034,7 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
}
EXPORT_SYMBOL(vfs_path_lookup);
-static int lookup_noperm_common(struct qstr *qname, struct dentry *base)
+int lookup_noperm_common(struct qstr *qname, struct dentry *base)
{
const char *name = qname->name;
u32 len = qname->len;
@@ -3181,6 +3305,234 @@ struct dentry *lookup_noperm_positive_unlocked(struct qstr *name,
}
EXPORT_SYMBOL(lookup_noperm_positive_unlocked);
+/**
+ * start_creating - prepare to create a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup is performed prior to creating
+ * an object in a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name already exists, a positive dentry is returned, so
+ * behaviour is similar to O_CREAT without O_EXCL, which doesn't fail
+ * with -EEXIST.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating);
+
+/**
+ * start_removing - prepare to remove a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing(struct mnt_idmap *idmap, struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing);
+
+/**
+ * start_creating_killable - prepare to create a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup is performed prior to creating
+ * an object in a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * If a signal is received or was already pending, the function aborts
+ * with -EINTR.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return __start_dirop(parent, name, LOOKUP_CREATE, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_creating_killable);
+
+/**
+ * start_removing_killable - prepare to remove a given name with permission checking
+ * @idmap: idmap of the mount
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory. Permission checking (MAY_EXEC) is performed
+ * against @idmap.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * If a signal is received or was already pending, the function aborts
+ * with -EINTR.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_killable(struct mnt_idmap *idmap,
+ struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_one_common(idmap, name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return __start_dirop(parent, name, 0, TASK_KILLABLE);
+}
+EXPORT_SYMBOL(start_removing_killable);
+
+/**
+ * start_creating_noperm - prepare to create a given name without permission checking
+ * @parent: directory in which to prepare to create the name
+ * @name: the name to be created
+ *
+ * Locks are taken and a lookup is performed prior to creating
+ * an object in a directory.
+ *
+ * If the name already exists, a positive dentry is returned.
+ *
+ * Returns: a negative or positive dentry, or an error.
+ */
+struct dentry *start_creating_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, LOOKUP_CREATE);
+}
+EXPORT_SYMBOL(start_creating_noperm);
+
+/**
+ * start_removing_noperm - prepare to remove a given name without permission checking
+ * @parent: directory in which to find the name
+ * @name: the name to be removed
+ *
+ * Locks are taken and a lookup is performed prior to removing
+ * an object from a directory.
+ *
+ * If the name doesn't exist, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: a positive dentry, or an error.
+ */
+struct dentry *start_removing_noperm(struct dentry *parent,
+ struct qstr *name)
+{
+ int err = lookup_noperm_common(name, parent);
+
+ if (err)
+ return ERR_PTR(err);
+ return start_dirop(parent, name, 0);
+}
+EXPORT_SYMBOL(start_removing_noperm);
+
+/**
+ * start_creating_dentry - prepare to create a given dentry
+ * @parent: directory in which the dentry should be created
+ * @child: the dentry to be created
+ *
+ * A lock is taken to protect the dentry against other dirops and
+ * the validity of the dentry is checked: correct parent and still hashed.
+ *
+ * If the dentry is valid and negative, a reference is taken and
+ * returned. If not, an error is returned.
+ *
+ * end_creating() should be called when creation is complete, or aborted.
+ *
+ * Returns: the valid dentry, or an error.
+ */
+struct dentry *start_creating_dentry(struct dentry *parent,
+ struct dentry *child)
+{
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ if (unlikely(IS_DEADDIR(parent->d_inode) ||
+ child->d_parent != parent ||
+ d_unhashed(child))) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EINVAL);
+ }
+ if (d_is_positive(child)) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EEXIST);
+ }
+ return dget(child);
+}
+EXPORT_SYMBOL(start_creating_dentry);
+
+/**
+ * start_removing_dentry - prepare to remove a given dentry
+ * @parent: directory from which dentry should be removed
+ * @child: the dentry to be removed
+ *
+ * A lock is taken to protect the dentry against other dirops and
+ * the validity of the dentry is checked: correct parent and still hashed.
+ *
+ * If the dentry is valid and positive, a reference is taken and
+ * returned. If not, an error is returned.
+ *
+ * end_removing() should be called when removal is complete, or aborted.
+ *
+ * Returns: the valid dentry, or an error.
+ */
+struct dentry *start_removing_dentry(struct dentry *parent,
+ struct dentry *child)
+{
+ inode_lock_nested(parent->d_inode, I_MUTEX_PARENT);
+ if (unlikely(IS_DEADDIR(parent->d_inode) ||
+ child->d_parent != parent ||
+ d_unhashed(child))) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-EINVAL);
+ }
+ if (d_is_negative(child)) {
+ inode_unlock(parent->d_inode);
+ return ERR_PTR(-ENOENT);
+ }
+ return dget(child);
+}
+EXPORT_SYMBOL(start_removing_dentry);
+
#ifdef CONFIG_UNIX98_PTYS
int path_pts(struct path *path)
{
@@ -3419,6 +3771,290 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
EXPORT_SYMBOL(unlock_rename);
/**
+ * __start_renaming - lookup and lock names for rename
+ * @rd: rename data containing parents and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_last: name of object in @rd.old_parent
+ * @new_last: name of object in @rd.new_parent
+ *
+ * Look up two names and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentries are stored in @rd.old_dentry,
+ * @rd.new_dentry and an extra ref is taken on @rd.old_parent.
+ * These references and the lock are dropped by end_renaming().
+ *
+ * The passed in qstrs must have the hash calculated, and no permission
+ * checking is performed.
+ *
+ * Returns: zero or an error.
+ */
+static int
+__start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last)
+{
+ struct dentry *trap;
+ struct dentry *d1, *d2;
+ int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ int err;
+
+ if (rd->flags & RENAME_EXCHANGE)
+ target_flags = 0;
+ if (rd->flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
+
+ trap = lock_rename(rd->old_parent, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+
+ d1 = lookup_one_qstr_excl(old_last, rd->old_parent,
+ lookup_flags);
+ err = PTR_ERR(d1);
+ if (IS_ERR(d1))
+ goto out_unlock;
+
+ d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
+ lookup_flags | target_flags);
+ err = PTR_ERR(d2);
+ if (IS_ERR(d2))
+ goto out_dput_d1;
+
+ if (d1 == trap) {
+ /* source is an ancestor of target */
+ err = -EINVAL;
+ goto out_dput_d2;
+ }
+
+ if (d2 == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_dput_d2;
+ }
+
+ rd->old_dentry = d1;
+ rd->new_dentry = d2;
+ dget(rd->old_parent);
+ return 0;
+
+out_dput_d2:
+ dput(d2);
+out_dput_d1:
+ dput(d1);
+out_unlock:
+ unlock_rename(rd->old_parent, rd->new_parent);
+ return err;
+}
+
+/**
+ * start_renaming - lookup and lock names for rename with permission checking
+ * @rd: rename data containing parents and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_last: name of object in @rd.old_parent
+ * @new_last: name of object in @rd.new_parent
+ *
+ * Look up two names and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentries are stored in @rd.old_dentry,
+ * @rd.new_dentry. Also the refcount on @rd->old_parent is increased.
+ * These references and the lock are dropped by end_renaming().
+ *
+ * The passed in qstrs need not have the hash calculated, and basic
+ * eXecute permission checking is performed against @rd.mnt_idmap.
+ *
+ * Returns: zero or an error.
+ */
+int start_renaming(struct renamedata *rd, int lookup_flags,
+ struct qstr *old_last, struct qstr *new_last)
+{
+ int err;
+
+ err = lookup_one_common(rd->mnt_idmap, old_last, rd->old_parent);
+ if (err)
+ return err;
+ err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ if (err)
+ return err;
+ return __start_renaming(rd, lookup_flags, old_last, new_last);
+}
+EXPORT_SYMBOL(start_renaming);
+
+static int
+__start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last)
+{
+ struct dentry *trap;
+ struct dentry *d2;
+ int target_flags = LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ int err;
+
+ if (rd->flags & RENAME_EXCHANGE)
+ target_flags = 0;
+ if (rd->flags & RENAME_NOREPLACE)
+ target_flags |= LOOKUP_EXCL;
+
+ /* Already have the dentry - need to be sure to lock the correct parent */
+ trap = lock_rename_child(old_dentry, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ if (d_unhashed(old_dentry) ||
+ (rd->old_parent && rd->old_parent != old_dentry->d_parent)) {
+ /* dentry was removed, or moved and explicit parent requested */
+ err = -EINVAL;
+ goto out_unlock;
+ }
+
+ d2 = lookup_one_qstr_excl(new_last, rd->new_parent,
+ lookup_flags | target_flags);
+ err = PTR_ERR(d2);
+ if (IS_ERR(d2))
+ goto out_unlock;
+
+ if (old_dentry == trap) {
+ /* source is an ancestor of target */
+ err = -EINVAL;
+ goto out_dput_d2;
+ }
+
+ if (d2 == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_dput_d2;
+ }
+
+ rd->old_dentry = dget(old_dentry);
+ rd->new_dentry = d2;
+ rd->old_parent = dget(old_dentry->d_parent);
+ return 0;
+
+out_dput_d2:
+ dput(d2);
+out_unlock:
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
+ return err;
+}
+
+/**
+ * start_renaming_dentry - lookup and lock name for rename with permission checking
+ * @rd: rename data containing parents and flags, and
+ * for receiving found dentries
+ * @lookup_flags: extra flags to pass to ->lookup (e.g. LOOKUP_REVAL,
+ * LOOKUP_NO_SYMLINKS etc).
+ * @old_dentry: dentry of name to move
+ * @new_last: name of target in @rd.new_parent
+ *
+ * Look up target name and ensure locks are in place for
+ * rename.
+ *
+ * On success the found dentry is stored in @rd.new_dentry and
+ * @rd.old_parent is confirmed to be the parent of @old_dentry. If it
+ * was originally %NULL, it is set. In either case a reference is taken
+ * so that end_renaming() can have a stable reference to unlock.
+ *
+ * References and the lock can be dropped with end_renaming()
+ *
+ * The passed in qstr need not have the hash calculated, and basic
+ * eXecute permission checking is performed against @rd.mnt_idmap.
+ *
+ * Returns: zero or an error.
+ */
+int start_renaming_dentry(struct renamedata *rd, int lookup_flags,
+ struct dentry *old_dentry, struct qstr *new_last)
+{
+ int err;
+
+ err = lookup_one_common(rd->mnt_idmap, new_last, rd->new_parent);
+ if (err)
+ return err;
+ return __start_renaming_dentry(rd, lookup_flags, old_dentry, new_last);
+}
+EXPORT_SYMBOL(start_renaming_dentry);
+
+/**
+ * start_renaming_two_dentries - Lock two dentries in given parents for rename
+ * @rd: rename data containing parents and flags
+ * @old_dentry: dentry of name to move
+ * @new_dentry: dentry to move to
+ *
+ * Ensure locks are in place for rename and check parentage is still correct.
+ *
+ * On success the two dentries are stored in @rd.old_dentry and
+ * @rd.new_dentry and @rd.old_parent and @rd.new_parent are confirmed to
+ * be the parents of the dentries.
+ *
+ * References and the lock can be dropped with end_renaming().
+ *
+ * Returns: zero or an error.
+ */
+int
+start_renaming_two_dentries(struct renamedata *rd,
+ struct dentry *old_dentry, struct dentry *new_dentry)
+{
+ struct dentry *trap;
+ int err;
+
+ /* Already have the dentry - need to be sure to lock the correct parent */
+ trap = lock_rename_child(old_dentry, rd->new_parent);
+ if (IS_ERR(trap))
+ return PTR_ERR(trap);
+ err = -EINVAL;
+ if (d_unhashed(old_dentry) ||
+ (rd->old_parent && rd->old_parent != old_dentry->d_parent))
+ /* old_dentry was removed, or moved and explicit parent requested */
+ goto out_unlock;
+ if (d_unhashed(new_dentry) ||
+ rd->new_parent != new_dentry->d_parent)
+ /* new_dentry was removed or moved */
+ goto out_unlock;
+
+ if (old_dentry == trap)
+ /* source is an ancestor of target */
+ goto out_unlock;
+
+ if (new_dentry == trap) {
+ /* target is an ancestor of source */
+ if (rd->flags & RENAME_EXCHANGE)
+ err = -EINVAL;
+ else
+ err = -ENOTEMPTY;
+ goto out_unlock;
+ }
+
+ err = -EEXIST;
+ if (d_is_positive(new_dentry) && (rd->flags & RENAME_NOREPLACE))
+ goto out_unlock;
+
+ rd->old_dentry = dget(old_dentry);
+ rd->new_dentry = dget(new_dentry);
+ rd->old_parent = dget(old_dentry->d_parent);
+ return 0;
+
+out_unlock:
+ unlock_rename(old_dentry->d_parent, rd->new_parent);
+ return err;
+}
+EXPORT_SYMBOL(start_renaming_two_dentries);
+
+void end_renaming(struct renamedata *rd)
+{
+ unlock_rename(rd->old_parent, rd->new_parent);
+ dput(rd->old_dentry);
+ dput(rd->new_dentry);
+ dput(rd->old_parent);
+}
+EXPORT_SYMBOL(end_renaming);
+
+/**
* vfs_prepare_mode - prepare the mode to be used for a new inode
* @idmap: idmap of the mount the inode was found from
* @dir: parent directory of the new inode
@@ -3461,10 +4097,9 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
/**
* vfs_create - create new file
* @idmap: idmap of the mount the inode was found from
- * @dir: inode of the parent directory
* @dentry: dentry of the child file
* @mode: mode of the child file
- * @want_excl: whether the file must not yet exist
+ * @di: returns parent inode, if the inode is delegated.
*
* Create a new file.
*
@@ -3474,9 +4109,10 @@ static inline umode_t vfs_prepare_mode(struct mnt_idmap *idmap,
* On non-idmapped mounts or if permission checking is to be performed on the
* raw inode simply pass @nop_mnt_idmap.
*/
-int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode, bool want_excl)
+int vfs_create(struct mnt_idmap *idmap, struct dentry *dentry, umode_t mode,
+ struct delegated_inode *di)
{
+ struct inode *dir = d_inode(dentry->d_parent);
int error;
error = may_create(idmap, dir, dentry);
@@ -3490,7 +4126,10 @@ int vfs_create(struct mnt_idmap *idmap, struct inode *dir,
error = security_inode_create(dir, dentry, mode);
if (error)
return error;
- error = dir->i_op->create(idmap, dir, dentry, mode, want_excl);
+ error = try_break_deleg(dir, di);
+ if (error)
+ return error;
+ error = dir->i_op->create(idmap, dir, dentry, mode, true);
if (!error)
fsnotify_create(dir, dentry);
return error;
@@ -3697,7 +4336,7 @@ static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
*/
static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
const struct open_flags *op,
- bool got_write)
+ bool got_write, struct delegated_inode *delegated_inode)
{
struct mnt_idmap *idmap;
struct dentry *dir = nd->path.dentry;
@@ -3786,6 +4425,11 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
/* Negative dentry, just create the file */
if (!dentry->d_inode && (open_flag & O_CREAT)) {
+ /* but break the directory lease first! */
+ error = try_break_deleg(dir_inode, delegated_inode);
+ if (error)
+ goto out_dput;
+
file->f_mode |= FMODE_CREATED;
audit_inode_child(dir_inode, dentry, AUDIT_TYPE_CHILD_CREATE);
if (!dir_inode->i_op->create) {
@@ -3848,6 +4492,7 @@ static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
+ struct delegated_inode delegated_inode = { };
struct dentry *dir = nd->path.dentry;
int open_flag = op->open_flag;
bool got_write = false;
@@ -3879,7 +4524,7 @@ static const char *open_last_lookups(struct nameidata *nd,
return ERR_PTR(-ECHILD);
}
}
-
+retry:
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
got_write = !mnt_want_write(nd->path.mnt);
/*
@@ -3892,7 +4537,7 @@ static const char *open_last_lookups(struct nameidata *nd,
inode_lock(dir->d_inode);
else
inode_lock_shared(dir->d_inode);
- dentry = lookup_open(nd, file, op, got_write);
+ dentry = lookup_open(nd, file, op, got_write, &delegated_inode);
if (!IS_ERR(dentry)) {
if (file->f_mode & FMODE_CREATED)
fsnotify_create(dir->d_inode, dentry);
@@ -3907,8 +4552,16 @@ static const char *open_last_lookups(struct nameidata *nd,
if (got_write)
mnt_drop_write(nd->path.mnt);
- if (IS_ERR(dentry))
+ if (IS_ERR(dentry)) {
+ if (is_delegated(&delegated_inode)) {
+ int error = break_deleg_wait(&delegated_inode);
+
+ if (!error)
+ goto retry;
+ return ERR_PTR(error);
+ }
return ERR_CAST(dentry);
+ }
if (file->f_mode & (FMODE_OPENED | FMODE_CREATED)) {
dput(nd->path.dentry);
@@ -4036,7 +4689,7 @@ int vfs_tmpfile(struct mnt_idmap *idmap,
inode = file_inode(file);
if (!(open_flag & O_EXCL)) {
spin_lock(&inode->i_lock);
- inode->i_state |= I_LINKABLE;
+ inode_state_set(inode, I_LINKABLE);
spin_unlock(&inode->i_lock);
}
security_inode_post_create_tmpfile(idmap, inode);
@@ -4223,21 +4876,18 @@ static struct dentry *filename_create(int dfd, struct filename *name,
*/
if (last.name[last.len] && !want_dir)
create_flags &= ~LOOKUP_CREATE;
- inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path->dentry,
- reval_flag | create_flags);
+ dentry = start_dirop(path->dentry, &last, reval_flag | create_flags);
if (IS_ERR(dentry))
- goto unlock;
+ goto out_drop_write;
if (unlikely(error))
goto fail;
return dentry;
fail:
- dput(dentry);
+ end_dirop(dentry);
dentry = ERR_PTR(error);
-unlock:
- inode_unlock(path->dentry->d_inode);
+out_drop_write:
if (!error)
mnt_drop_write(path->mnt);
out:
@@ -4256,11 +4906,20 @@ struct dentry *start_creating_path(int dfd, const char *pathname,
}
EXPORT_SYMBOL(start_creating_path);
+/**
+ * end_creating_path - finish a code section started by start_creating_path()
+ * @path: the path instantiated by start_creating_path()
+ * @dentry: the dentry returned by start_creating_path()
+ *
+ * end_creating_path() will unlock any locks taken by start_creating_path()
+ * and drop any references that were taken. It should only be called
+ * if start_creating_path() returned a non-error.
+ * If vfs_mkdir() was called and it returned an error, that error *should*
+ * be passed to end_creating_path() together with the path.
+ */
void end_creating_path(const struct path *path, struct dentry *dentry)
{
- if (!IS_ERR(dentry))
- dput(dentry);
- inode_unlock(path->dentry->d_inode);
+ end_creating(dentry);
mnt_drop_write(path->mnt);
path_put(path);
}
@@ -4278,13 +4937,15 @@ inline struct dentry *start_creating_user_path(
}
EXPORT_SYMBOL(start_creating_user_path);
+
/**
* vfs_mknod - create device node or file
- * @idmap: idmap of the mount the inode was found from
- * @dir: inode of the parent directory
- * @dentry: dentry of the child device node
- * @mode: mode of the child device node
- * @dev: device number of device to create
+ * @idmap: idmap of the mount the inode was found from
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child device node
+ * @mode: mode of the child device node
+ * @dev: device number of device to create
+ * @delegated_inode: returns parent inode, if the inode is delegated.
*
* Create a device node or file.
*
@@ -4295,7 +4956,8 @@ EXPORT_SYMBOL(start_creating_user_path);
* raw inode simply pass @nop_mnt_idmap.
*/
int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode, dev_t dev)
+ struct dentry *dentry, umode_t mode, dev_t dev,
+ struct delegated_inode *delegated_inode)
{
bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
int error = may_create(idmap, dir, dentry);
@@ -4319,6 +4981,10 @@ int vfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
if (error)
return error;
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ return error;
+
error = dir->i_op->mknod(idmap, dir, dentry, mode, dev);
if (!error)
fsnotify_create(dir, dentry);
@@ -4346,6 +5012,7 @@ static int may_mknod(umode_t mode)
static int do_mknodat(int dfd, struct filename *name, umode_t mode,
unsigned int dev)
{
+ struct delegated_inode di = { };
struct mnt_idmap *idmap;
struct dentry *dentry;
struct path path;
@@ -4369,22 +5036,26 @@ retry:
idmap = mnt_idmap(path.mnt);
switch (mode & S_IFMT) {
case 0: case S_IFREG:
- error = vfs_create(idmap, path.dentry->d_inode,
- dentry, mode, true);
+ error = vfs_create(idmap, dentry, mode, &di);
if (!error)
security_path_post_mknod(idmap, dentry);
break;
case S_IFCHR: case S_IFBLK:
error = vfs_mknod(idmap, path.dentry->d_inode,
- dentry, mode, new_decode_dev(dev));
+ dentry, mode, new_decode_dev(dev), &di);
break;
case S_IFIFO: case S_IFSOCK:
error = vfs_mknod(idmap, path.dentry->d_inode,
- dentry, mode, 0);
+ dentry, mode, 0, &di);
break;
}
out2:
end_creating_path(&path, dentry);
+ if (is_delegated(&di)) {
+ error = break_deleg_wait(&di);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
@@ -4407,10 +5078,11 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
/**
* vfs_mkdir - create directory returning correct dentry if possible
- * @idmap: idmap of the mount the inode was found from
- * @dir: inode of the parent directory
- * @dentry: dentry of the child directory
- * @mode: mode of the child directory
+ * @idmap: idmap of the mount the inode was found from
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child directory
+ * @mode: mode of the child directory
+ * @delegated_inode: returns parent inode, if the inode is delegated.
*
* Create a directory.
*
@@ -4427,7 +5099,8 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
* In case of an error the dentry is dput() and an ERR_PTR() is returned.
*/
struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, umode_t mode)
+ struct dentry *dentry, umode_t mode,
+ struct delegated_inode *delegated_inode)
{
int error;
unsigned max_links = dir->i_sb->s_max_links;
@@ -4450,6 +5123,10 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
if (max_links && dir->i_nlink >= max_links)
goto err;
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ goto err;
+
de = dir->i_op->mkdir(idmap, dir, dentry, mode);
error = PTR_ERR(de);
if (IS_ERR(de))
@@ -4462,7 +5139,7 @@ struct dentry *vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
return dentry;
err:
- dput(dentry);
+ end_creating(dentry);
return ERR_PTR(error);
}
EXPORT_SYMBOL(vfs_mkdir);
@@ -4473,6 +5150,7 @@ int do_mkdirat(int dfd, struct filename *name, umode_t mode)
struct path path;
int error;
unsigned int lookup_flags = LOOKUP_DIRECTORY;
+ struct delegated_inode delegated_inode = { };
retry:
dentry = filename_create(dfd, name, &path, lookup_flags);
@@ -4484,11 +5162,16 @@ retry:
mode_strip_umask(path.dentry->d_inode, mode));
if (!error) {
dentry = vfs_mkdir(mnt_idmap(path.mnt), path.dentry->d_inode,
- dentry, mode);
+ dentry, mode, &delegated_inode);
if (IS_ERR(dentry))
error = PTR_ERR(dentry);
}
end_creating_path(&path, dentry);
+ if (is_delegated(&delegated_inode)) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
@@ -4510,9 +5193,10 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
/**
* vfs_rmdir - remove directory
- * @idmap: idmap of the mount the inode was found from
- * @dir: inode of the parent directory
- * @dentry: dentry of the child directory
+ * @idmap: idmap of the mount the inode was found from
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the child directory
+ * @delegated_inode: returns parent inode, if it's delegated.
*
* Remove a directory.
*
@@ -4523,7 +5207,7 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
* raw inode simply pass @nop_mnt_idmap.
*/
int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry)
+ struct dentry *dentry, struct delegated_inode *delegated_inode)
{
int error = may_delete(idmap, dir, dentry, 1);
@@ -4545,6 +5229,10 @@ int vfs_rmdir(struct mnt_idmap *idmap, struct inode *dir,
if (error)
goto out;
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ goto out;
+
error = dir->i_op->rmdir(dir, dentry);
if (error)
goto out;
@@ -4571,6 +5259,7 @@ int do_rmdir(int dfd, struct filename *name)
struct qstr last;
int type;
unsigned int lookup_flags = 0;
+ struct delegated_inode delegated_inode = { };
retry:
error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (error)
@@ -4592,22 +5281,26 @@ retry:
if (error)
goto exit2;
- inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
+ dentry = start_dirop(path.dentry, &last, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto exit3;
error = security_path_rmdir(&path, dentry);
if (error)
goto exit4;
- error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode, dentry);
+ error = vfs_rmdir(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, &delegated_inode);
exit4:
- dput(dentry);
+ end_dirop(dentry);
exit3:
- inode_unlock(path.dentry->d_inode);
mnt_drop_write(path.mnt);
exit2:
path_put(&path);
+ if (is_delegated(&delegated_inode)) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
@@ -4648,7 +5341,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
* raw inode simply pass @nop_mnt_idmap.
*/
int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, struct inode **delegated_inode)
+ struct dentry *dentry, struct delegated_inode *delegated_inode)
{
struct inode *target = dentry->d_inode;
int error = may_delete(idmap, dir, dentry, 0);
@@ -4667,6 +5360,9 @@ int vfs_unlink(struct mnt_idmap *idmap, struct inode *dir,
else {
error = security_inode_unlink(dir, dentry);
if (!error) {
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ goto out;
error = try_break_deleg(target, delegated_inode);
if (error)
goto out;
@@ -4705,67 +5401,62 @@ int do_unlinkat(int dfd, struct filename *name)
struct path path;
struct qstr last;
int type;
- struct inode *inode = NULL;
- struct inode *delegated_inode = NULL;
+ struct inode *inode;
+ struct delegated_inode delegated_inode = { };
unsigned int lookup_flags = 0;
retry:
error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (error)
- goto exit1;
+ goto exit_putname;
error = -EISDIR;
if (type != LAST_NORM)
- goto exit2;
+ goto exit_path_put;
error = mnt_want_write(path.mnt);
if (error)
- goto exit2;
+ goto exit_path_put;
retry_deleg:
- inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
- dentry = lookup_one_qstr_excl(&last, path.dentry, lookup_flags);
+ dentry = start_dirop(path.dentry, &last, lookup_flags);
error = PTR_ERR(dentry);
- if (!IS_ERR(dentry)) {
+ if (IS_ERR(dentry))
+ goto exit_drop_write;
- /* Why not before? Because we want correct error value */
- if (last.name[last.len])
- goto slashes;
- inode = dentry->d_inode;
- ihold(inode);
- error = security_path_unlink(&path, dentry);
- if (error)
- goto exit3;
- error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
- dentry, &delegated_inode);
-exit3:
- dput(dentry);
+ /* Why not before? Because we want correct error value */
+ if (unlikely(last.name[last.len])) {
+ if (d_is_dir(dentry))
+ error = -EISDIR;
+ else
+ error = -ENOTDIR;
+ end_dirop(dentry);
+ goto exit_drop_write;
}
- inode_unlock(path.dentry->d_inode);
- if (inode)
- iput(inode); /* truncate the inode here */
- inode = NULL;
- if (delegated_inode) {
+ inode = dentry->d_inode;
+ ihold(inode);
+ error = security_path_unlink(&path, dentry);
+ if (error)
+ goto exit_end_dirop;
+ error = vfs_unlink(mnt_idmap(path.mnt), path.dentry->d_inode,
+ dentry, &delegated_inode);
+exit_end_dirop:
+ end_dirop(dentry);
+ iput(inode); /* truncate the inode here */
+ if (is_delegated(&delegated_inode)) {
error = break_deleg_wait(&delegated_inode);
if (!error)
goto retry_deleg;
}
+exit_drop_write:
mnt_drop_write(path.mnt);
-exit2:
+exit_path_put:
path_put(&path);
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
- inode = NULL;
goto retry;
}
-exit1:
+exit_putname:
putname(name);
return error;
-
-slashes:
- if (d_is_dir(dentry))
- error = -EISDIR;
- else
- error = -ENOTDIR;
- goto exit3;
}
SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
@@ -4789,6 +5480,7 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
* @dir: inode of the parent directory
* @dentry: dentry of the child symlink file
* @oldname: name of the file to link to
+ * @delegated_inode: returns parent inode, if the inode is delegated.
*
* Create a symlink.
*
@@ -4799,7 +5491,8 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
* raw inode simply pass @nop_mnt_idmap.
*/
int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
- struct dentry *dentry, const char *oldname)
+ struct dentry *dentry, const char *oldname,
+ struct delegated_inode *delegated_inode)
{
int error;
@@ -4814,6 +5507,10 @@ int vfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
if (error)
return error;
+ error = try_break_deleg(dir, delegated_inode);
+ if (error)
+ return error;
+
error = dir->i_op->symlink(idmap, dir, dentry, oldname);
if (!error)
fsnotify_create(dir, dentry);
@@ -4827,6 +5524,7 @@ int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
struct dentry *dentry;
struct path path;
unsigned int lookup_flags = 0;
+ struct delegated_inode delegated_inode = { };
if (IS_ERR(from)) {
error = PTR_ERR(from);
@@ -4841,8 +5539,13 @@ retry:
error = security_path_symlink(&path, dentry, from->name);
if (!error)
error = vfs_symlink(mnt_idmap(path.mnt), path.dentry->d_inode,
- dentry, from->name);
+ dentry, from->name, &delegated_inode);
end_creating_path(&path, dentry);
+ if (is_delegated(&delegated_inode)) {
+ error = break_deleg_wait(&delegated_inode);
+ if (!error)
+ goto retry;
+ }
if (retry_estale(error, lookup_flags)) {
lookup_flags |= LOOKUP_REVAL;
goto retry;
@@ -4892,7 +5595,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
*/
int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
struct inode *dir, struct dentry *new_dentry,
- struct inode **delegated_inode)
+ struct delegated_inode *delegated_inode)
{
struct inode *inode = old_dentry->d_inode;
unsigned max_links = dir->i_sb->s_max_links;
@@ -4931,19 +5634,21 @@ int vfs_link(struct dentry *old_dentry, struct mnt_idmap *idmap,
inode_lock(inode);
/* Make sure we don't allow creating hardlink to an unlinked file */
- if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
+ if (inode->i_nlink == 0 && !(inode_state_read_once(inode) & I_LINKABLE))
error = -ENOENT;
else if (max_links && inode->i_nlink >= max_links)
error = -EMLINK;
else {
- error = try_break_deleg(inode, delegated_inode);
+ error = try_break_deleg(dir, delegated_inode);
+ if (!error)
+ error = try_break_deleg(inode, delegated_inode);
if (!error)
error = dir->i_op->link(old_dentry, dir, new_dentry);
}
- if (!error && (inode->i_state & I_LINKABLE)) {
+ if (!error && (inode_state_read_once(inode) & I_LINKABLE)) {
spin_lock(&inode->i_lock);
- inode->i_state &= ~I_LINKABLE;
+ inode_state_clear(inode, I_LINKABLE);
spin_unlock(&inode->i_lock);
}
inode_unlock(inode);
@@ -4968,7 +5673,7 @@ int do_linkat(int olddfd, struct filename *old, int newdfd,
struct mnt_idmap *idmap;
struct dentry *new_dentry;
struct path old_path, new_path;
- struct inode *delegated_inode = NULL;
+ struct delegated_inode delegated_inode = { };
int how = 0;
int error;
@@ -5012,7 +5717,7 @@ retry:
new_dentry, &delegated_inode);
out_dput:
end_creating_path(&new_path, new_dentry);
- if (delegated_inode) {
+ if (is_delegated(&delegated_inode)) {
error = break_deleg_wait(&delegated_inode);
if (!error) {
path_put(&old_path);
@@ -5098,7 +5803,7 @@ int vfs_rename(struct renamedata *rd)
struct inode *new_dir = d_inode(rd->new_parent);
struct dentry *old_dentry = rd->old_dentry;
struct dentry *new_dentry = rd->new_dentry;
- struct inode **delegated_inode = rd->delegated_inode;
+ struct delegated_inode *delegated_inode = rd->delegated_inode;
unsigned int flags = rd->flags;
bool is_dir = d_is_dir(old_dentry);
struct inode *source = old_dentry->d_inode;
@@ -5203,6 +5908,14 @@ int vfs_rename(struct renamedata *rd)
old_dir->i_nlink >= max_links)
goto out;
}
+ error = try_break_deleg(old_dir, delegated_inode);
+ if (error)
+ goto out;
+ if (new_dir != old_dir) {
+ error = try_break_deleg(new_dir, delegated_inode);
+ if (error)
+ goto out;
+ }
if (!is_dir) {
error = try_break_deleg(source, delegated_inode);
if (error)
@@ -5256,14 +5969,11 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
struct filename *to, unsigned int flags)
{
struct renamedata rd;
- struct dentry *old_dentry, *new_dentry;
- struct dentry *trap;
struct path old_path, new_path;
struct qstr old_last, new_last;
int old_type, new_type;
- struct inode *delegated_inode = NULL;
- unsigned int lookup_flags = 0, target_flags =
- LOOKUP_RENAME_TARGET | LOOKUP_CREATE;
+ struct delegated_inode delegated_inode = { };
+ unsigned int lookup_flags = 0;
bool should_retry = false;
int error = -EINVAL;
@@ -5274,11 +5984,6 @@ int do_renameat2(int olddfd, struct filename *from, int newdfd,
(flags & RENAME_EXCHANGE))
goto put_names;
- if (flags & RENAME_EXCHANGE)
- target_flags = 0;
- if (flags & RENAME_NOREPLACE)
- target_flags |= LOOKUP_EXCL;
-
retry:
error = filename_parentat(olddfd, from, lookup_flags, &old_path,
&old_last, &old_type);
@@ -5308,68 +6013,42 @@ retry:
goto exit2;
retry_deleg:
- trap = lock_rename(new_path.dentry, old_path.dentry);
- if (IS_ERR(trap)) {
- error = PTR_ERR(trap);
+ rd.old_parent = old_path.dentry;
+ rd.mnt_idmap = mnt_idmap(old_path.mnt);
+ rd.new_parent = new_path.dentry;
+ rd.delegated_inode = &delegated_inode;
+ rd.flags = flags;
+
+ error = __start_renaming(&rd, lookup_flags, &old_last, &new_last);
+ if (error)
goto exit_lock_rename;
- }
- old_dentry = lookup_one_qstr_excl(&old_last, old_path.dentry,
- lookup_flags);
- error = PTR_ERR(old_dentry);
- if (IS_ERR(old_dentry))
- goto exit3;
- new_dentry = lookup_one_qstr_excl(&new_last, new_path.dentry,
- lookup_flags | target_flags);
- error = PTR_ERR(new_dentry);
- if (IS_ERR(new_dentry))
- goto exit4;
if (flags & RENAME_EXCHANGE) {
- if (!d_is_dir(new_dentry)) {
+ if (!d_is_dir(rd.new_dentry)) {
error = -ENOTDIR;
if (new_last.name[new_last.len])
- goto exit5;
+ goto exit_unlock;
}
}
/* unless the source is a directory trailing slashes give -ENOTDIR */
- if (!d_is_dir(old_dentry)) {
+ if (!d_is_dir(rd.old_dentry)) {
error = -ENOTDIR;
if (old_last.name[old_last.len])
- goto exit5;
+ goto exit_unlock;
if (!(flags & RENAME_EXCHANGE) && new_last.name[new_last.len])
- goto exit5;
+ goto exit_unlock;
}
- /* source should not be ancestor of target */
- error = -EINVAL;
- if (old_dentry == trap)
- goto exit5;
- /* target should not be an ancestor of source */
- if (!(flags & RENAME_EXCHANGE))
- error = -ENOTEMPTY;
- if (new_dentry == trap)
- goto exit5;
- error = security_path_rename(&old_path, old_dentry,
- &new_path, new_dentry, flags);
+ error = security_path_rename(&old_path, rd.old_dentry,
+ &new_path, rd.new_dentry, flags);
if (error)
- goto exit5;
+ goto exit_unlock;
- rd.old_parent = old_path.dentry;
- rd.old_dentry = old_dentry;
- rd.mnt_idmap = mnt_idmap(old_path.mnt);
- rd.new_parent = new_path.dentry;
- rd.new_dentry = new_dentry;
- rd.delegated_inode = &delegated_inode;
- rd.flags = flags;
error = vfs_rename(&rd);
-exit5:
- dput(new_dentry);
-exit4:
- dput(old_dentry);
-exit3:
- unlock_rename(new_path.dentry, old_path.dentry);
+exit_unlock:
+ end_renaming(&rd);
exit_lock_rename:
- if (delegated_inode) {
+ if (is_delegated(&delegated_inode)) {
error = break_deleg_wait(&delegated_inode);
if (!error)
goto retry_deleg;