summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_dentry.c10
-rw-r--r--fs/9p/vfs_inode.c8
-rw-r--r--fs/9p/vfs_inode_dotl.c8
-rw-r--r--fs/btrfs/delayed-inode.c2
-rw-r--r--fs/btrfs/delayed-inode.h7
-rw-r--r--fs/btrfs/extent_io.c2
-rw-r--r--fs/btrfs/free-space-tree.c15
-rw-r--r--fs/btrfs/ioctl.c2
-rw-r--r--fs/btrfs/ref-verify.c2
-rw-r--r--fs/btrfs/relocation.c13
-rw-r--r--fs/btrfs/scrub.c4
-rw-r--r--fs/btrfs/send.c60
-rw-r--r--fs/btrfs/super.c11
-rw-r--r--fs/btrfs/tree-checker.c2
-rw-r--r--fs/btrfs/zoned.c2
-rw-r--r--fs/coredump.c2
-rw-r--r--fs/dax.c2
-rw-r--r--fs/dcache.c2
-rw-r--r--fs/erofs/zmap.c59
-rw-r--r--fs/exec.c2
-rw-r--r--fs/exfat/exfat_fs.h1
-rw-r--r--fs/exfat/file.c7
-rw-r--r--fs/exfat/namei.c8
-rw-r--r--fs/exfat/nls.c3
-rw-r--r--fs/ext4/ext4_jbd2.c11
-rw-r--r--fs/ext4/inode.c8
-rw-r--r--fs/ext4/orphan.c4
-rw-r--r--fs/f2fs/data.c2
-rw-r--r--fs/f2fs/super.c2
-rw-r--r--fs/file_attr.c16
-rw-r--r--fs/file_table.c2
-rw-r--r--fs/fuse/ioctl.c4
-rw-r--r--fs/hugetlbfs/inode.c9
-rw-r--r--fs/jbd2/transaction.c13
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c35
-rw-r--r--fs/nfs/nfs4client.c1
-rw-r--r--fs/nfs/nfs4proc.c13
-rw-r--r--fs/nfs/write.c3
-rw-r--r--fs/nfsd/flexfilelayout.c8
-rw-r--r--fs/nfsd/nfs4proc.c21
-rw-r--r--fs/nfsd/nfs4state.c1
-rw-r--r--fs/nfsd/nfs4xdr.c21
-rw-r--r--fs/nfsd/nfsd.h3
-rw-r--r--fs/nfsd/xdr4.h1
-rw-r--r--fs/notify/fdinfo.c6
-rw-r--r--fs/nsfs.c4
-rw-r--r--fs/ocfs2/move_extents.c5
-rw-r--r--fs/overlayfs/copy_up.c2
-rw-r--r--fs/overlayfs/file.c5
-rw-r--r--fs/overlayfs/inode.c5
-rw-r--r--fs/resctrl/monitor.c16
-rw-r--r--fs/smb/client/Kconfig7
-rw-r--r--fs/smb/client/cifsacl.c5
-rw-r--r--fs/smb/client/cifsencrypt.c201
-rw-r--r--fs/smb/client/cifsfs.c6
-rw-r--r--fs/smb/client/cifsglob.h26
-rw-r--r--fs/smb/client/cifsproto.h13
-rw-r--r--fs/smb/client/cifssmb.c8
-rw-r--r--fs/smb/client/connect.c46
-rw-r--r--fs/smb/client/dfs_cache.c55
-rw-r--r--fs/smb/client/inode.c11
-rw-r--r--fs/smb/client/link.c31
-rw-r--r--fs/smb/client/misc.c17
-rw-r--r--fs/smb/client/sess.c2
-rw-r--r--fs/smb/client/smb2misc.c53
-rw-r--r--fs/smb/client/smb2ops.c15
-rw-r--r--fs/smb/client/smb2proto.h14
-rw-r--r--fs/smb/client/smb2transport.c182
-rw-r--r--fs/smb/client/smbdirect.c424
-rw-r--r--fs/smb/client/smbdirect.h2
-rw-r--r--fs/smb/client/trace.c1
-rw-r--r--fs/smb/client/xattr.c1
-rw-r--r--fs/smb/common/cifsglob.h30
-rw-r--r--fs/smb/common/smbdirect/smbdirect_socket.h24
-rw-r--r--fs/smb/server/mgmt/user_session.c7
-rw-r--r--fs/smb/server/smb2pdu.c11
-rw-r--r--fs/smb/server/smb_common.h14
-rw-r--r--fs/smb/server/transport_ipc.c20
-rw-r--r--fs/smb/server/transport_rdma.c411
-rw-r--r--fs/sysfs/group.c26
-rw-r--r--fs/xfs/Kconfig11
-rw-r--r--fs/xfs/libxfs/xfs_rtgroup.h6
-rw-r--r--fs/xfs/scrub/nlinks.c34
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf.h1
-rw-r--r--fs/xfs/xfs_mount.h1
-rw-r--r--fs/xfs/xfs_super.c53
-rw-r--r--fs/xfs/xfs_zone_alloc.c156
-rw-r--r--fs/xfs/xfs_zone_gc.c108
-rw-r--r--fs/xfs/xfs_zone_priv.h2
90 files changed, 1420 insertions, 1072 deletions
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index f3248a3e5402..c1acbc98465d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -66,7 +66,6 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
struct p9_fid *fid;
struct inode *inode;
struct v9fs_inode *v9inode;
- unsigned int cached;
if (flags & LOOKUP_RCU)
return -ECHILD;
@@ -76,11 +75,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
goto out_valid;
v9inode = V9FS_I(inode);
- struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode);
-
- cached = v9ses->cache & (CACHE_META | CACHE_LOOSE);
-
- if (!cached || v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
+ if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) {
int retval;
struct v9fs_session_info *v9ses;
@@ -114,6 +109,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
p9_debug(P9_DEBUG_VFS,
"refresh inode: dentry = %pd (%p), got error %pe\n",
dentry, dentry, ERR_PTR(retval));
+ if (retval < 0)
return retval;
}
}
@@ -150,8 +146,6 @@ const struct dentry_operations v9fs_cached_dentry_operations = {
};
const struct dentry_operations v9fs_dentry_operations = {
- .d_revalidate = v9fs_lookup_revalidate,
- .d_weak_revalidate = __v9fs_lookup_revalidate,
.d_release = v9fs_dentry_release,
.d_unalias_trylock = v9fs_dentry_unalias_trylock,
.d_unalias_unlock = v9fs_dentry_unalias_unlock,
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 69f378a83775..d0c77ec31b1d 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1339,14 +1339,8 @@ int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode)
* Don't update inode if the file type is different
*/
umode = p9mode2unixmode(v9ses, st, &rdev);
- if (inode_wrong_type(inode, umode)) {
- /*
- * Do this as a way of letting the caller know the inode should not
- * be reused
- */
- v9fs_invalidate_inode_attr(inode);
+ if (inode_wrong_type(inode, umode))
goto out;
- }
/*
* We don't want to refresh inode->i_size,
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0b404e8484d2..be297e335468 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -897,14 +897,8 @@ int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode)
/*
* Don't update inode if the file type is different
*/
- if (inode_wrong_type(inode, st->st_mode)) {
- /*
- * Do this as a way of letting the caller know the inode should not
- * be reused
- */
- v9fs_invalidate_inode_attr(inode);
+ if (inode_wrong_type(inode, st->st_mode))
goto out;
- }
/*
* We don't want to refresh inode->i_size,
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 41e37f7f67cc..3df7b9d7fbe8 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -2110,9 +2110,9 @@ void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
for (int i = 0; i < count; i++) {
__btrfs_kill_delayed_node(delayed_nodes[i]);
+ btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i],
&delayed_node_trackers[i]);
- btrfs_delayed_node_ref_tracker_dir_print(delayed_nodes[i]);
}
}
}
diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
index 0d949edc0caf..b09d4ec8c77d 100644
--- a/fs/btrfs/delayed-inode.h
+++ b/fs/btrfs/delayed-inode.h
@@ -219,6 +219,13 @@ static inline void btrfs_delayed_node_ref_tracker_dir_print(struct btrfs_delayed
if (!btrfs_test_opt(node->root->fs_info, REF_TRACKER))
return;
+ /*
+ * Only print if there are leaked references. The caller is
+ * holding one reference, so if refs == 1 there is no leak.
+ */
+ if (refcount_read(&node->refs) == 1)
+ return;
+
ref_tracker_dir_print(&node->ref_dir.dir,
BTRFS_DELAYED_NODE_REF_TRACKER_DISPLAY_LIMIT);
}
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index c123a3ef154a..755ec6dfd51c 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -973,7 +973,7 @@ static void btrfs_readahead_expand(struct readahead_control *ractl,
{
const u64 ra_pos = readahead_pos(ractl);
const u64 ra_end = ra_pos + readahead_length(ractl);
- const u64 em_end = em->start + em->ram_bytes;
+ const u64 em_end = em->start + em->len;
/* No expansion for holes and inline extents. */
if (em->disk_bytenr > EXTENT_MAP_LAST_BYTE)
diff --git a/fs/btrfs/free-space-tree.c b/fs/btrfs/free-space-tree.c
index dad0b492a663..d86541073d42 100644
--- a/fs/btrfs/free-space-tree.c
+++ b/fs/btrfs/free-space-tree.c
@@ -1106,14 +1106,15 @@ static int populate_free_space_tree(struct btrfs_trans_handle *trans,
* If ret is 1 (no key found), it means this is an empty block group,
* without any extents allocated from it and there's no block group
* item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
- * because we are using the block group tree feature, so block group
- * items are stored in the block group tree. It also means there are no
- * extents allocated for block groups with a start offset beyond this
- * block group's end offset (this is the last, highest, block group).
+ * because we are using the block group tree feature (so block group
+ * items are stored in the block group tree) or this is a new block
+ * group created in the current transaction and its block group item
+ * was not yet inserted in the extent tree (that happens in
+ * btrfs_create_pending_block_groups() -> insert_block_group_item()).
+ * It also means there are no extents allocated for block groups with a
+ * start offset beyond this block group's end offset (this is the last,
+ * highest, block group).
*/
- if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
- ASSERT(ret == 0);
-
start = block_group->start;
end = block_group->start + block_group->length;
while (ret == 0) {
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 185bef0df1c2..8cb7d5a462ef 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3740,7 +3740,7 @@ static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL);
if (!prealloc) {
ret = -ENOMEM;
- goto drop_write;
+ goto out;
}
}
diff --git a/fs/btrfs/ref-verify.c b/fs/btrfs/ref-verify.c
index de4cb0f3fbd0..e9224145d754 100644
--- a/fs/btrfs/ref-verify.c
+++ b/fs/btrfs/ref-verify.c
@@ -982,7 +982,7 @@ int btrfs_build_ref_tree(struct btrfs_fs_info *fs_info)
extent_root = btrfs_extent_root(fs_info, 0);
/* If the extent tree is damaged we cannot ignore it (IGNOREBADROOTS). */
- if (IS_ERR(extent_root)) {
+ if (!extent_root) {
btrfs_warn(fs_info, "ref-verify: extent tree not available, disabling");
btrfs_clear_opt(fs_info->mount_opt, REF_VERIFY);
return 0;
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8dd8de6b9fb8..0765e06d00b8 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -3780,6 +3780,7 @@ out:
/*
* Mark start of chunk relocation that is cancellable. Check if the cancellation
* has been requested meanwhile and don't start in that case.
+ * NOTE: if this returns an error, reloc_chunk_end() must not be called.
*
* Return:
* 0 success
@@ -3796,10 +3797,8 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
btrfs_info(fs_info, "chunk relocation canceled on start");
- /*
- * On cancel, clear all requests but let the caller mark
- * the end after cleanup operations.
- */
+ /* On cancel, clear all requests. */
+ clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
atomic_set(&fs_info->reloc_cancel_req, 0);
return -ECANCELED;
}
@@ -3808,9 +3807,11 @@ static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
/*
* Mark end of chunk relocation that is cancellable and wake any waiters.
+ * NOTE: call only if a previous call to reloc_chunk_start() succeeded.
*/
static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
{
+ ASSERT(test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags));
/* Requested after start, clear bit first so any waiters can continue */
if (atomic_read(&fs_info->reloc_cancel_req) > 0)
btrfs_info(fs_info, "chunk relocation canceled during operation");
@@ -4023,9 +4024,9 @@ out:
if (err && rw)
btrfs_dec_block_group_ro(rc->block_group);
iput(rc->data_inode);
+ reloc_chunk_end(fs_info);
out_put_bg:
btrfs_put_block_group(bg);
- reloc_chunk_end(fs_info);
free_reloc_control(rc);
return err;
}
@@ -4208,8 +4209,8 @@ out_clean:
ret = ret2;
out_unset:
unset_reloc_control(rc);
-out_end:
reloc_chunk_end(fs_info);
+out_end:
free_reloc_control(rc);
out:
free_reloc_roots(&reloc_roots);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 4691d0bdb2e8..651b11884f82 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -694,7 +694,7 @@ static void *scrub_stripe_get_kaddr(struct scrub_stripe *stripe, int sector_nr)
/* stripe->folios[] is allocated by us and no highmem is allowed. */
ASSERT(folio);
- ASSERT(!folio_test_partial_kmap(folio));
+ ASSERT(!folio_test_highmem(folio));
return folio_address(folio) + offset_in_folio(folio, offset);
}
@@ -707,7 +707,7 @@ static phys_addr_t scrub_stripe_get_paddr(struct scrub_stripe *stripe, int secto
/* stripe->folios[] is allocated by us and no highmem is allowed. */
ASSERT(folio);
- ASSERT(!folio_test_partial_kmap(folio));
+ ASSERT(!folio_test_highmem(folio));
/* And the range must be contained inside the folio. */
ASSERT(offset_in_folio(folio, offset) + fs_info->sectorsize <= folio_size(folio));
return page_to_phys(folio_page(folio, 0)) + offset_in_folio(folio, offset);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 9230e5066fc6..96a030d28e09 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -178,7 +178,6 @@ struct send_ctx {
u64 cur_inode_rdev;
u64 cur_inode_last_extent;
u64 cur_inode_next_write_offset;
- struct fs_path cur_inode_path;
bool cur_inode_new;
bool cur_inode_new_gen;
bool cur_inode_deleted;
@@ -305,6 +304,9 @@ struct send_ctx {
struct btrfs_lru_cache dir_created_cache;
struct btrfs_lru_cache dir_utimes_cache;
+
+ /* Must be last as it ends in a flexible-array member. */
+ struct fs_path cur_inode_path;
};
struct pending_dir_move {
@@ -4100,6 +4102,48 @@ out:
return ret;
}
+static int rbtree_check_dir_ref_comp(const void *k, const struct rb_node *node)
+{
+ const struct recorded_ref *data = k;
+ const struct recorded_ref *ref = rb_entry(node, struct recorded_ref, node);
+
+ if (data->dir > ref->dir)
+ return 1;
+ if (data->dir < ref->dir)
+ return -1;
+ if (data->dir_gen > ref->dir_gen)
+ return 1;
+ if (data->dir_gen < ref->dir_gen)
+ return -1;
+ return 0;
+}
+
+static bool rbtree_check_dir_ref_less(struct rb_node *node, const struct rb_node *parent)
+{
+ const struct recorded_ref *entry = rb_entry(node, struct recorded_ref, node);
+
+ return rbtree_check_dir_ref_comp(entry, parent) < 0;
+}
+
+static int record_check_dir_ref_in_tree(struct rb_root *root,
+ struct recorded_ref *ref, struct list_head *list)
+{
+ struct recorded_ref *tmp_ref;
+ int ret;
+
+ if (rb_find(ref, root, rbtree_check_dir_ref_comp))
+ return 0;
+
+ ret = dup_ref(ref, list);
+ if (ret < 0)
+ return ret;
+
+ tmp_ref = list_last_entry(list, struct recorded_ref, list);
+ rb_add(&tmp_ref->node, root, rbtree_check_dir_ref_less);
+ tmp_ref->root = root;
+ return 0;
+}
+
static int rename_current_inode(struct send_ctx *sctx,
struct fs_path *current_path,
struct fs_path *new_path)
@@ -4127,11 +4171,11 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
struct recorded_ref *cur;
struct recorded_ref *cur2;
LIST_HEAD(check_dirs);
+ struct rb_root rbtree_check_dirs = RB_ROOT;
struct fs_path *valid_path = NULL;
u64 ow_inode = 0;
u64 ow_gen;
u64 ow_mode;
- u64 last_dir_ino_rm = 0;
bool did_overwrite = false;
bool is_orphan = false;
bool can_rename = true;
@@ -4435,7 +4479,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
goto out;
}
}
- ret = dup_ref(cur, &check_dirs);
+ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
if (ret < 0)
goto out;
}
@@ -4463,7 +4507,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
}
list_for_each_entry(cur, &sctx->deleted_refs, list) {
- ret = dup_ref(cur, &check_dirs);
+ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
if (ret < 0)
goto out;
}
@@ -4473,7 +4517,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
* We have a moved dir. Add the old parent to check_dirs
*/
cur = list_first_entry(&sctx->deleted_refs, struct recorded_ref, list);
- ret = dup_ref(cur, &check_dirs);
+ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
if (ret < 0)
goto out;
} else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@ -4507,7 +4551,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
if (is_current_inode_path(sctx, cur->full_path))
fs_path_reset(&sctx->cur_inode_path);
}
- ret = dup_ref(cur, &check_dirs);
+ ret = record_check_dir_ref_in_tree(&rbtree_check_dirs, cur, &check_dirs);
if (ret < 0)
goto out;
}
@@ -4550,8 +4594,7 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
ret = cache_dir_utimes(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
- } else if (ret == inode_state_did_delete &&
- cur->dir != last_dir_ino_rm) {
+ } else if (ret == inode_state_did_delete) {
ret = can_rmdir(sctx, cur->dir, cur->dir_gen);
if (ret < 0)
goto out;
@@ -4563,7 +4606,6 @@ static int process_recorded_refs(struct send_ctx *sctx, int *pending_move)
ret = send_rmdir(sctx, valid_path);
if (ret < 0)
goto out;
- last_dir_ino_rm = cur->dir;
}
}
}
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d6e496436539..430e7419349c 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1900,8 +1900,6 @@ static int btrfs_get_tree_super(struct fs_context *fc)
return PTR_ERR(sb);
}
- set_device_specific_options(fs_info);
-
if (sb->s_root) {
/*
* Not the first mount of the fs thus got an existing super block.
@@ -1946,6 +1944,7 @@ static int btrfs_get_tree_super(struct fs_context *fc)
deactivate_locked_super(sb);
return -EACCES;
}
+ set_device_specific_options(fs_info);
bdev = fs_devices->latest_dev->bdev;
snprintf(sb->s_id, sizeof(sb->s_id), "%pg", bdev);
shrinker_debugfs_rename(sb->s_shrink, "sb-btrfs:%s", sb->s_id);
@@ -2069,7 +2068,13 @@ static int btrfs_get_tree_subvol(struct fs_context *fc)
fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL);
if (!fs_info->super_copy || !fs_info->super_for_commit) {
- btrfs_free_fs_info(fs_info);
+ /*
+ * Dont call btrfs_free_fs_info() to free it as it's still
+ * initialized partially.
+ */
+ kfree(fs_info->super_copy);
+ kfree(fs_info->super_for_commit);
+ kvfree(fs_info);
return -ENOMEM;
}
btrfs_init_fs_info(fs_info);
diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c
index ca30b15ea452..c10b4c242acf 100644
--- a/fs/btrfs/tree-checker.c
+++ b/fs/btrfs/tree-checker.c
@@ -1797,7 +1797,7 @@ static int check_inode_extref(struct extent_buffer *leaf,
struct btrfs_inode_extref *extref = (struct btrfs_inode_extref *)ptr;
u16 namelen;
- if (unlikely(ptr + sizeof(*extref)) > end) {
+ if (unlikely(ptr + sizeof(*extref) > end)) {
inode_ref_err(leaf, slot,
"inode extref overflow, ptr %lu end %lu inode_extref size %zu",
ptr, end, sizeof(*extref));
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index e00036672f33..0ea0df18a8e4 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -1753,7 +1753,7 @@ out:
!fs_info->stripe_root) {
btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree",
btrfs_bg_type_to_raid_name(map->type));
- return -EINVAL;
+ ret = -EINVAL;
}
if (unlikely(cache->alloc_offset > cache->zone_capacity)) {
diff --git a/fs/coredump.c b/fs/coredump.c
index b5fc06a092a4..5c1c381ee380 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -1468,7 +1468,7 @@ static int proc_dostring_coredump(const struct ctl_table *table, int write,
ssize_t retval;
char old_core_pattern[CORENAME_MAX_SIZE];
- if (write)
+ if (!write)
return proc_dostring(table, write, buffer, lenp, ppos);
retval = strscpy(old_core_pattern, core_pattern, CORENAME_MAX_SIZE);
diff --git a/fs/dax.c b/fs/dax.c
index 89f071ba7b10..516f995a988c 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1725,7 +1725,7 @@ dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
if (iov_iter_rw(iter) == WRITE) {
lockdep_assert_held_write(&iomi.inode->i_rwsem);
iomi.flags |= IOMAP_WRITE;
- } else {
+ } else if (!sb_rdonly(iomi.inode->i_sb)) {
lockdep_assert_held(&iomi.inode->i_rwsem);
}
diff --git a/fs/dcache.c b/fs/dcache.c
index a067fa0a965a..035cccbc9276 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -2557,6 +2557,8 @@ struct dentry *d_alloc_parallel(struct dentry *parent,
spin_lock(&parent->d_lock);
new->d_parent = dget_dlock(parent);
hlist_add_head(&new->d_sib, &parent->d_children);
+ if (parent->d_flags & DCACHE_DISCONNECTED)
+ new->d_flags |= DCACHE_DISCONNECTED;
spin_unlock(&parent->d_lock);
retry:
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index e5581dbeb4c2..c8d8e129eb4b 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -55,10 +55,6 @@ static int z_erofs_load_full_lcluster(struct z_erofs_maprecorder *m,
} else {
m->partialref = !!(advise & Z_EROFS_LI_PARTIAL_REF);
m->clusterofs = le16_to_cpu(di->di_clusterofs);
- if (m->clusterofs >= 1 << vi->z_lclusterbits) {
- DBG_BUGON(1);
- return -EFSCORRUPTED;
- }
m->pblk = le32_to_cpu(di->di_u.blkaddr);
}
return 0;
@@ -240,21 +236,29 @@ static int z_erofs_load_compact_lcluster(struct z_erofs_maprecorder *m,
static int z_erofs_load_lcluster_from_disk(struct z_erofs_maprecorder *m,
unsigned int lcn, bool lookahead)
{
+ struct erofs_inode *vi = EROFS_I(m->inode);
+ int err;
+
+ if (vi->datalayout == EROFS_INODE_COMPRESSED_COMPACT) {
+ err = z_erofs_load_compact_lcluster(m, lcn, lookahead);
+ } else {
+ DBG_BUGON(vi->datalayout != EROFS_INODE_COMPRESSED_FULL);
+ err = z_erofs_load_full_lcluster(m, lcn);
+ }
+ if (err)
+ return err;
+
if (m->type >= Z_EROFS_LCLUSTER_TYPE_MAX) {
erofs_err(m->inode->i_sb, "unknown type %u @ lcn %u of nid %llu",
- m->type, lcn, EROFS_I(m->inode)->nid);
+ m->type, lcn, EROFS_I(m->inode)->nid);
DBG_BUGON(1);
return -EOPNOTSUPP;
+ } else if (m->type != Z_EROFS_LCLUSTER_TYPE_NONHEAD &&
+ m->clusterofs >= (1 << vi->z_lclusterbits)) {
+ DBG_BUGON(1);
+ return -EFSCORRUPTED;
}
-
- switch (EROFS_I(m->inode)->datalayout) {
- case EROFS_INODE_COMPRESSED_FULL:
- return z_erofs_load_full_lcluster(m, lcn);
- case EROFS_INODE_COMPRESSED_COMPACT:
- return z_erofs_load_compact_lcluster(m, lcn, lookahead);
- default:
- return -EINVAL;
- }
+ return 0;
}
static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
@@ -268,20 +272,19 @@ static int z_erofs_extent_lookback(struct z_erofs_maprecorder *m,
unsigned long lcn = m->lcn - lookback_distance;
int err;
+ if (!lookback_distance)
+ break;
+
err = z_erofs_load_lcluster_from_disk(m, lcn, false);
if (err)
return err;
-
if (m->type == Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
lookback_distance = m->delta[0];
- if (!lookback_distance)
- break;
continue;
- } else {
- m->headtype = m->type;
- m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
- return 0;
}
+ m->headtype = m->type;
+ m->map->m_la = (lcn << lclusterbits) | m->clusterofs;
+ return 0;
}
erofs_err(sb, "bogus lookback distance %u @ lcn %lu of nid %llu",
lookback_distance, m->lcn, vi->nid);
@@ -431,13 +434,6 @@ static int z_erofs_map_blocks_fo(struct inode *inode,
end = inode->i_size;
} else {
if (m.type != Z_EROFS_LCLUSTER_TYPE_NONHEAD) {
- /* m.lcn should be >= 1 if endoff < m.clusterofs */
- if (!m.lcn) {
- erofs_err(sb, "invalid logical cluster 0 at nid %llu",
- vi->nid);
- err = -EFSCORRUPTED;
- goto unmap_out;
- }
end = (m.lcn << lclusterbits) | m.clusterofs;
map->m_flags |= EROFS_MAP_FULL_MAPPED;
m.delta[0] = 1;
@@ -596,7 +592,7 @@ static int z_erofs_map_blocks_ext(struct inode *inode,
vi->z_fragmentoff = map->m_plen;
if (recsz > offsetof(struct z_erofs_extent, pstart_lo))
vi->z_fragmentoff |= map->m_pa << 32;
- } else if (map->m_plen) {
+ } else if (map->m_plen & Z_EROFS_EXTENT_PLEN_MASK) {
map->m_flags |= EROFS_MAP_MAPPED |
EROFS_MAP_FULL_MAPPED | EROFS_MAP_ENCODED;
fmt = map->m_plen >> Z_EROFS_EXTENT_PLEN_FMT_BIT;
@@ -715,6 +711,7 @@ static int z_erofs_map_sanity_check(struct inode *inode,
struct erofs_map_blocks *map)
{
struct erofs_sb_info *sbi = EROFS_I_SB(inode);
+ u64 pend;
if (!(map->m_flags & EROFS_MAP_ENCODED))
return 0;
@@ -732,6 +729,10 @@ static int z_erofs_map_sanity_check(struct inode *inode,
if (unlikely(map->m_plen > Z_EROFS_PCLUSTER_MAX_SIZE ||
map->m_llen > Z_EROFS_PCLUSTER_MAX_DSIZE))
return -EOPNOTSUPP;
+ /* Filesystems beyond 48-bit physical block addresses are invalid */
+ if (unlikely(check_add_overflow(map->m_pa, map->m_plen, &pend) ||
+ (pend >> sbi->blkszbits) >= BIT_ULL(48)))
+ return -EFSCORRUPTED;
return 0;
}
diff --git a/fs/exec.c b/fs/exec.c
index 6b70c6726d31..4298e7e08d5d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -2048,7 +2048,7 @@ static int proc_dointvec_minmax_coredump(const struct ctl_table *table, int writ
{
int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
- if (!error && !write)
+ if (!error && write)
validate_coredump_safety();
return error;
}
diff --git a/fs/exfat/exfat_fs.h b/fs/exfat/exfat_fs.h
index 329697c89d09..38210fb6901c 100644
--- a/fs/exfat/exfat_fs.h
+++ b/fs/exfat/exfat_fs.h
@@ -29,7 +29,6 @@ enum exfat_error_mode {
enum {
NLS_NAME_NO_LOSSY = 0, /* no lossy */
NLS_NAME_LOSSY = 1 << 0, /* just detected incorrect filename(s) */
- NLS_NAME_OVERLEN = 1 << 1, /* the length is over than its limit */
};
#define EXFAT_HASH_BITS 8
diff --git a/fs/exfat/file.c b/fs/exfat/file.c
index f246cf439588..adc37b4d7fc2 100644
--- a/fs/exfat/file.c
+++ b/fs/exfat/file.c
@@ -509,8 +509,8 @@ static int exfat_ioctl_get_volume_label(struct super_block *sb, unsigned long ar
static int exfat_ioctl_set_volume_label(struct super_block *sb,
unsigned long arg)
{
- int ret = 0, lossy;
- char label[FSLABEL_MAX];
+ int ret = 0, lossy, label_len;
+ char label[FSLABEL_MAX] = {0};
struct exfat_uni_name uniname;
if (!capable(CAP_SYS_ADMIN))
@@ -520,8 +520,9 @@ static int exfat_ioctl_set_volume_label(struct super_block *sb,
return -EFAULT;
memset(&uniname, 0, sizeof(uniname));
+ label_len = strnlen(label, FSLABEL_MAX - 1);
if (label[0]) {
- ret = exfat_nls_to_utf16(sb, label, FSLABEL_MAX,
+ ret = exfat_nls_to_utf16(sb, label, label_len,
&uniname, &lossy);
if (ret < 0)
return ret;
diff --git a/fs/exfat/namei.c b/fs/exfat/namei.c
index 7eb9c67fd35f..745dce29ddb5 100644
--- a/fs/exfat/namei.c
+++ b/fs/exfat/namei.c
@@ -442,7 +442,7 @@ static int __exfat_resolve_path(struct inode *inode, const unsigned char *path,
return namelen; /* return error value */
if ((lossy && !lookup) || !namelen)
- return (lossy & NLS_NAME_OVERLEN) ? -ENAMETOOLONG : -EINVAL;
+ return -EINVAL;
return 0;
}
@@ -642,10 +642,14 @@ static int exfat_find(struct inode *dir, const struct qstr *qname,
info->type = exfat_get_entry_type(ep);
info->attr = le16_to_cpu(ep->dentry.file.attr);
- info->size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->valid_size = le64_to_cpu(ep2->dentry.stream.valid_size);
info->size = le64_to_cpu(ep2->dentry.stream.size);
+ if (info->valid_size < 0) {
+ exfat_fs_error(sb, "data valid size is invalid(%lld)", info->valid_size);
+ return -EIO;
+ }
+
if (unlikely(EXFAT_B_TO_CLU_ROUND_UP(info->size, sbi) > sbi->used_clusters)) {
exfat_fs_error(sb, "data size is invalid(%lld)", info->size);
return -EIO;
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 8243d94ceaf4..57db08a5271c 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -616,9 +616,6 @@ static int exfat_nls_to_ucs2(struct super_block *sb,
unilen++;
}
- if (p_cstring[i] != '\0')
- lossy |= NLS_NAME_OVERLEN;
-
*uniname = '\0';
p_uniname->name_len = unilen;
p_uniname->name_hash = exfat_calc_chksum16(upname, unilen << 1, 0,
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index b3e9b7bd7978..a0e66bc10093 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -280,9 +280,16 @@ int __ext4_forget(const char *where, unsigned int line, handle_t *handle,
bh, is_metadata, inode->i_mode,
test_opt(inode->i_sb, DATA_FLAGS));
- /* In the no journal case, we can just do a bforget and return */
+ /*
+ * In the no journal case, we should wait for the ongoing buffer
+ * to complete and do a forget.
+ */
if (!ext4_handle_valid(handle)) {
- bforget(bh);
+ if (bh) {
+ clear_buffer_dirty(bh);
+ wait_on_buffer(bh);
+ __bforget(bh);
+ }
return 0;
}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index f9e4ac87211e..e99306a8f47c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5319,6 +5319,14 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
}
ei->i_flags = le32_to_cpu(raw_inode->i_flags);
ext4_set_inode_flags(inode, true);
+ /* Detect invalid flag combination - can't have both inline data and extents */
+ if (ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
+ ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
+ ext4_error_inode(inode, function, line, 0,
+ "inode has both inline data and extents flags");
+ ret = -EFSCORRUPTED;
+ goto bad_inode;
+ }
inode->i_blocks = ext4_inode_blocks(raw_inode, ei);
ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl_lo);
if (ext4_has_feature_64bit(sb))
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 33c3a89396b1..82d5e7501455 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -513,7 +513,7 @@ void ext4_release_orphan_info(struct super_block *sb)
return;
for (i = 0; i < oi->of_blocks; i++)
brelse(oi->of_binfo[i].ob_bh);
- kfree(oi->of_binfo);
+ kvfree(oi->of_binfo);
}
static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
@@ -637,7 +637,7 @@ int ext4_init_orphan_info(struct super_block *sb)
out_free:
for (i--; i >= 0; i--)
brelse(oi->of_binfo[i].ob_bh);
- kfree(oi->of_binfo);
+ kvfree(oi->of_binfo);
out_put:
iput(inode);
return ret;
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ef38e62cda8f..775aa4f63aa3 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1497,8 +1497,8 @@ static bool f2fs_map_blocks_cached(struct inode *inode,
struct f2fs_dev_info *dev = &sbi->devs[bidx];
map->m_bdev = dev->bdev;
- map->m_pblk -= dev->start_blk;
map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
+ map->m_pblk -= dev->start_blk;
} else {
map->m_bdev = inode->i_sb->s_bdev;
}
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index fd8e7b0b2166..db7afb806411 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -1820,7 +1820,7 @@ static int f2fs_drop_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
spin_lock(&inode->i_lock);
- iput(inode);
+ atomic_dec(&inode->i_count);
}
trace_f2fs_drop_inode(inode, 0);
return 0;
diff --git a/fs/file_attr.c b/fs/file_attr.c
index 12424d4945d0..1dcec88c0680 100644
--- a/fs/file_attr.c
+++ b/fs/file_attr.c
@@ -84,7 +84,7 @@ int vfs_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
int error;
if (!inode->i_op->fileattr_get)
- return -EOPNOTSUPP;
+ return -ENOIOCTLCMD;
error = security_inode_file_getattr(dentry, fa);
if (error)
@@ -270,7 +270,7 @@ int vfs_fileattr_set(struct mnt_idmap *idmap, struct dentry *dentry,
int err;
if (!inode->i_op->fileattr_set)
- return -EOPNOTSUPP;
+ return -ENOIOCTLCMD;
if (!inode_owner_or_capable(idmap, inode))
return -EPERM;
@@ -312,8 +312,6 @@ int ioctl_getflags(struct file *file, unsigned int __user *argp)
int err;
err = vfs_fileattr_get(file->f_path.dentry, &fa);
- if (err == -EOPNOTSUPP)
- err = -ENOIOCTLCMD;
if (!err)
err = put_user(fa.flags, argp);
return err;
@@ -335,8 +333,6 @@ int ioctl_setflags(struct file *file, unsigned int __user *argp)
fileattr_fill_flags(&fa, flags);
err = vfs_fileattr_set(idmap, dentry, &fa);
mnt_drop_write_file(file);
- if (err == -EOPNOTSUPP)
- err = -ENOIOCTLCMD;
}
}
return err;
@@ -349,8 +345,6 @@ int ioctl_fsgetxattr(struct file *file, void __user *argp)
int err;
err = vfs_fileattr_get(file->f_path.dentry, &fa);
- if (err == -EOPNOTSUPP)
- err = -ENOIOCTLCMD;
if (!err)
err = copy_fsxattr_to_user(&fa, argp);
@@ -371,8 +365,6 @@ int ioctl_fssetxattr(struct file *file, void __user *argp)
if (!err) {
err = vfs_fileattr_set(idmap, dentry, &fa);
mnt_drop_write_file(file);
- if (err == -EOPNOTSUPP)
- err = -ENOIOCTLCMD;
}
}
return err;
@@ -424,6 +416,8 @@ SYSCALL_DEFINE5(file_getattr, int, dfd, const char __user *, filename,
}
error = vfs_fileattr_get(filepath.dentry, &fa);
+ if (error == -ENOIOCTLCMD || error == -ENOTTY)
+ error = -EOPNOTSUPP;
if (error)
return error;
@@ -491,6 +485,8 @@ SYSCALL_DEFINE5(file_setattr, int, dfd, const char __user *, filename,
if (!error) {
error = vfs_fileattr_set(mnt_idmap(filepath.mnt),
filepath.dentry, &fa);
+ if (error == -ENOIOCTLCMD || error == -ENOTTY)
+ error = -EOPNOTSUPP;
mnt_drop_write(filepath.mnt);
}
diff --git a/fs/file_table.c b/fs/file_table.c
index b223d873e48b..cd4a3db4659a 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -192,7 +192,7 @@ static int init_file(struct file *f, int flags, const struct cred *cred)
f->f_sb_err = 0;
/*
- * We're SLAB_TYPESAFE_BY_RCU so initialize f_count last. While
+ * We're SLAB_TYPESAFE_BY_RCU so initialize f_ref last. While
* fget-rcu pattern users need to be able to handle spurious
* refcount bumps we should reinitialize the reused file first.
*/
diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
index 57032eadca6c..fdc175e93f74 100644
--- a/fs/fuse/ioctl.c
+++ b/fs/fuse/ioctl.c
@@ -536,8 +536,6 @@ int fuse_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
cleanup:
fuse_priv_ioctl_cleanup(inode, ff);
- if (err == -ENOTTY)
- err = -EOPNOTSUPP;
return err;
}
@@ -574,7 +572,5 @@ int fuse_fileattr_set(struct mnt_idmap *idmap,
cleanup:
fuse_priv_ioctl_cleanup(inode, ff);
- if (err == -ENOTTY)
- err = -EOPNOTSUPP;
return err;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 9c94ed8c3ab0..f42548ee9083 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -478,14 +478,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
if (!hugetlb_vma_trylock_write(vma))
continue;
- /*
- * Skip VMAs without shareable locks. Per the design in commit
- * 40549ba8f8e0, these will be handled by remove_inode_hugepages()
- * called after this function with proper locking.
- */
- if (!__vma_shareable_lock(vma))
- goto skip;
-
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
@@ -496,7 +488,6 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end,
* vmas. Therefore, lock is not held when calling
* unmap_hugepage_range for private vmas.
*/
-skip:
hugetlb_vma_unlock_write(vma);
}
}
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index c7867139af69..3e510564de6e 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -1659,6 +1659,7 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
int drop_reserve = 0;
int err = 0;
int was_modified = 0;
+ int wait_for_writeback = 0;
if (is_handle_aborted(handle))
return -EROFS;
@@ -1782,18 +1783,22 @@ int jbd2_journal_forget(handle_t *handle, struct buffer_head *bh)
}
/*
- * The buffer is still not written to disk, we should
- * attach this buffer to current transaction so that the
- * buffer can be checkpointed only after the current
- * transaction commits.
+ * The buffer has not yet been written to disk. We should
+ * either clear the buffer or ensure that the ongoing I/O
+ * is completed, and attach this buffer to current
+ * transaction so that the buffer can be checkpointed only
+ * after the current transaction commits.
*/
clear_buffer_dirty(bh);
+ wait_for_writeback = 1;
__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
spin_unlock(&journal->j_list_lock);
}
drop:
__brelse(bh);
spin_unlock(&jh->b_state_lock);
+ if (wait_for_writeback)
+ wait_on_buffer(bh);
jbd2_journal_put_journal_head(jh);
if (drop_reserve) {
/* no need to reserve log space for this block -bzzz */
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index df01d2876b68..9056f05a67dc 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -270,19 +270,31 @@ ff_layout_remove_mirror(struct nfs4_ff_layout_mirror *mirror)
mirror->layout = NULL;
}
-static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(gfp_t gfp_flags)
+static struct nfs4_ff_layout_mirror *ff_layout_alloc_mirror(u32 dss_count,
+ gfp_t gfp_flags)
{
struct nfs4_ff_layout_mirror *mirror;
- u32 dss_id;
mirror = kzalloc(sizeof(*mirror), gfp_flags);
- if (mirror != NULL) {
- spin_lock_init(&mirror->lock);
- refcount_set(&mirror->ref, 1);
- INIT_LIST_HEAD(&mirror->mirrors);
- for (dss_id = 0; dss_id < mirror->dss_count; dss_id++)
- nfs_localio_file_init(&mirror->dss[dss_id].nfl);
+ if (mirror == NULL)
+ return NULL;
+
+ spin_lock_init(&mirror->lock);
+ refcount_set(&mirror->ref, 1);
+ INIT_LIST_HEAD(&mirror->mirrors);
+
+ mirror->dss_count = dss_count;
+ mirror->dss =
+ kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
+ gfp_flags);
+ if (mirror->dss == NULL) {
+ kfree(mirror);
+ return NULL;
}
+
+ for (u32 dss_id = 0; dss_id < mirror->dss_count; dss_id++)
+ nfs_localio_file_init(&mirror->dss[dss_id].nfl);
+
return mirror;
}
@@ -507,17 +519,12 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
if (dss_count > 1 && stripe_unit == 0)
goto out_err_free;
- fls->mirror_array[i] = ff_layout_alloc_mirror(gfp_flags);
+ fls->mirror_array[i] = ff_layout_alloc_mirror(dss_count, gfp_flags);
if (fls->mirror_array[i] == NULL) {
rc = -ENOMEM;
goto out_err_free;
}
- fls->mirror_array[i]->dss_count = dss_count;
- fls->mirror_array[i]->dss =
- kcalloc(dss_count, sizeof(struct nfs4_ff_layout_ds_stripe),
- gfp_flags);
-
for (dss_id = 0; dss_id < dss_count; dss_id++) {
dss_info = &fls->mirror_array[i]->dss[dss_id];
dss_info->mirror = fls->mirror_array[i];
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 6fddf43d729c..5998d6bd8a4f 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -222,6 +222,7 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
clp->cl_state = 1 << NFS4CLNT_LEASE_EXPIRED;
clp->cl_mvops = nfs_v4_minor_ops[cl_init->minorversion];
clp->cl_mig_gen = 1;
+ clp->cl_last_renewal = jiffies;
#if IS_ENABLED(CONFIG_NFS_V4_1)
init_waitqueue_head(&clp->cl_lock_waitq);
#endif
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f58098417142..411776718494 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3636,6 +3636,7 @@ struct nfs4_closedata {
} lr;
struct nfs_fattr fattr;
unsigned long timestamp;
+ unsigned short retrans;
};
static void nfs4_free_closedata(void *data)
@@ -3664,6 +3665,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
.state = state,
.inode = calldata->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -3711,6 +3713,7 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
default:
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -5593,9 +5596,11 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr)
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
server, task->tk_status, &exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -5709,10 +5714,12 @@ static int nfs4_write_done_cb(struct rpc_task *task,
.inode = hdr->inode,
.state = hdr->args.context->state,
.stateid = &hdr->args.stateid,
+ .retrans = hdr->retrans,
};
task->tk_status = nfs4_async_handle_exception(task,
NFS_SERVER(inode), task->tk_status,
&exception);
+ hdr->retrans = exception.retrans;
if (exception.retry) {
rpc_restart_call_prepare(task);
return -EAGAIN;
@@ -6726,6 +6733,7 @@ struct nfs4_delegreturndata {
struct nfs_fh fh;
nfs4_stateid stateid;
unsigned long timestamp;
+ unsigned short retrans;
struct {
struct nfs4_layoutreturn_args arg;
struct nfs4_layoutreturn_res res;
@@ -6746,6 +6754,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
.inode = data->inode,
.stateid = &data->stateid,
.task_is_privileged = data->args.seq_args.sa_privileged,
+ .retrans = data->retrans,
};
if (!nfs4_sequence_done(task, &data->res.seq_res))
@@ -6817,6 +6826,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
task->tk_status = nfs4_async_handle_exception(task,
data->res.server, task->tk_status,
&exception);
+ data->retrans = exception.retrans;
if (exception.retry)
goto out_restart;
}
@@ -7093,6 +7103,7 @@ struct nfs4_unlockdata {
struct file_lock fl;
struct nfs_server *server;
unsigned long timestamp;
+ unsigned short retrans;
};
static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
@@ -7147,6 +7158,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
struct nfs4_exception exception = {
.inode = calldata->lsp->ls_state->inode,
.stateid = &calldata->arg.stateid,
+ .retrans = calldata->retrans,
};
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
@@ -7180,6 +7192,7 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
task->tk_status = nfs4_async_handle_exception(task,
calldata->server, task->tk_status,
&exception);
+ calldata->retrans = exception.retrans;
if (exception.retry)
rpc_restart_call_prepare(task);
}
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 0fb6905736d5..336c510f3750 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1535,7 +1535,8 @@ static int nfs_writeback_done(struct rpc_task *task,
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE
+ | NFS_INO_REVAL_FORCED);
spin_unlock(&inode->i_lock);
}
return 0;
diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c
index c318cf74e388..0f1a35400cd5 100644
--- a/fs/nfsd/flexfilelayout.c
+++ b/fs/nfsd/flexfilelayout.c
@@ -125,6 +125,13 @@ nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, struct svc_rqst *rqstp,
return 0;
}
+static __be32
+nfsd4_ff_proc_layoutcommit(struct inode *inode, struct svc_rqst *rqstp,
+ struct nfsd4_layoutcommit *lcp)
+{
+ return nfs_ok;
+}
+
const struct nfsd4_layout_ops ff_layout_ops = {
.notify_types =
NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
@@ -133,4 +140,5 @@ const struct nfsd4_layout_ops ff_layout_ops = {
.encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo,
.proc_layoutget = nfsd4_ff_proc_layoutget,
.encode_layoutget = nfsd4_ff_encode_layoutget,
+ .proc_layoutcommit = nfsd4_ff_proc_layoutcommit,
};
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e466cf52d7d7..7f7e6bb23a90 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -988,10 +988,11 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
static void
nfsd4_read_release(union nfsd4_op_u *u)
{
- if (u->read.rd_nf)
+ if (u->read.rd_nf) {
+ trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
+ u->read.rd_offset, u->read.rd_length);
nfsd_file_put(u->read.rd_nf);
- trace_nfsd_read_done(u->read.rd_rqstp, u->read.rd_fhp,
- u->read.rd_offset, u->read.rd_length);
+ }
}
static __be32
@@ -2892,10 +2893,20 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
rqstp->rq_lease_breaker = (void **)&cstate->clp;
- trace_nfsd_compound(rqstp, args->tag, args->taglen, args->opcnt);
+ trace_nfsd_compound(rqstp, args->tag, args->taglen, args->client_opcnt);
while (!status && resp->opcnt < args->opcnt) {
op = &args->ops[resp->opcnt++];
+ if (unlikely(resp->opcnt == NFSD_MAX_OPS_PER_COMPOUND)) {
+ /* If there are still more operations to process,
+ * stop here and report NFS4ERR_RESOURCE. */
+ if (cstate->minorversion == 0 &&
+ args->client_opcnt > resp->opcnt) {
+ op->status = nfserr_resource;
+ goto encode_op;
+ }
+ }
+
/*
* The XDR decode routines may have pre-set op->status;
* for example, if there is a miscellaneous XDR error
@@ -2972,7 +2983,7 @@ encode_op:
status = op->status;
}
- trace_nfsd_compound_status(args->opcnt, resp->opcnt,
+ trace_nfsd_compound_status(args->client_opcnt, resp->opcnt,
status, nfsd4_op_name(op->opnum));
nfsd4_cstate_clear_replay(cstate);
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 81fa7cc6c77b..c1b54322c412 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -3902,6 +3902,7 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
ca->headerpadsz = 0;
ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
+ ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c0a3c6a7c8bb..6040a6145dad 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2488,8 +2488,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
return false;
- if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
+ if (xdr_stream_decode_u32(argp->xdr, &argp->client_opcnt) < 0)
return false;
+ argp->opcnt = min_t(u32, argp->client_opcnt,
+ NFSD_MAX_OPS_PER_COMPOUND);
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
argp->ops = vcalloc(argp->opcnt, sizeof(*argp->ops));
@@ -2628,10 +2630,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
__be32 *p;
__be32 pathlen;
int pathlen_offset;
- int strlen, count=0;
char *str, *end, *next;
-
- dprintk("nfsd4_encode_components(%s)\n", components);
+ int count = 0;
pathlen_offset = xdr->buf->len;
p = xdr_reserve_space(xdr, 4);
@@ -2658,9 +2658,8 @@ static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
for (; *end && (*end != sep); end++)
/* find sep or end of string */;
- strlen = end - str;
- if (strlen) {
- if (xdr_stream_encode_opaque(xdr, str, strlen) < 0)
+ if (end > str) {
+ if (xdr_stream_encode_opaque(xdr, str, end - str) < 0)
return nfserr_resource;
count++;
} else
@@ -2939,6 +2938,12 @@ struct nfsd4_fattr_args {
typedef __be32(*nfsd4_enc_attr)(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args);
+static __be32 nfsd4_encode_fattr4__inval(struct xdr_stream *xdr,
+ const struct nfsd4_fattr_args *args)
+{
+ return nfserr_inval;
+}
+
static __be32 nfsd4_encode_fattr4__noop(struct xdr_stream *xdr,
const struct nfsd4_fattr_args *args)
{
@@ -3560,6 +3565,8 @@ static const nfsd4_enc_attr nfsd4_enc_fattr4_encode_ops[] = {
[FATTR4_MODE_UMASK] = nfsd4_encode_fattr4__noop,
[FATTR4_XATTR_SUPPORT] = nfsd4_encode_fattr4_xattr_support,
+ [FATTR4_TIME_DELEG_ACCESS] = nfsd4_encode_fattr4__inval,
+ [FATTR4_TIME_DELEG_MODIFY] = nfsd4_encode_fattr4__inval,
[FATTR4_OPEN_ARGUMENTS] = nfsd4_encode_fattr4_open_arguments,
};
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index ea87b42894dd..f19320018639 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -57,6 +57,9 @@ struct readdir_cd {
__be32 err; /* 0, nfserr, or nfserr_eof */
};
+/* Maximum number of operations per session compound */
+#define NFSD_MAX_OPS_PER_COMPOUND 200
+
struct nfsd_genl_rqstp {
struct sockaddr rq_daddr;
struct sockaddr rq_saddr;
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d4b48602b2b0..ee0570cbdd9e 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -903,6 +903,7 @@ struct nfsd4_compoundargs {
char * tag;
u32 taglen;
u32 minorversion;
+ u32 client_opcnt;
u32 opcnt;
bool splice_ok;
struct nfsd4_op *ops;
diff --git a/fs/notify/fdinfo.c b/fs/notify/fdinfo.c
index 1161eabf11ee..9cc7eb863643 100644
--- a/fs/notify/fdinfo.c
+++ b/fs/notify/fdinfo.c
@@ -17,6 +17,7 @@
#include "fanotify/fanotify.h"
#include "fdinfo.h"
#include "fsnotify.h"
+#include "../internal.h"
#if defined(CONFIG_PROC_FS)
@@ -46,7 +47,12 @@ static void show_mark_fhandle(struct seq_file *m, struct inode *inode)
size = f->handle_bytes >> 2;
+ if (!super_trylock_shared(inode->i_sb))
+ return;
+
ret = exportfs_encode_fid(inode, (struct fid *)f->f_handle, &size);
+ up_read(&inode->i_sb->s_umount);
+
if ((ret == FILEID_INVALID) || (ret < 0))
return;
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 648dc59bef7f..79b026a36fb6 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -490,7 +490,9 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type);
- VFS_WARN_ON_ONCE(ns->inum != fid->ns_inum);
+
+ if (ns->inum != fid->ns_inum)
+ return NULL;
if (!__ns_ref_get(ns))
return NULL;
diff --git a/fs/ocfs2/move_extents.c b/fs/ocfs2/move_extents.c
index 86f2631e6360..10923bf7c8b8 100644
--- a/fs/ocfs2/move_extents.c
+++ b/fs/ocfs2/move_extents.c
@@ -867,6 +867,11 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
mlog_errno(ret);
goto out;
}
+ /*
+ * Invalidate extent cache after moving/defragging to prevent
+ * stale cached data with outdated extent flags.
+ */
+ ocfs2_extent_map_trunc(inode, cpos);
context->clusters_moved += alloc_size;
next:
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index aac7e34f56c1..604a82acd164 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -178,7 +178,7 @@ static int ovl_copy_fileattr(struct inode *inode, const struct path *old,
err = ovl_real_fileattr_get(old, &oldfa);
if (err) {
/* Ntfs-3g returns -EINVAL for "no fileattr support" */
- if (err == -EOPNOTSUPP || err == -EINVAL)
+ if (err == -ENOTTY || err == -EINVAL)
return 0;
pr_warn("failed to retrieve lower fileattr (%pd2, err=%i)\n",
old->dentry, err);
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index fc52c796061d..7ab2c9daffd0 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -369,11 +369,6 @@ static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
if (!ovl_should_sync(OVL_FS(inode->i_sb)))
ifl &= ~(IOCB_DSYNC | IOCB_SYNC);
- /*
- * Overlayfs doesn't support deferred completions, don't copy
- * this property in case it is set by the issuer.
- */
- ifl &= ~IOCB_DIO_CALLER_COMP;
ret = backing_file_write_iter(realfile, iter, iocb, ifl, &ctx);
out_unlock:
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index aaa4cf579561..e11f310ce092 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -720,7 +720,10 @@ int ovl_real_fileattr_get(const struct path *realpath, struct file_kattr *fa)
if (err)
return err;
- return vfs_fileattr_get(realpath->dentry, fa);
+ err = vfs_fileattr_get(realpath->dentry, fa);
+ if (err == -ENOIOCTLCMD)
+ err = -ENOTTY;
+ return err;
}
int ovl_fileattr_get(struct dentry *dentry, struct file_kattr *fa)
diff --git a/fs/resctrl/monitor.c b/fs/resctrl/monitor.c
index 4076336fbba6..572a9925bd6c 100644
--- a/fs/resctrl/monitor.c
+++ b/fs/resctrl/monitor.c
@@ -1782,15 +1782,13 @@ int resctrl_mon_resource_init(void)
mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
if (r->mon.mbm_cntr_assignable) {
- if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- resctrl_enable_mon_event(QOS_L3_MBM_TOTAL_EVENT_ID);
- if (!resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- resctrl_enable_mon_event(QOS_L3_MBM_LOCAL_EVENT_ID);
- mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
- mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
- (READS_TO_LOCAL_MEM |
- READS_TO_LOCAL_S_MEM |
- NON_TEMP_WRITE_TO_LOCAL_MEM);
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
+ mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
+ if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
+ mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
+ (READS_TO_LOCAL_MEM |
+ READS_TO_LOCAL_S_MEM |
+ NON_TEMP_WRITE_TO_LOCAL_MEM);
r->mon.mbm_assign_on_mkdir = true;
resctrl_file_fflags_init("num_mbm_cntrs",
RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig
index a4c02199fef4..17bd368574e9 100644
--- a/fs/smb/client/Kconfig
+++ b/fs/smb/client/Kconfig
@@ -5,17 +5,16 @@ config CIFS
select NLS
select NLS_UCS2_UTILS
select CRYPTO
- select CRYPTO_MD5
- select CRYPTO_SHA256
- select CRYPTO_SHA512
select CRYPTO_CMAC
- select CRYPTO_HMAC
select CRYPTO_AEAD2
select CRYPTO_CCM
select CRYPTO_GCM
select CRYPTO_ECB
select CRYPTO_AES
select CRYPTO_LIB_ARC4
+ select CRYPTO_LIB_MD5
+ select CRYPTO_LIB_SHA256
+ select CRYPTO_LIB_SHA512
select KEYS
select DNS_RESOLVER
select ASN1
diff --git a/fs/smb/client/cifsacl.c b/fs/smb/client/cifsacl.c
index 63b3b1290bed..ce2ebc213a1d 100644
--- a/fs/smb/client/cifsacl.c
+++ b/fs/smb/client/cifsacl.c
@@ -339,7 +339,6 @@ int
sid_to_id(struct cifs_sb_info *cifs_sb, struct smb_sid *psid,
struct cifs_fattr *fattr, uint sidtype)
{
- int rc = 0;
struct key *sidkey;
char *sidstr;
const struct cred *saved_cred;
@@ -446,12 +445,12 @@ out_revert_creds:
* fails then we just fall back to using the ctx->linux_uid/linux_gid.
*/
got_valid_id:
- rc = 0;
if (sidtype == SIDOWNER)
fattr->cf_uid = fuid;
else
fattr->cf_gid = fgid;
- return rc;
+
+ return 0;
}
int
diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c
index 7b7c8c38fdd0..801824825ecf 100644
--- a/fs/smb/client/cifsencrypt.c
+++ b/fs/smb/client/cifsencrypt.c
@@ -24,14 +24,43 @@
#include <linux/iov_iter.h>
#include <crypto/aead.h>
#include <crypto/arc4.h>
+#include <crypto/md5.h>
+#include <crypto/sha2.h>
-static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len,
- void *priv, void *priv2)
+static int cifs_sig_update(struct cifs_calc_sig_ctx *ctx,
+ const u8 *data, size_t len)
{
- struct shash_desc *shash = priv;
+ if (ctx->md5) {
+ md5_update(ctx->md5, data, len);
+ return 0;
+ }
+ if (ctx->hmac) {
+ hmac_sha256_update(ctx->hmac, data, len);
+ return 0;
+ }
+ return crypto_shash_update(ctx->shash, data, len);
+}
+
+static int cifs_sig_final(struct cifs_calc_sig_ctx *ctx, u8 *out)
+{
+ if (ctx->md5) {
+ md5_final(ctx->md5, out);
+ return 0;
+ }
+ if (ctx->hmac) {
+ hmac_sha256_final(ctx->hmac, out);
+ return 0;
+ }
+ return crypto_shash_final(ctx->shash, out);
+}
+
+static size_t cifs_sig_step(void *iter_base, size_t progress, size_t len,
+ void *priv, void *priv2)
+{
+ struct cifs_calc_sig_ctx *ctx = priv;
int ret, *pret = priv2;
- ret = crypto_shash_update(shash, iter_base, len);
+ ret = cifs_sig_update(ctx, iter_base, len);
if (ret < 0) {
*pret = ret;
return len;
@@ -42,21 +71,20 @@ static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len,
/*
* Pass the data from an iterator into a hash.
*/
-static int cifs_shash_iter(const struct iov_iter *iter, size_t maxsize,
- struct shash_desc *shash)
+static int cifs_sig_iter(const struct iov_iter *iter, size_t maxsize,
+ struct cifs_calc_sig_ctx *ctx)
{
struct iov_iter tmp_iter = *iter;
int err = -EIO;
- if (iterate_and_advance_kernel(&tmp_iter, maxsize, shash, &err,
- cifs_shash_step) != maxsize)
+ if (iterate_and_advance_kernel(&tmp_iter, maxsize, ctx, &err,
+ cifs_sig_step) != maxsize)
return err;
return 0;
}
-int __cifs_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server, char *signature,
- struct shash_desc *shash)
+int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ char *signature, struct cifs_calc_sig_ctx *ctx)
{
int i;
ssize_t rc;
@@ -82,8 +110,7 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
return -EIO;
}
- rc = crypto_shash_update(shash,
- iov[i].iov_base, iov[i].iov_len);
+ rc = cifs_sig_update(ctx, iov[i].iov_base, iov[i].iov_len);
if (rc) {
cifs_dbg(VFS, "%s: Could not update with payload\n",
__func__);
@@ -91,11 +118,11 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
}
}
- rc = cifs_shash_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), shash);
+ rc = cifs_sig_iter(&rqst->rq_iter, iov_iter_count(&rqst->rq_iter), ctx);
if (rc < 0)
return rc;
- rc = crypto_shash_final(shash, signature);
+ rc = cifs_sig_final(ctx, signature);
if (rc)
cifs_dbg(VFS, "%s: Could not generate hash\n", __func__);
@@ -112,29 +139,22 @@ int __cifs_calc_signature(struct smb_rqst *rqst,
static int cifs_calc_signature(struct smb_rqst *rqst,
struct TCP_Server_Info *server, char *signature)
{
- int rc;
+ struct md5_ctx ctx;
if (!rqst->rq_iov || !signature || !server)
return -EINVAL;
-
- rc = cifs_alloc_hash("md5", &server->secmech.md5);
- if (rc)
- return -1;
-
- rc = crypto_shash_init(server->secmech.md5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init md5\n", __func__);
- return rc;
+ if (fips_enabled) {
+ cifs_dbg(VFS,
+ "MD5 signature support is disabled due to FIPS\n");
+ return -EOPNOTSUPP;
}
- rc = crypto_shash_update(server->secmech.md5,
- server->session_key.response, server->session_key.len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
- return rc;
- }
+ md5_init(&ctx);
+ md5_update(&ctx, server->session_key.response, server->session_key.len);
- return __cifs_calc_signature(rqst, server, signature, server->secmech.md5);
+ return __cifs_calc_signature(
+ rqst, server, signature,
+ &(struct cifs_calc_sig_ctx){ .md5 = &ctx });
}
/* must be called with server->srv_mutex held */
@@ -405,11 +425,11 @@ static __le64 find_timestamp(struct cifs_ses *ses)
}
static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
- const struct nls_table *nls_cp, struct shash_desc *hmacmd5)
+ const struct nls_table *nls_cp)
{
- int rc = 0;
int len;
char nt_hash[CIFS_NTHASH_SIZE];
+ struct hmac_md5_ctx hmac_ctx;
__le16 *user;
wchar_t *domain;
wchar_t *server;
@@ -417,17 +437,7 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
/* calculate md4 hash of password */
E_md4hash(ses->password, nt_hash, nls_cp);
- rc = crypto_shash_setkey(hmacmd5->tfm, nt_hash, CIFS_NTHASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NT hash as a key, rc=%d\n", __func__, rc);
- return rc;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- return rc;
- }
+ hmac_md5_init_usingrawkey(&hmac_ctx, nt_hash, CIFS_NTHASH_SIZE);
/* convert ses->user_name to unicode */
len = ses->user_name ? strlen(ses->user_name) : 0;
@@ -442,12 +452,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
*(u16 *)user = 0;
}
- rc = crypto_shash_update(hmacmd5, (char *)user, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)user, 2 * len);
kfree(user);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with user, rc=%d\n", __func__, rc);
- return rc;
- }
/* convert ses->domainName to unicode and uppercase */
if (ses->domainName) {
@@ -459,12 +465,8 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
len = cifs_strtoUTF16((__le16 *)domain, ses->domainName, len,
nls_cp);
- rc = crypto_shash_update(hmacmd5, (char *)domain, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)domain, 2 * len);
kfree(domain);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with domain, rc=%d\n", __func__, rc);
- return rc;
- }
} else {
/* We use ses->ip_addr if no domain name available */
len = strlen(ses->ip_addr);
@@ -474,25 +476,16 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
return -ENOMEM;
len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len, nls_cp);
- rc = crypto_shash_update(hmacmd5, (char *)server, 2 * len);
+ hmac_md5_update(&hmac_ctx, (const u8 *)server, 2 * len);
kfree(server);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with server, rc=%d\n", __func__, rc);
- return rc;
- }
}
- rc = crypto_shash_final(hmacmd5, ntlmv2_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
-
- return rc;
+ hmac_md5_final(&hmac_ctx, ntlmv2_hash);
+ return 0;
}
-static int
-CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_desc *hmacmd5)
+static void CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash)
{
- int rc;
struct ntlmv2_resp *ntlmv2 = (struct ntlmv2_resp *)
(ses->auth_key.response + CIFS_SESS_KEY_SIZE);
unsigned int hash_len;
@@ -501,35 +494,15 @@ CalcNTLMv2_response(const struct cifs_ses *ses, char *ntlmv2_hash, struct shash_
hash_len = ses->auth_key.len - (CIFS_SESS_KEY_SIZE +
offsetof(struct ntlmv2_resp, challenge.key[0]));
- rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
- return rc;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- return rc;
- }
-
if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED)
memcpy(ntlmv2->challenge.key, ses->ntlmssp->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
else
memcpy(ntlmv2->challenge.key, ses->server->cryptkey, CIFS_SERVER_CHALLENGE_SIZE);
- rc = crypto_shash_update(hmacmd5, ntlmv2->challenge.key, hash_len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
- return rc;
- }
-
- /* Note that the MD5 digest over writes anon.challenge_key.key */
- rc = crypto_shash_final(hmacmd5, ntlmv2->ntlmv2_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
-
- return rc;
+ /* Note that the HMAC-MD5 value overwrites ntlmv2->challenge.key */
+ hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ntlmv2->challenge.key, hash_len,
+ ntlmv2->ntlmv2_hash);
}
/*
@@ -586,7 +559,6 @@ out:
int
setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
{
- struct shash_desc *hmacmd5 = NULL;
unsigned char *tiblob = NULL; /* target info blob */
struct ntlmv2_resp *ntlmv2;
char ntlmv2_hash[16];
@@ -657,51 +629,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
ntlmv2->client_chal = cc;
ntlmv2->reserved2 = 0;
- rc = cifs_alloc_hash("hmac(md5)", &hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "Could not allocate HMAC-MD5, rc=%d\n", rc);
+ if (fips_enabled) {
+ cifs_dbg(VFS, "NTLMv2 support is disabled due to FIPS\n");
+ rc = -EOPNOTSUPP;
goto unlock;
}
/* calculate ntlmv2_hash */
- rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp, hmacmd5);
+ rc = calc_ntlmv2_hash(ses, ntlmv2_hash, nls_cp);
if (rc) {
cifs_dbg(VFS, "Could not get NTLMv2 hash, rc=%d\n", rc);
goto unlock;
}
/* calculate first part of the client response (CR1) */
- rc = CalcNTLMv2_response(ses, ntlmv2_hash, hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "Could not calculate CR1, rc=%d\n", rc);
- goto unlock;
- }
+ CalcNTLMv2_response(ses, ntlmv2_hash);
/* now calculate the session key for NTLMv2 */
- rc = crypto_shash_setkey(hmacmd5->tfm, ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not set NTLMv2 hash as a key, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_init(hmacmd5);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init HMAC-MD5, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_update(hmacmd5, ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update with response, rc=%d\n", __func__, rc);
- goto unlock;
- }
-
- rc = crypto_shash_final(hmacmd5, ses->auth_key.response);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate MD5 hash, rc=%d\n", __func__, rc);
+ hmac_md5_usingrawkey(ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ntlmv2->ntlmv2_hash, CIFS_HMAC_MD5_HASH_SIZE,
+ ses->auth_key.response);
+ rc = 0;
unlock:
cifs_server_unlock(ses->server);
- cifs_free_hash(&hmacmd5);
setup_ntlmv2_rsp_ret:
kfree_sensitive(tiblob);
@@ -743,9 +693,6 @@ void
cifs_crypto_secmech_release(struct TCP_Server_Info *server)
{
cifs_free_hash(&server->secmech.aes_cmac);
- cifs_free_hash(&server->secmech.hmacsha256);
- cifs_free_hash(&server->secmech.md5);
- cifs_free_hash(&server->secmech.sha512);
if (server->secmech.enc) {
crypto_free_aead(server->secmech.enc);
diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c
index 05b1fa76e8cc..185ac41bd7e9 100644
--- a/fs/smb/client/cifsfs.c
+++ b/fs/smb/client/cifsfs.c
@@ -173,7 +173,7 @@ module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
module_param(enable_gcm_256, bool, 0644);
-MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0");
+MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/1");
module_param(require_gcm_256, bool, 0644);
MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0");
@@ -2139,13 +2139,9 @@ MODULE_DESCRIPTION
"also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("ecb");
-MODULE_SOFTDEP("hmac");
-MODULE_SOFTDEP("md5");
MODULE_SOFTDEP("nls");
MODULE_SOFTDEP("aes");
MODULE_SOFTDEP("cmac");
-MODULE_SOFTDEP("sha256");
-MODULE_SOFTDEP("sha512");
MODULE_SOFTDEP("aead2");
MODULE_SOFTDEP("ccm");
MODULE_SOFTDEP("gcm");
diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h
index 8f6f567d7474..203e2aaa3c25 100644
--- a/fs/smb/client/cifsglob.h
+++ b/fs/smb/client/cifsglob.h
@@ -24,6 +24,7 @@
#include "cifsacl.h"
#include <crypto/internal/hash.h>
#include <uapi/linux/cifs/cifs_mount.h>
+#include "../common/cifsglob.h"
#include "../common/smb2pdu.h"
#include "smb2pdu.h"
#include <linux/filelock.h>
@@ -221,9 +222,6 @@ struct session_key {
/* crypto hashing related structure/fields, not specific to a sec mech */
struct cifs_secmech {
- struct shash_desc *md5; /* md5 hash function, for CIFS/SMB1 signatures */
- struct shash_desc *hmacsha256; /* hmac-sha256 hash function, for SMB2 signatures */
- struct shash_desc *sha512; /* sha512 hash function, for SMB3.1.1 preauth hash */
struct shash_desc *aes_cmac; /* block-cipher based MAC function, for SMB3 signatures */
struct crypto_aead *enc; /* smb3 encryption AEAD TFM (AES-CCM and AES-GCM) */
@@ -536,8 +534,6 @@ struct smb_version_operations {
void (*new_lease_key)(struct cifs_fid *);
int (*generate_signingkey)(struct cifs_ses *ses,
struct TCP_Server_Info *server);
- int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *,
- bool allocate_crypto);
int (*set_integrity)(const unsigned int, struct cifs_tcon *tcon,
struct cifsFileInfo *src_file);
int (*enum_snapshots)(const unsigned int xid, struct cifs_tcon *tcon,
@@ -702,12 +698,6 @@ get_rfc1002_length(void *buf)
return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}
-static inline void
-inc_rfc1001_len(void *buf, int count)
-{
- be32_add_cpu((__be32 *)buf, count);
-}
-
struct TCP_Server_Info {
struct list_head tcp_ses_list;
struct list_head smb_ses_list;
@@ -740,7 +730,7 @@ struct TCP_Server_Info {
bool nosharesock;
bool tcp_nodelay;
bool terminate;
- unsigned int credits; /* send no more requests at once */
+ int credits; /* send no more requests at once */
unsigned int max_credits; /* can override large 32000 default at mnt */
unsigned int in_flight; /* number of requests on the wire to server */
unsigned int max_in_flight; /* max number of requests that were on wire */
@@ -1021,8 +1011,6 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
#define CIFS_MAX_RFC1002_WSIZE ((1<<17) - 1 - sizeof(WRITE_REQ) + 4)
#define CIFS_MAX_RFC1002_RSIZE ((1<<17) - 1 - sizeof(READ_RSP) + 4)
-#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
-
/*
* Windows only supports a max of 60kb reads and 65535 byte writes. Default to
* those values when posix extensions aren't in force. In actuality here, we
@@ -2148,30 +2136,20 @@ extern mempool_t cifs_io_request_pool;
extern mempool_t cifs_io_subrequest_pool;
/* Operations for different SMB versions */
-#define SMB1_VERSION_STRING "1.0"
-#define SMB20_VERSION_STRING "2.0"
#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
extern struct smb_version_operations smb1_operations;
extern struct smb_version_values smb1_values;
extern struct smb_version_operations smb20_operations;
extern struct smb_version_values smb20_values;
#endif /* CIFS_ALLOW_INSECURE_LEGACY */
-#define SMB21_VERSION_STRING "2.1"
extern struct smb_version_operations smb21_operations;
extern struct smb_version_values smb21_values;
-#define SMBDEFAULT_VERSION_STRING "default"
extern struct smb_version_values smbdefault_values;
-#define SMB3ANY_VERSION_STRING "3"
extern struct smb_version_values smb3any_values;
-#define SMB30_VERSION_STRING "3.0"
extern struct smb_version_operations smb30_operations;
extern struct smb_version_values smb30_values;
-#define SMB302_VERSION_STRING "3.02"
-#define ALT_SMB302_VERSION_STRING "3.0.2"
/*extern struct smb_version_operations smb302_operations;*/ /* not needed yet */
extern struct smb_version_values smb302_values;
-#define SMB311_VERSION_STRING "3.1.1"
-#define ALT_SMB311_VERSION_STRING "3.11"
extern struct smb_version_operations smb311_operations;
extern struct smb_version_values smb311_values;
diff --git a/fs/smb/client/cifsproto.h b/fs/smb/client/cifsproto.h
index e8fba98690ce..3528c365a452 100644
--- a/fs/smb/client/cifsproto.h
+++ b/fs/smb/client/cifsproto.h
@@ -9,6 +9,7 @@
#define _CIFSPROTO_H
#include <linux/nls.h>
#include <linux/ctype.h>
+#include "cifsglob.h"
#include "trace.h"
#ifdef CONFIG_CIFS_DFS_UPCALL
#include "dfs_cache.h"
@@ -615,6 +616,8 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16,
extern struct TCP_Server_Info *
cifs_find_tcp_session(struct smb3_fs_context *ctx);
+struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal);
+
void __cifs_put_smb_ses(struct cifs_ses *ses);
extern struct cifs_ses *
@@ -632,9 +635,13 @@ int cifs_create_mf_symlink(unsigned int xid, struct cifs_tcon *tcon,
struct cifs_sb_info *cifs_sb,
const unsigned char *path, char *pbuf,
unsigned int *pbytes_written);
-int __cifs_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server, char *signature,
- struct shash_desc *shash);
+struct cifs_calc_sig_ctx {
+ struct md5_ctx *md5;
+ struct hmac_sha256_ctx *hmac;
+ struct shash_desc *shash;
+};
+int __cifs_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
+ char *signature, struct cifs_calc_sig_ctx *ctx);
enum securityEnum cifs_select_sectype(struct TCP_Server_Info *,
enum securityEnum);
diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c
index 2881efcbe09a..7da194f29fef 100644
--- a/fs/smb/client/cifssmb.c
+++ b/fs/smb/client/cifssmb.c
@@ -1311,6 +1311,8 @@ cifs_readv_callback(struct mid_q_entry *mid)
.rreq_debug_id = rdata->rreq->debug_id,
.rreq_debug_index = rdata->subreq.debug_index,
};
+ unsigned int rreq_debug_id = rdata->rreq->debug_id;
+ unsigned int subreq_debug_index = rdata->subreq.debug_index;
cifs_dbg(FYI, "%s: mid=%llu state=%d result=%d bytes=%zu\n",
__func__, mid->mid, mid->mid_state, rdata->result,
@@ -1374,6 +1376,9 @@ do_retry:
__set_bit(NETFS_SREQ_MADE_PROGRESS, &rdata->subreq.flags);
}
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, rdata->credits.value,
+ server->credits, server->in_flight,
+ 0, cifs_trace_rw_credits_read_response_clear);
rdata->credits.value = 0;
rdata->subreq.error = rdata->result;
rdata->subreq.transferred += rdata->got_bytes;
@@ -1381,6 +1386,9 @@ do_retry:
netfs_read_subreq_terminated(&rdata->subreq);
release_mid(mid);
add_credits(server, &credits, 0);
+ trace_smb3_rw_credits(rreq_debug_id, subreq_debug_index, 0,
+ server->credits, server->in_flight,
+ credits.value, cifs_trace_rw_credits_read_response_add);
}
/* cifs_async_readv - send an async write, and set up mid to handle result */
diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
index dd12f3eb61dc..55cb4b0cbd48 100644
--- a/fs/smb/client/connect.c
+++ b/fs/smb/client/connect.c
@@ -310,6 +310,8 @@ cifs_abort_connection(struct TCP_Server_Info *server)
server->ssocket->flags);
sock_release(server->ssocket);
server->ssocket = NULL;
+ } else if (cifs_rdma_enabled(server)) {
+ smbd_destroy(server);
}
server->sequence_number = 0;
server->session_estab = false;
@@ -338,12 +340,6 @@ cifs_abort_connection(struct TCP_Server_Info *server)
mid_execute_callback(mid);
release_mid(mid);
}
-
- if (cifs_rdma_enabled(server)) {
- cifs_server_lock(server);
- smbd_destroy(server);
- cifs_server_unlock(server);
- }
}
static bool cifs_tcp_ses_needs_reconnect(struct TCP_Server_Info *server, int num_targets)
@@ -2015,39 +2011,31 @@ static int match_session(struct cifs_ses *ses,
/**
* cifs_setup_ipc - helper to setup the IPC tcon for the session
* @ses: smb session to issue the request on
- * @ctx: the superblock configuration context to use for building the
- * new tree connection for the IPC (interprocess communication RPC)
+ * @seal: if encryption is requested
*
* A new IPC connection is made and stored in the session
* tcon_ipc. The IPC tcon has the same lifetime as the session.
*/
-static int
-cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
+struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal)
{
int rc = 0, xid;
struct cifs_tcon *tcon;
char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
- bool seal = false;
struct TCP_Server_Info *server = ses->server;
/*
* If the mount request that resulted in the creation of the
* session requires encryption, force IPC to be encrypted too.
*/
- if (ctx->seal) {
- if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)
- seal = true;
- else {
- cifs_server_dbg(VFS,
- "IPC: server doesn't support encryption\n");
- return -EOPNOTSUPP;
- }
+ if (seal && !(server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) {
+ cifs_server_dbg(VFS, "IPC: server doesn't support encryption\n");
+ return ERR_PTR(-EOPNOTSUPP);
}
/* no need to setup directory caching on IPC share, so pass in false */
tcon = tcon_info_alloc(false, netfs_trace_tcon_ref_new_ipc);
if (tcon == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
spin_lock(&server->srv_lock);
scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname);
@@ -2057,13 +2045,13 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
tcon->ses = ses;
tcon->ipc = true;
tcon->seal = seal;
- rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls);
+ rc = server->ops->tree_connect(xid, ses, unc, tcon, ses->local_nls);
free_xid(xid);
if (rc) {
- cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc);
+ cifs_server_dbg(VFS | ONCE, "failed to connect to IPC (rc=%d)\n", rc);
tconInfoFree(tcon, netfs_trace_tcon_ref_free_ipc_fail);
- goto out;
+ return ERR_PTR(rc);
}
cifs_dbg(FYI, "IPC tcon rc=%d ipc tid=0x%x\n", rc, tcon->tid);
@@ -2071,9 +2059,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
spin_lock(&tcon->tc_lock);
tcon->status = TID_GOOD;
spin_unlock(&tcon->tc_lock);
- ses->tcon_ipc = tcon;
-out:
- return rc;
+ return tcon;
}
static struct cifs_ses *
@@ -2347,6 +2333,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
{
struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
+ struct cifs_tcon *ipc;
struct cifs_ses *ses;
unsigned int xid;
int retries = 0;
@@ -2525,7 +2512,12 @@ retry_new_session:
list_add(&ses->smb_ses_list, &server->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- cifs_setup_ipc(ses, ctx);
+ ipc = cifs_setup_ipc(ses, ctx->seal);
+ spin_lock(&cifs_tcp_ses_lock);
+ spin_lock(&ses->ses_lock);
+ ses->tcon_ipc = !IS_ERR(ipc) ? ipc : NULL;
+ spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
free_xid(xid);
diff --git a/fs/smb/client/dfs_cache.c b/fs/smb/client/dfs_cache.c
index 4dada26d56b5..f2ad0ccd08a7 100644
--- a/fs/smb/client/dfs_cache.c
+++ b/fs/smb/client/dfs_cache.c
@@ -1120,24 +1120,63 @@ static bool target_share_equal(struct cifs_tcon *tcon, const char *s1)
return match;
}
-static bool is_ses_good(struct cifs_ses *ses)
+static bool is_ses_good(struct cifs_tcon *tcon, struct cifs_ses *ses)
{
struct TCP_Server_Info *server = ses->server;
- struct cifs_tcon *tcon = ses->tcon_ipc;
+ struct cifs_tcon *ipc = NULL;
bool ret;
+ spin_lock(&cifs_tcp_ses_lock);
spin_lock(&ses->ses_lock);
spin_lock(&ses->chan_lock);
+
ret = !cifs_chan_needs_reconnect(ses, server) &&
- ses->ses_status == SES_GOOD &&
- !tcon->need_reconnect;
+ ses->ses_status == SES_GOOD;
+
spin_unlock(&ses->chan_lock);
+
+ if (!ret)
+ goto out;
+
+ if (likely(ses->tcon_ipc)) {
+ if (ses->tcon_ipc->need_reconnect) {
+ ret = false;
+ goto out;
+ }
+ } else {
+ spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+
+ ipc = cifs_setup_ipc(ses, tcon->seal);
+
+ spin_lock(&cifs_tcp_ses_lock);
+ spin_lock(&ses->ses_lock);
+ if (!IS_ERR(ipc)) {
+ if (!ses->tcon_ipc) {
+ ses->tcon_ipc = ipc;
+ ipc = NULL;
+ }
+ } else {
+ ret = false;
+ ipc = NULL;
+ }
+ }
+
+out:
spin_unlock(&ses->ses_lock);
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (ipc && server->ops->tree_disconnect) {
+ unsigned int xid = get_xid();
+
+ (void)server->ops->tree_disconnect(xid, ipc);
+ _free_xid(xid);
+ }
+ tconInfoFree(ipc, netfs_trace_tcon_ref_free_ipc);
return ret;
}
/* Refresh dfs referral of @ses */
-static void refresh_ses_referral(struct cifs_ses *ses)
+static void refresh_ses_referral(struct cifs_tcon *tcon, struct cifs_ses *ses)
{
struct cache_entry *ce;
unsigned int xid;
@@ -1153,7 +1192,7 @@ static void refresh_ses_referral(struct cifs_ses *ses)
}
ses = CIFS_DFS_ROOT_SES(ses);
- if (!is_ses_good(ses)) {
+ if (!is_ses_good(tcon, ses)) {
cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n",
__func__);
goto out;
@@ -1241,7 +1280,7 @@ static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh)
up_read(&htable_rw_lock);
ses = CIFS_DFS_ROOT_SES(ses);
- if (!is_ses_good(ses)) {
+ if (!is_ses_good(tcon, ses)) {
cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n",
__func__);
goto out;
@@ -1309,7 +1348,7 @@ void dfs_cache_refresh(struct work_struct *work)
tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work);
list_for_each_entry(ses, &tcon->dfs_ses_list, dlist)
- refresh_ses_referral(ses);
+ refresh_ses_referral(tcon, ses);
refresh_tcon_referral(tcon, false);
queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work,
diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c
index 239dd84a336f..cac355364e43 100644
--- a/fs/smb/client/inode.c
+++ b/fs/smb/client/inode.c
@@ -2431,8 +2431,10 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
tcon = tlink_tcon(tlink);
server = tcon->ses->server;
- if (!server->ops->rename)
- return -ENOSYS;
+ if (!server->ops->rename) {
+ rc = -ENOSYS;
+ goto do_rename_exit;
+ }
/* try path-based rename first */
rc = server->ops->rename(xid, tcon, from_dentry,
@@ -2482,11 +2484,8 @@ cifs_do_rename(const unsigned int xid, struct dentry *from_dentry,
}
#endif /* CONFIG_CIFS_ALLOW_INSECURE_LEGACY */
do_rename_exit:
- if (rc == 0) {
+ if (rc == 0)
d_move(from_dentry, to_dentry);
- /* Force a new lookup */
- d_drop(from_dentry);
- }
cifs_put_tlink(tlink);
return rc;
}
diff --git a/fs/smb/client/link.c b/fs/smb/client/link.c
index fe80e711cd75..70f3c0c67eeb 100644
--- a/fs/smb/client/link.c
+++ b/fs/smb/client/link.c
@@ -5,6 +5,7 @@
* Author(s): Steve French (sfrench@us.ibm.com)
*
*/
+#include <crypto/md5.h>
#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/slab.h>
@@ -37,23 +38,6 @@
#define CIFS_MF_SYMLINK_MD5_ARGS(md5_hash) md5_hash
static int
-symlink_hash(unsigned int link_len, const char *link_str, u8 *md5_hash)
-{
- int rc;
- struct shash_desc *md5 = NULL;
-
- rc = cifs_alloc_hash("md5", &md5);
- if (rc)
- return rc;
-
- rc = crypto_shash_digest(md5, link_str, link_len, md5_hash);
- if (rc)
- cifs_dbg(VFS, "%s: Could not generate md5 hash\n", __func__);
- cifs_free_hash(&md5);
- return rc;
-}
-
-static int
parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
char **_link_str)
{
@@ -77,11 +61,7 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
return -EINVAL;
- rc = symlink_hash(link_len, link_str, md5_hash);
- if (rc) {
- cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
- return rc;
- }
+ md5(link_str, link_len, md5_hash);
scnprintf(md5_str2, sizeof(md5_str2),
CIFS_MF_SYMLINK_MD5_FORMAT,
@@ -103,7 +83,6 @@ parse_mf_symlink(const u8 *buf, unsigned int buf_len, unsigned int *_link_len,
static int
format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str)
{
- int rc;
unsigned int link_len;
unsigned int ofs;
u8 md5_hash[16];
@@ -116,11 +95,7 @@ format_mf_symlink(u8 *buf, unsigned int buf_len, const char *link_str)
if (link_len > CIFS_MF_SYMLINK_LINK_MAXLEN)
return -ENAMETOOLONG;
- rc = symlink_hash(link_len, link_str, md5_hash);
- if (rc) {
- cifs_dbg(FYI, "%s: MD5 hash failure: %d\n", __func__, rc);
- return rc;
- }
+ md5(link_str, link_len, md5_hash);
scnprintf(buf, buf_len,
CIFS_MF_SYMLINK_LEN_FORMAT CIFS_MF_SYMLINK_MD5_FORMAT,
diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c
index dda6dece802a..e10123d8cd7d 100644
--- a/fs/smb/client/misc.c
+++ b/fs/smb/client/misc.c
@@ -916,6 +916,14 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
char *data_end;
struct dfs_referral_level_3 *ref;
+ if (rsp_size < sizeof(*rsp)) {
+ cifs_dbg(VFS | ONCE,
+ "%s: header is malformed (size is %u, must be %zu)\n",
+ __func__, rsp_size, sizeof(*rsp));
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
*num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals);
if (*num_of_nodes < 1) {
@@ -925,6 +933,15 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
goto parse_DFS_referrals_exit;
}
+ if (sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3) > rsp_size) {
+ cifs_dbg(VFS | ONCE,
+ "%s: malformed buffer (size is %u, must be at least %zu)\n",
+ __func__, rsp_size,
+ sizeof(*rsp) + *num_of_nodes * sizeof(REFERRAL3));
+ rc = -EINVAL;
+ goto parse_DFS_referrals_exit;
+ }
+
ref = (struct dfs_referral_level_3 *) &(rsp->referrals);
if (ref->VersionNumber != cpu_to_le16(3)) {
cifs_dbg(VFS, "Referrals of V%d version are not supported, should be V3\n",
diff --git a/fs/smb/client/sess.c b/fs/smb/client/sess.c
index 0a8c2fcc9ded..ef3b498b0a02 100644
--- a/fs/smb/client/sess.c
+++ b/fs/smb/client/sess.c
@@ -584,7 +584,7 @@ cifs_ses_add_channel(struct cifs_ses *ses,
* to sign packets before we generate the channel signing key
* (we sign with the session key)
*/
- rc = smb311_crypto_shash_allocate(chan->server);
+ rc = smb3_crypto_shash_allocate(chan->server);
if (rc) {
cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
mutex_unlock(&ses->session_mutex);
diff --git a/fs/smb/client/smb2misc.c b/fs/smb/client/smb2misc.c
index 89d933b4a8bc..96bfe4c63ccf 100644
--- a/fs/smb/client/smb2misc.c
+++ b/fs/smb/client/smb2misc.c
@@ -7,6 +7,7 @@
* Pavel Shilovsky (pshilovsky@samba.org) 2012
*
*/
+#include <crypto/sha2.h>
#include <linux/ctype.h>
#include "cifsglob.h"
#include "cifsproto.h"
@@ -888,13 +889,13 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve
* @iov: array containing the SMB request we will send to the server
* @nvec: number of array entries for the iov
*/
-int
+void
smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
struct kvec *iov, int nvec)
{
- int i, rc;
+ int i;
struct smb2_hdr *hdr;
- struct shash_desc *sha512 = NULL;
+ struct sha512_ctx sha_ctx;
hdr = (struct smb2_hdr *)iov[0].iov_base;
/* neg prot are always taken */
@@ -907,52 +908,22 @@ smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server,
* and we can test it. Preauth requires 3.1.1 for now.
*/
if (server->dialect != SMB311_PROT_ID)
- return 0;
+ return;
if (hdr->Command != SMB2_SESSION_SETUP)
- return 0;
+ return;
/* skip last sess setup response */
if ((hdr->Flags & SMB2_FLAGS_SERVER_TO_REDIR)
&& (hdr->Status == NT_STATUS_OK
|| (hdr->Status !=
cpu_to_le32(NT_STATUS_MORE_PROCESSING_REQUIRED))))
- return 0;
+ return;
ok:
- rc = smb311_crypto_shash_allocate(server);
- if (rc)
- return rc;
-
- sha512 = server->secmech.sha512;
- rc = crypto_shash_init(sha512);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not init sha512 shash\n", __func__);
- return rc;
- }
-
- rc = crypto_shash_update(sha512, ses->preauth_sha_hash,
- SMB2_PREAUTH_HASH_SIZE);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n", __func__);
- return rc;
- }
-
- for (i = 0; i < nvec; i++) {
- rc = crypto_shash_update(sha512, iov[i].iov_base, iov[i].iov_len);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not update sha512 shash\n",
- __func__);
- return rc;
- }
- }
-
- rc = crypto_shash_final(sha512, ses->preauth_sha_hash);
- if (rc) {
- cifs_dbg(VFS, "%s: Could not finalize sha512 shash\n",
- __func__);
- return rc;
- }
-
- return 0;
+ sha512_init(&sha_ctx);
+ sha512_update(&sha_ctx, ses->preauth_sha_hash, SMB2_PREAUTH_HASH_SIZE);
+ for (i = 0; i < nvec; i++)
+ sha512_update(&sha_ctx, iov[i].iov_base, iov[i].iov_len);
+ sha512_final(&sha_ctx, ses->preauth_sha_hash);
}
diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c
index 7c392cf5940b..1e39f2165e42 100644
--- a/fs/smb/client/smb2ops.c
+++ b/fs/smb/client/smb2ops.c
@@ -2799,11 +2799,12 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
struct cifs_fid fid;
int rc;
__le16 *utf16_path;
- struct cached_fid *cfid = NULL;
+ struct cached_fid *cfid;
int retries = 0, cur_sleep = 1;
replay_again:
/* reinitialize for possible replay */
+ cfid = NULL;
flags = CIFS_CP_CREATE_CLOSE_OP;
oplock = SMB2_OPLOCK_LEVEL_NONE;
server = cifs_pick_channel(ses);
@@ -3212,8 +3213,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path) {
rc = -ENOMEM;
- free_xid(xid);
- return ERR_PTR(rc);
+ goto put_tlink;
}
oparms = (struct cifs_open_parms) {
@@ -3245,6 +3245,7 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
}
+put_tlink:
cifs_put_tlink(tlink);
free_xid(xid);
@@ -3285,8 +3286,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen,
utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
if (!utf16_path) {
rc = -ENOMEM;
- free_xid(xid);
- return rc;
+ goto put_tlink;
}
oparms = (struct cifs_open_parms) {
@@ -3307,6 +3307,7 @@ set_smb2_acl(struct smb_ntsd *pnntsd, __u32 acllen,
SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
}
+put_tlink:
cifs_put_tlink(tlink);
free_xid(xid);
return rc;
@@ -5446,7 +5447,6 @@ struct smb_version_operations smb20_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .calc_signature = smb2_calc_signature,
.is_read_op = smb2_is_read_op,
.set_oplock_level = smb2_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
@@ -5550,7 +5550,6 @@ struct smb_version_operations smb21_operations = {
.get_lease_key = smb2_get_lease_key,
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
- .calc_signature = smb2_calc_signature,
.is_read_op = smb21_is_read_op,
.set_oplock_level = smb21_set_oplock_level,
.create_lease_buf = smb2_create_lease_buf,
@@ -5660,7 +5659,6 @@ struct smb_version_operations smb30_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.generate_signingkey = generate_smb30signingkey,
- .calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
.set_oplock_level = smb3_set_oplock_level,
@@ -5777,7 +5775,6 @@ struct smb_version_operations smb311_operations = {
.set_lease_key = smb2_set_lease_key,
.new_lease_key = smb2_new_lease_key,
.generate_signingkey = generate_smb311signingkey,
- .calc_signature = smb3_calc_signature,
.set_integrity = smb3_set_integrity,
.is_read_op = smb21_is_read_op,
.set_oplock_level = smb3_set_oplock_level,
diff --git a/fs/smb/client/smb2proto.h b/fs/smb/client/smb2proto.h
index b3f1398c9f79..5241daaae543 100644
--- a/fs/smb/client/smb2proto.h
+++ b/fs/smb/client/smb2proto.h
@@ -39,12 +39,6 @@ extern struct mid_q_entry *smb2_setup_async_request(
struct TCP_Server_Info *server, struct smb_rqst *rqst);
extern struct cifs_tcon *smb2_find_smb_tcon(struct TCP_Server_Info *server,
__u64 ses_id, __u32 tid);
-extern int smb2_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server,
- bool allocate_crypto);
-extern int smb3_calc_signature(struct smb_rqst *rqst,
- struct TCP_Server_Info *server,
- bool allocate_crypto);
extern void smb2_echo_request(struct work_struct *work);
extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
extern bool smb2_is_valid_oplock_break(char *buffer,
@@ -295,10 +289,10 @@ extern int smb2_validate_and_copy_iov(unsigned int offset,
extern void smb2_copy_fs_info_to_kstatfs(
struct smb2_fs_full_size_info *pfs_inf,
struct kstatfs *kst);
-extern int smb311_crypto_shash_allocate(struct TCP_Server_Info *server);
-extern int smb311_update_preauth_hash(struct cifs_ses *ses,
- struct TCP_Server_Info *server,
- struct kvec *iov, int nvec);
+extern int smb3_crypto_shash_allocate(struct TCP_Server_Info *server);
+extern void smb311_update_preauth_hash(struct cifs_ses *ses,
+ struct TCP_Server_Info *server,
+ struct kvec *iov, int nvec);
extern int smb2_query_info_compound(const unsigned int xid,
struct cifs_tcon *tcon,
const char *path, u32 desired_access,
diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c
index 33f33013b392..6a9b80385b86 100644
--- a/fs/smb/client/smb2transport.c
+++ b/fs/smb/client/smb2transport.c
@@ -19,6 +19,7 @@
#include <linux/mempool.h>
#include <linux/highmem.h>
#include <crypto/aead.h>
+#include <crypto/sha2.h>
#include "cifsglob.h"
#include "cifsproto.h"
#include "smb2proto.h"
@@ -26,53 +27,14 @@
#include "../common/smb2status.h"
#include "smb2glob.h"
-static int
+int
smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
{
struct cifs_secmech *p = &server->secmech;
- int rc;
- rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
- if (rc)
- goto err;
-
- rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
- if (rc)
- goto err;
-
- return 0;
-err:
- cifs_free_hash(&p->hmacsha256);
- return rc;
+ return cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
}
-int
-smb311_crypto_shash_allocate(struct TCP_Server_Info *server)
-{
- struct cifs_secmech *p = &server->secmech;
- int rc = 0;
-
- rc = cifs_alloc_hash("hmac(sha256)", &p->hmacsha256);
- if (rc)
- return rc;
-
- rc = cifs_alloc_hash("cmac(aes)", &p->aes_cmac);
- if (rc)
- goto err;
-
- rc = cifs_alloc_hash("sha512", &p->sha512);
- if (rc)
- goto err;
-
- return 0;
-
-err:
- cifs_free_hash(&p->aes_cmac);
- cifs_free_hash(&p->hmacsha256);
- return rc;
-}
-
-
static
int smb3_get_sign_key(__u64 ses_id, struct TCP_Server_Info *server, u8 *key)
{
@@ -247,16 +209,15 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid)
return tcon;
}
-int
+static int
smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
- bool allocate_crypto)
+ bool allocate_crypto)
{
int rc;
unsigned char smb2_signature[SMB2_HMACSHA256_SIZE];
- unsigned char *sigptr = smb2_signature;
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
- struct shash_desc *shash = NULL;
+ struct hmac_sha256_ctx hmac_ctx;
struct smb_rqst drqst;
__u64 sid = le64_to_cpu(shdr->SessionId);
u8 key[SMB2_NTLMV2_SESSKEY_SIZE];
@@ -271,30 +232,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
memset(shdr->Signature, 0x0, SMB2_SIGNATURE_SIZE);
- if (allocate_crypto) {
- rc = cifs_alloc_hash("hmac(sha256)", &shash);
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: sha256 alloc failed\n", __func__);
- goto out;
- }
- } else {
- shash = server->secmech.hmacsha256;
- }
-
- rc = crypto_shash_setkey(shash->tfm, key, sizeof(key));
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: Could not update with response\n",
- __func__);
- goto out;
- }
-
- rc = crypto_shash_init(shash);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not init sha256", __func__);
- goto out;
- }
+ hmac_sha256_init_usingrawkey(&hmac_ctx, key, sizeof(key));
/*
* For SMB2+, __cifs_calc_signature() expects to sign only the actual
@@ -305,25 +243,17 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
*/
drqst = *rqst;
if (drqst.rq_nvec >= 2 && iov[0].iov_len == 4) {
- rc = crypto_shash_update(shash, iov[0].iov_base,
- iov[0].iov_len);
- if (rc) {
- cifs_server_dbg(VFS,
- "%s: Could not update with payload\n",
- __func__);
- goto out;
- }
+ hmac_sha256_update(&hmac_ctx, iov[0].iov_base, iov[0].iov_len);
drqst.rq_iov++;
drqst.rq_nvec--;
}
- rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
+ rc = __cifs_calc_signature(
+ &drqst, server, smb2_signature,
+ &(struct cifs_calc_sig_ctx){ .hmac = &hmac_ctx });
if (!rc)
- memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, smb2_signature, SMB2_SIGNATURE_SIZE);
-out:
- if (allocate_crypto)
- cifs_free_hash(&shash);
return rc;
}
@@ -336,8 +266,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
__u8 L256[4] = {0, 0, 1, 0};
int rc = 0;
unsigned char prfhash[SMB2_HMACSHA256_SIZE];
- unsigned char *hashptr = prfhash;
struct TCP_Server_Info *server = ses->server;
+ struct hmac_sha256_ctx hmac_ctx;
memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
memset(key, 0x0, key_size);
@@ -345,67 +275,26 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
rc = smb3_crypto_shash_allocate(server);
if (rc) {
cifs_server_dbg(VFS, "%s: crypto alloc failed\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_setkey(server->secmech.hmacsha256->tfm,
- ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not set with session key\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_init(server->secmech.hmacsha256);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, i, 4);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with n\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, label.iov_base, label.iov_len);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with label\n", __func__);
- goto smb3signkey_ret;
+ return rc;
}
- rc = crypto_shash_update(server->secmech.hmacsha256, &zero, 1);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with zero\n", __func__);
- goto smb3signkey_ret;
- }
-
- rc = crypto_shash_update(server->secmech.hmacsha256, context.iov_base, context.iov_len);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with context\n", __func__);
- goto smb3signkey_ret;
- }
+ hmac_sha256_init_usingrawkey(&hmac_ctx, ses->auth_key.response,
+ SMB2_NTLMV2_SESSKEY_SIZE);
+ hmac_sha256_update(&hmac_ctx, i, 4);
+ hmac_sha256_update(&hmac_ctx, label.iov_base, label.iov_len);
+ hmac_sha256_update(&hmac_ctx, &zero, 1);
+ hmac_sha256_update(&hmac_ctx, context.iov_base, context.iov_len);
if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
(server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
- rc = crypto_shash_update(server->secmech.hmacsha256, L256, 4);
+ hmac_sha256_update(&hmac_ctx, L256, 4);
} else {
- rc = crypto_shash_update(server->secmech.hmacsha256, L128, 4);
- }
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__);
- goto smb3signkey_ret;
+ hmac_sha256_update(&hmac_ctx, L128, 4);
}
+ hmac_sha256_final(&hmac_ctx, prfhash);
- rc = crypto_shash_final(server->secmech.hmacsha256, hashptr);
- if (rc) {
- cifs_server_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
- goto smb3signkey_ret;
- }
-
- memcpy(key, hashptr, key_size);
-
-smb3signkey_ret:
- return rc;
+ memcpy(key, prfhash, key_size);
+ return 0;
}
struct derivation {
@@ -576,19 +465,21 @@ generate_smb311signingkey(struct cifs_ses *ses,
return generate_smb3signingkey(ses, server, &triplet);
}
-int
+static int
smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
- bool allocate_crypto)
+ bool allocate_crypto)
{
int rc;
unsigned char smb3_signature[SMB2_CMACAES_SIZE];
- unsigned char *sigptr = smb3_signature;
struct kvec *iov = rqst->rq_iov;
struct smb2_hdr *shdr = (struct smb2_hdr *)iov[0].iov_base;
struct shash_desc *shash = NULL;
struct smb_rqst drqst;
u8 key[SMB3_SIGN_KEY_SIZE];
+ if (server->vals->protocol_id <= SMB21_PROT_ID)
+ return smb2_calc_signature(rqst, server, allocate_crypto);
+
rc = smb3_get_sign_key(le64_to_cpu(shdr->SessionId), server, key);
if (unlikely(rc)) {
cifs_server_dbg(FYI, "%s: Could not get signing key\n", __func__);
@@ -643,9 +534,11 @@ smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server,
drqst.rq_nvec--;
}
- rc = __cifs_calc_signature(&drqst, server, sigptr, shash);
+ rc = __cifs_calc_signature(
+ &drqst, server, smb3_signature,
+ &(struct cifs_calc_sig_ctx){ .shash = shash });
if (!rc)
- memcpy(shdr->Signature, sigptr, SMB2_SIGNATURE_SIZE);
+ memcpy(shdr->Signature, smb3_signature, SMB2_SIGNATURE_SIZE);
out:
if (allocate_crypto)
@@ -657,7 +550,6 @@ out:
static int
smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
{
- int rc = 0;
struct smb2_hdr *shdr;
struct smb2_sess_setup_req *ssr;
bool is_binding;
@@ -684,9 +576,7 @@ smb2_sign_rqst(struct smb_rqst *rqst, struct TCP_Server_Info *server)
return 0;
}
- rc = server->ops->calc_signature(rqst, server, false);
-
- return rc;
+ return smb3_calc_signature(rqst, server, false);
}
int
@@ -722,7 +612,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
memset(shdr->Signature, 0, SMB2_SIGNATURE_SIZE);
- rc = server->ops->calc_signature(rqst, server, true);
+ rc = smb3_calc_signature(rqst, server, true);
if (rc)
return rc;
diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c
index 316f398c70f4..85a4c55b61b8 100644
--- a/fs/smb/client/smbdirect.c
+++ b/fs/smb/client/smbdirect.c
@@ -172,6 +172,7 @@ static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc)
* in order to notice the broken connection.
*/
wake_up_all(&sc->status_wait);
+ wake_up_all(&sc->send_io.lcredits.wait_queue);
wake_up_all(&sc->send_io.credits.wait_queue);
wake_up_all(&sc->send_io.pending.dec_wait_queue);
wake_up_all(&sc->send_io.pending.zero_wait_queue);
@@ -495,6 +496,7 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
struct smbdirect_send_io *request =
container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
struct smbdirect_socket *sc = request->socket;
+ int lcredits = 0;
log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n",
request, ib_wc_status_msg(wc->status));
@@ -504,22 +506,24 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
request->sge[i].addr,
request->sge[i].length,
DMA_TO_DEVICE);
+ mempool_free(request, sc->send_io.mem.pool);
+ lcredits += 1;
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
if (wc->status != IB_WC_WR_FLUSH_ERR)
log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n",
ib_wc_status_msg(wc->status), wc->opcode);
- mempool_free(request, sc->send_io.mem.pool);
smbd_disconnect_rdma_connection(sc);
return;
}
+ atomic_add(lcredits, &sc->send_io.lcredits.count);
+ wake_up(&sc->send_io.lcredits.wait_queue);
+
if (atomic_dec_and_test(&sc->send_io.pending.count))
wake_up(&sc->send_io.pending.zero_wait_queue);
wake_up(&sc->send_io.pending.dec_wait_queue);
-
- mempool_free(request, sc->send_io.mem.pool);
}
static void dump_smbdirect_negotiate_resp(struct smbdirect_negotiate_resp *resp)
@@ -567,6 +571,7 @@ static bool process_negotiation_response(
log_rdma_event(ERR, "error: credits_granted==0\n");
return false;
}
+ atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);
atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted));
if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) {
@@ -1114,6 +1119,24 @@ static int smbd_post_send_iter(struct smbdirect_socket *sc,
struct smbdirect_data_transfer *packet;
int new_credits = 0;
+wait_lcredit:
+ /* Wait for local send credits */
+ rc = wait_event_interruptible(sc->send_io.lcredits.wait_queue,
+ atomic_read(&sc->send_io.lcredits.count) > 0 ||
+ sc->status != SMBDIRECT_SOCKET_CONNECTED);
+ if (rc)
+ goto err_wait_lcredit;
+
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+ log_outgoing(ERR, "disconnected not sending on wait_credit\n");
+ rc = -EAGAIN;
+ goto err_wait_lcredit;
+ }
+ if (unlikely(atomic_dec_return(&sc->send_io.lcredits.count) < 0)) {
+ atomic_inc(&sc->send_io.lcredits.count);
+ goto wait_lcredit;
+ }
+
wait_credit:
/* Wait for send credits. A SMBD packet needs one credit */
rc = wait_event_interruptible(sc->send_io.credits.wait_queue,
@@ -1132,23 +1155,6 @@ wait_credit:
goto wait_credit;
}
-wait_send_queue:
- wait_event(sc->send_io.pending.dec_wait_queue,
- atomic_read(&sc->send_io.pending.count) < sp->send_credit_target ||
- sc->status != SMBDIRECT_SOCKET_CONNECTED);
-
- if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
- log_outgoing(ERR, "disconnected not sending on wait_send_queue\n");
- rc = -EAGAIN;
- goto err_wait_send_queue;
- }
-
- if (unlikely(atomic_inc_return(&sc->send_io.pending.count) >
- sp->send_credit_target)) {
- atomic_dec(&sc->send_io.pending.count);
- goto wait_send_queue;
- }
-
request = mempool_alloc(sc->send_io.mem.pool, GFP_KERNEL);
if (!request) {
rc = -ENOMEM;
@@ -1229,10 +1235,21 @@ wait_send_queue:
le32_to_cpu(packet->data_length),
le32_to_cpu(packet->remaining_data_length));
+ /*
+ * Now that we got a local and a remote credit
+ * we add us as pending
+ */
+ atomic_inc(&sc->send_io.pending.count);
+
rc = smbd_post_send(sc, request);
if (!rc)
return 0;
+ if (atomic_dec_and_test(&sc->send_io.pending.count))
+ wake_up(&sc->send_io.pending.zero_wait_queue);
+
+ wake_up(&sc->send_io.pending.dec_wait_queue);
+
err_dma:
for (i = 0; i < request->num_sge; i++)
if (request->sge[i].addr)
@@ -1246,14 +1263,14 @@ err_dma:
atomic_sub(new_credits, &sc->recv_io.credits.count);
err_alloc:
- if (atomic_dec_and_test(&sc->send_io.pending.count))
- wake_up(&sc->send_io.pending.zero_wait_queue);
-
-err_wait_send_queue:
- /* roll back send credits and pending */
atomic_inc(&sc->send_io.credits.count);
+ wake_up(&sc->send_io.credits.wait_queue);
err_wait_credit:
+ atomic_inc(&sc->send_io.lcredits.count);
+ wake_up(&sc->send_io.lcredits.wait_queue);
+
+err_wait_lcredit:
return rc;
}
@@ -1575,12 +1592,12 @@ void smbd_destroy(struct TCP_Server_Info *server)
disable_work_sync(&sc->disconnect_work);
log_rdma_event(INFO, "destroying rdma session\n");
- if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) {
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING)
smbd_disconnect_rdma_work(&sc->disconnect_work);
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED) {
log_rdma_event(INFO, "wait for transport being disconnected\n");
- wait_event_interruptible(
- sc->status_wait,
- sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
+ wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
+ log_rdma_event(INFO, "waited for transport being disconnected\n");
}
/*
@@ -1624,19 +1641,7 @@ void smbd_destroy(struct TCP_Server_Info *server)
log_rdma_event(INFO, "free receive buffers\n");
destroy_receive_buffers(sc);
- /*
- * For performance reasons, memory registration and deregistration
- * are not locked by srv_mutex. It is possible some processes are
- * blocked on transport srv_mutex while holding memory registration.
- * Release the transport srv_mutex to allow them to hit the failure
- * path when sending data, and then release memory registrations.
- */
log_rdma_event(INFO, "freeing mr list\n");
- while (atomic_read(&sc->mr_io.used.count)) {
- cifs_server_unlock(server);
- msleep(1000);
- cifs_server_lock(server);
- }
destroy_mr_list(sc);
ib_free_cq(sc->ib.send_cq);
@@ -1779,6 +1784,7 @@ static struct smbd_connection *_smbd_get_connection(
struct smbdirect_socket *sc;
struct smbdirect_socket_parameters *sp;
struct rdma_conn_param conn_param;
+ struct ib_qp_cap qp_cap;
struct ib_qp_init_attr qp_attr;
struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr;
struct ib_port_immutable port_immutable;
@@ -1850,6 +1856,25 @@ static struct smbd_connection *_smbd_get_connection(
goto config_failed;
}
+ sp->responder_resources =
+ min_t(u8, sp->responder_resources,
+ sc->ib.dev->attrs.max_qp_rd_atom);
+ log_rdma_mr(INFO, "responder_resources=%d\n",
+ sp->responder_resources);
+
+ /*
+ * We use allocate sp->responder_resources * 2 MRs
+ * and each MR needs WRs for REG and INV, so
+ * we use '* 4'.
+ *
+ * +1 for ib_drain_qp()
+ */
+ memset(&qp_cap, 0, sizeof(qp_cap));
+ qp_cap.max_send_wr = sp->send_credit_target + sp->responder_resources * 4 + 1;
+ qp_cap.max_recv_wr = sp->recv_credit_max + 1;
+ qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
+ qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
+
sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
if (IS_ERR(sc->ib.pd)) {
rc = PTR_ERR(sc->ib.pd);
@@ -1860,7 +1885,7 @@ static struct smbd_connection *_smbd_get_connection(
sc->ib.send_cq =
ib_alloc_cq_any(sc->ib.dev, sc,
- sp->send_credit_target, IB_POLL_SOFTIRQ);
+ qp_cap.max_send_wr, IB_POLL_SOFTIRQ);
if (IS_ERR(sc->ib.send_cq)) {
sc->ib.send_cq = NULL;
goto alloc_cq_failed;
@@ -1868,7 +1893,7 @@ static struct smbd_connection *_smbd_get_connection(
sc->ib.recv_cq =
ib_alloc_cq_any(sc->ib.dev, sc,
- sp->recv_credit_max, IB_POLL_SOFTIRQ);
+ qp_cap.max_recv_wr, IB_POLL_SOFTIRQ);
if (IS_ERR(sc->ib.recv_cq)) {
sc->ib.recv_cq = NULL;
goto alloc_cq_failed;
@@ -1877,11 +1902,7 @@ static struct smbd_connection *_smbd_get_connection(
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.event_handler = smbd_qp_async_error_upcall;
qp_attr.qp_context = sc;
- qp_attr.cap.max_send_wr = sp->send_credit_target;
- qp_attr.cap.max_recv_wr = sp->recv_credit_max;
- qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
- qp_attr.cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
- qp_attr.cap.max_inline_data = 0;
+ qp_attr.cap = qp_cap;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = sc->ib.send_cq;
@@ -1895,12 +1916,6 @@ static struct smbd_connection *_smbd_get_connection(
}
sc->ib.qp = sc->rdma.cm_id->qp;
- sp->responder_resources =
- min_t(u8, sp->responder_resources,
- sc->ib.dev->attrs.max_qp_rd_atom);
- log_rdma_mr(INFO, "responder_resources=%d\n",
- sp->responder_resources);
-
memset(&conn_param, 0, sizeof(conn_param));
conn_param.initiator_depth = sp->initiator_depth;
conn_param.responder_resources = sp->responder_resources;
@@ -2352,18 +2367,84 @@ static void smbd_mr_recovery_work(struct work_struct *work)
}
}
+static void smbd_mr_disable_locked(struct smbdirect_mr_io *mr)
+{
+ struct smbdirect_socket *sc = mr->socket;
+
+ lockdep_assert_held(&mr->mutex);
+
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ return;
+
+ if (mr->mr)
+ ib_dereg_mr(mr->mr);
+ if (mr->sgt.nents)
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
+ kfree(mr->sgt.sgl);
+
+ mr->mr = NULL;
+ mr->sgt.sgl = NULL;
+ mr->sgt.nents = 0;
+
+ mr->state = SMBDIRECT_MR_DISABLED;
+}
+
+static void smbd_mr_free_locked(struct kref *kref)
+{
+ struct smbdirect_mr_io *mr =
+ container_of(kref, struct smbdirect_mr_io, kref);
+
+ lockdep_assert_held(&mr->mutex);
+
+ /*
+ * smbd_mr_disable_locked() should already be called!
+ */
+ if (WARN_ON_ONCE(mr->state != SMBDIRECT_MR_DISABLED))
+ smbd_mr_disable_locked(mr);
+
+ mutex_unlock(&mr->mutex);
+ mutex_destroy(&mr->mutex);
+ kfree(mr);
+}
+
static void destroy_mr_list(struct smbdirect_socket *sc)
{
struct smbdirect_mr_io *mr, *tmp;
+ LIST_HEAD(all_list);
+ unsigned long flags;
disable_work_sync(&sc->mr_io.recovery_work);
- list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) {
- if (mr->state == SMBDIRECT_MR_INVALIDATED)
- ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl,
- mr->sgt.nents, mr->dir);
- ib_dereg_mr(mr->mr);
- kfree(mr->sgt.sgl);
- kfree(mr);
+
+ spin_lock_irqsave(&sc->mr_io.all.lock, flags);
+ list_splice_tail_init(&sc->mr_io.all.list, &all_list);
+ spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
+
+ list_for_each_entry_safe(mr, tmp, &all_list, list) {
+ mutex_lock(&mr->mutex);
+
+ smbd_mr_disable_locked(mr);
+ list_del(&mr->list);
+ mr->socket = NULL;
+
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock.
+ *
+ * If the mr is still registered it will
+ * be dangling (detached from the connection
+ * waiting for smbd_deregister_mr() to be
+ * called in order to free the memory.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
}
@@ -2377,10 +2458,9 @@ static void destroy_mr_list(struct smbdirect_socket *sc)
static int allocate_mr_list(struct smbdirect_socket *sc)
{
struct smbdirect_socket_parameters *sp = &sc->parameters;
- int i;
- struct smbdirect_mr_io *smbdirect_mr, *tmp;
-
- INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work);
+ struct smbdirect_mr_io *mr;
+ int ret;
+ u32 i;
if (sp->responder_resources == 0) {
log_rdma_mr(ERR, "responder_resources negotiated as 0\n");
@@ -2389,42 +2469,52 @@ static int allocate_mr_list(struct smbdirect_socket *sc)
/* Allocate more MRs (2x) than hardware responder_resources */
for (i = 0; i < sp->responder_resources * 2; i++) {
- smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL);
- if (!smbdirect_mr)
- goto cleanup_entries;
- smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type,
- sp->max_frmr_depth);
- if (IS_ERR(smbdirect_mr->mr)) {
+ mr = kzalloc(sizeof(*mr), GFP_KERNEL);
+ if (!mr) {
+ ret = -ENOMEM;
+ goto kzalloc_mr_failed;
+ }
+
+ kref_init(&mr->kref);
+ mutex_init(&mr->mutex);
+
+ mr->mr = ib_alloc_mr(sc->ib.pd,
+ sc->mr_io.type,
+ sp->max_frmr_depth);
+ if (IS_ERR(mr->mr)) {
+ ret = PTR_ERR(mr->mr);
log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n",
sc->mr_io.type, sp->max_frmr_depth);
- goto out;
+ goto ib_alloc_mr_failed;
}
- smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
- sizeof(struct scatterlist),
- GFP_KERNEL);
- if (!smbdirect_mr->sgt.sgl) {
+
+ mr->sgt.sgl = kcalloc(sp->max_frmr_depth,
+ sizeof(struct scatterlist),
+ GFP_KERNEL);
+ if (!mr->sgt.sgl) {
+ ret = -ENOMEM;
log_rdma_mr(ERR, "failed to allocate sgl\n");
- ib_dereg_mr(smbdirect_mr->mr);
- goto out;
+ goto kcalloc_sgl_failed;
}
- smbdirect_mr->state = SMBDIRECT_MR_READY;
- smbdirect_mr->socket = sc;
+ mr->state = SMBDIRECT_MR_READY;
+ mr->socket = sc;
- list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list);
+ list_add_tail(&mr->list, &sc->mr_io.all.list);
atomic_inc(&sc->mr_io.ready.count);
}
+
+ INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work);
+
return 0;
-out:
- kfree(smbdirect_mr);
-cleanup_entries:
- list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) {
- list_del(&smbdirect_mr->list);
- ib_dereg_mr(smbdirect_mr->mr);
- kfree(smbdirect_mr->sgt.sgl);
- kfree(smbdirect_mr);
- }
- return -ENOMEM;
+kcalloc_sgl_failed:
+ ib_dereg_mr(mr->mr);
+ib_alloc_mr_failed:
+ mutex_destroy(&mr->mutex);
+ kfree(mr);
+kzalloc_mr_failed:
+ destroy_mr_list(sc);
+ return ret;
}
/*
@@ -2458,6 +2548,7 @@ again:
list_for_each_entry(ret, &sc->mr_io.all.list, list) {
if (ret->state == SMBDIRECT_MR_READY) {
ret->state = SMBDIRECT_MR_REGISTERED;
+ kref_get(&ret->kref);
spin_unlock_irqrestore(&sc->mr_io.all.lock, flags);
atomic_dec(&sc->mr_io.ready.count);
atomic_inc(&sc->mr_io.used.count);
@@ -2504,9 +2595,8 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
{
struct smbdirect_socket *sc = &info->socket;
struct smbdirect_socket_parameters *sp = &sc->parameters;
- struct smbdirect_mr_io *smbdirect_mr;
+ struct smbdirect_mr_io *mr;
int rc, num_pages;
- enum dma_data_direction dir;
struct ib_reg_wr *reg_wr;
num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1);
@@ -2517,49 +2607,47 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
return NULL;
}
- smbdirect_mr = get_mr(sc);
- if (!smbdirect_mr) {
+ mr = get_mr(sc);
+ if (!mr) {
log_rdma_mr(ERR, "get_mr returning NULL\n");
return NULL;
}
- dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
- smbdirect_mr->dir = dir;
- smbdirect_mr->need_invalidate = need_invalidate;
- smbdirect_mr->sgt.nents = 0;
- smbdirect_mr->sgt.orig_nents = 0;
+ mutex_lock(&mr->mutex);
+
+ mr->dir = writing ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+ mr->need_invalidate = need_invalidate;
+ mr->sgt.nents = 0;
+ mr->sgt.orig_nents = 0;
log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n",
num_pages, iov_iter_count(iter), sp->max_frmr_depth);
- smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth);
+ smbd_iter_to_mr(iter, &mr->sgt, sp->max_frmr_depth);
- rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, dir);
+ rc = ib_dma_map_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
if (!rc) {
log_rdma_mr(ERR, "ib_dma_map_sg num_pages=%x dir=%x rc=%x\n",
- num_pages, dir, rc);
+ num_pages, mr->dir, rc);
goto dma_map_error;
}
- rc = ib_map_mr_sg(smbdirect_mr->mr, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, NULL, PAGE_SIZE);
- if (rc != smbdirect_mr->sgt.nents) {
+ rc = ib_map_mr_sg(mr->mr, mr->sgt.sgl, mr->sgt.nents, NULL, PAGE_SIZE);
+ if (rc != mr->sgt.nents) {
log_rdma_mr(ERR,
- "ib_map_mr_sg failed rc = %d nents = %x\n",
- rc, smbdirect_mr->sgt.nents);
+ "ib_map_mr_sg failed rc = %d nents = %x\n",
+ rc, mr->sgt.nents);
goto map_mr_error;
}
- ib_update_fast_reg_key(smbdirect_mr->mr,
- ib_inc_rkey(smbdirect_mr->mr->rkey));
- reg_wr = &smbdirect_mr->wr;
+ ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey));
+ reg_wr = &mr->wr;
reg_wr->wr.opcode = IB_WR_REG_MR;
- smbdirect_mr->cqe.done = register_mr_done;
- reg_wr->wr.wr_cqe = &smbdirect_mr->cqe;
+ mr->cqe.done = register_mr_done;
+ reg_wr->wr.wr_cqe = &mr->cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = IB_SEND_SIGNALED;
- reg_wr->mr = smbdirect_mr->mr;
- reg_wr->key = smbdirect_mr->mr->rkey;
+ reg_wr->mr = mr->mr;
+ reg_wr->key = mr->mr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -2570,24 +2658,51 @@ struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info,
* on the next ib_post_send when we actually send I/O to remote peer
*/
rc = ib_post_send(sc->ib.qp, &reg_wr->wr, NULL);
- if (!rc)
- return smbdirect_mr;
+ if (!rc) {
+ /*
+ * get_mr() gave us a reference
+ * via kref_get(&mr->kref), we keep that and let
+ * the caller use smbd_deregister_mr()
+ * to remove it again.
+ */
+ mutex_unlock(&mr->mutex);
+ return mr;
+ }
log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n",
rc, reg_wr->key);
/* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/
map_mr_error:
- ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents, smbdirect_mr->dir);
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
dma_map_error:
- smbdirect_mr->state = SMBDIRECT_MR_ERROR;
+ mr->sgt.nents = 0;
+ mr->state = SMBDIRECT_MR_ERROR;
if (atomic_dec_and_test(&sc->mr_io.used.count))
wake_up(&sc->mr_io.cleanup.wait_queue);
smbd_disconnect_rdma_connection(sc);
+ /*
+ * get_mr() gave us a reference
+ * via kref_get(&mr->kref), we need to remove it again
+ * on error.
+ *
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
+
return NULL;
}
@@ -2612,44 +2727,55 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc)
* and we have to locally invalidate the buffer to prevent data is being
* modified by remote peer after upper layer consumes it
*/
-int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr)
+void smbd_deregister_mr(struct smbdirect_mr_io *mr)
{
- struct ib_send_wr *wr;
- struct smbdirect_socket *sc = smbdirect_mr->socket;
- int rc = 0;
+ struct smbdirect_socket *sc = mr->socket;
+
+ mutex_lock(&mr->mutex);
+ if (mr->state == SMBDIRECT_MR_DISABLED)
+ goto put_kref;
+
+ if (sc->status != SMBDIRECT_SOCKET_CONNECTED) {
+ smbd_mr_disable_locked(mr);
+ goto put_kref;
+ }
+
+ if (mr->need_invalidate) {
+ struct ib_send_wr *wr = &mr->inv_wr;
+ int rc;
- if (smbdirect_mr->need_invalidate) {
/* Need to finish local invalidation before returning */
- wr = &smbdirect_mr->inv_wr;
wr->opcode = IB_WR_LOCAL_INV;
- smbdirect_mr->cqe.done = local_inv_done;
- wr->wr_cqe = &smbdirect_mr->cqe;
+ mr->cqe.done = local_inv_done;
+ wr->wr_cqe = &mr->cqe;
wr->num_sge = 0;
- wr->ex.invalidate_rkey = smbdirect_mr->mr->rkey;
+ wr->ex.invalidate_rkey = mr->mr->rkey;
wr->send_flags = IB_SEND_SIGNALED;
- init_completion(&smbdirect_mr->invalidate_done);
+ init_completion(&mr->invalidate_done);
rc = ib_post_send(sc->ib.qp, wr, NULL);
if (rc) {
log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc);
+ smbd_mr_disable_locked(mr);
smbd_disconnect_rdma_connection(sc);
goto done;
}
- wait_for_completion(&smbdirect_mr->invalidate_done);
- smbdirect_mr->need_invalidate = false;
+ wait_for_completion(&mr->invalidate_done);
+ mr->need_invalidate = false;
} else
/*
* For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED
* and defer to mr_recovery_work to recover the MR for next use
*/
- smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED;
+ mr->state = SMBDIRECT_MR_INVALIDATED;
- if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) {
- ib_dma_unmap_sg(
- sc->ib.dev, smbdirect_mr->sgt.sgl,
- smbdirect_mr->sgt.nents,
- smbdirect_mr->dir);
- smbdirect_mr->state = SMBDIRECT_MR_READY;
+ if (mr->sgt.nents) {
+ ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir);
+ mr->sgt.nents = 0;
+ }
+
+ if (mr->state == SMBDIRECT_MR_INVALIDATED) {
+ mr->state = SMBDIRECT_MR_READY;
if (atomic_inc_return(&sc->mr_io.ready.count) == 1)
wake_up(&sc->mr_io.ready.wait_queue);
} else
@@ -2663,7 +2789,23 @@ done:
if (atomic_dec_and_test(&sc->mr_io.used.count))
wake_up(&sc->mr_io.cleanup.wait_queue);
- return rc;
+put_kref:
+ /*
+ * No kref_put_mutex() as it's already locked.
+ *
+ * If smbd_mr_free_locked() is called
+ * and the mutex is unlocked and mr is gone,
+ * in that case kref_put() returned 1.
+ *
+ * If kref_put() returned 0 we know that
+ * smbd_mr_free_locked() didn't
+ * run. Not by us nor by anyone else, as we
+ * still hold the mutex, so we need to unlock
+ * and keep the mr in SMBDIRECT_MR_READY or
+ * SMBDIRECT_MR_ERROR state.
+ */
+ if (!kref_put(&mr->kref, smbd_mr_free_locked))
+ mutex_unlock(&mr->mutex);
}
static bool smb_set_sge(struct smb_extract_to_rdma *rdma,
diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h
index d67ac5ddaff4..577d37dbeb8a 100644
--- a/fs/smb/client/smbdirect.h
+++ b/fs/smb/client/smbdirect.h
@@ -60,7 +60,7 @@ int smbd_send(struct TCP_Server_Info *server,
struct smbdirect_mr_io *smbd_register_mr(
struct smbd_connection *info, struct iov_iter *iter,
bool writing, bool need_invalidate);
-int smbd_deregister_mr(struct smbdirect_mr_io *mr);
+void smbd_deregister_mr(struct smbdirect_mr_io *mr);
#else
#define cifs_rdma_enabled(server) 0
diff --git a/fs/smb/client/trace.c b/fs/smb/client/trace.c
index 465483787193..16b0e719731f 100644
--- a/fs/smb/client/trace.c
+++ b/fs/smb/client/trace.c
@@ -4,5 +4,6 @@
*
* Author(s): Steve French <stfrench@microsoft.com>
*/
+#include "cifsglob.h"
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/smb/client/xattr.c b/fs/smb/client/xattr.c
index b88fa04f5792..029910d56c22 100644
--- a/fs/smb/client/xattr.c
+++ b/fs/smb/client/xattr.c
@@ -178,7 +178,6 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
memcpy(pacl, value, size);
if (pTcon->ses->server->ops->set_acl) {
int aclflags = 0;
- rc = 0;
switch (handler->flags) {
case XATTR_CIFS_NTSD_FULL:
diff --git a/fs/smb/common/cifsglob.h b/fs/smb/common/cifsglob.h
new file mode 100644
index 000000000000..00fd215e3eb5
--- /dev/null
+++ b/fs/smb/common/cifsglob.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1 */
+/*
+ *
+ * Copyright (C) International Business Machines Corp., 2002,2008
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ * Jeremy Allison (jra@samba.org)
+ *
+ */
+#ifndef _COMMON_CIFS_GLOB_H
+#define _COMMON_CIFS_GLOB_H
+
+static inline void inc_rfc1001_len(void *buf, int count)
+{
+ be32_add_cpu((__be32 *)buf, count);
+}
+
+#define SMB1_VERSION_STRING "1.0"
+#define SMB20_VERSION_STRING "2.0"
+#define SMB21_VERSION_STRING "2.1"
+#define SMBDEFAULT_VERSION_STRING "default"
+#define SMB3ANY_VERSION_STRING "3"
+#define SMB30_VERSION_STRING "3.0"
+#define SMB302_VERSION_STRING "3.02"
+#define ALT_SMB302_VERSION_STRING "3.0.2"
+#define SMB311_VERSION_STRING "3.1.1"
+#define ALT_SMB311_VERSION_STRING "3.11"
+
+#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+
+#endif /* _COMMON_CIFS_GLOB_H */
diff --git a/fs/smb/common/smbdirect/smbdirect_socket.h b/fs/smb/common/smbdirect/smbdirect_socket.h
index db22a1d0546b..ee5a90d691c8 100644
--- a/fs/smb/common/smbdirect/smbdirect_socket.h
+++ b/fs/smb/common/smbdirect/smbdirect_socket.h
@@ -142,7 +142,15 @@ struct smbdirect_socket {
} mem;
/*
- * The credit state for the send side
+ * The local credit state for ib_post_send()
+ */
+ struct {
+ atomic_t count;
+ wait_queue_head_t wait_queue;
+ } lcredits;
+
+ /*
+ * The remote credit state for the send side
*/
struct {
atomic_t count;
@@ -337,6 +345,9 @@ static __always_inline void smbdirect_socket_init(struct smbdirect_socket *sc)
INIT_DELAYED_WORK(&sc->idle.timer_work, __smbdirect_socket_disabled_work);
disable_delayed_work_sync(&sc->idle.timer_work);
+ atomic_set(&sc->send_io.lcredits.count, 0);
+ init_waitqueue_head(&sc->send_io.lcredits.wait_queue);
+
atomic_set(&sc->send_io.credits.count, 0);
init_waitqueue_head(&sc->send_io.credits.wait_queue);
@@ -437,13 +448,22 @@ enum smbdirect_mr_state {
SMBDIRECT_MR_READY,
SMBDIRECT_MR_REGISTERED,
SMBDIRECT_MR_INVALIDATED,
- SMBDIRECT_MR_ERROR
+ SMBDIRECT_MR_ERROR,
+ SMBDIRECT_MR_DISABLED
};
struct smbdirect_mr_io {
struct smbdirect_socket *socket;
struct ib_cqe cqe;
+ /*
+ * We can have up to two references:
+ * 1. by the connection
+ * 2. by the registration
+ */
+ struct kref kref;
+ struct mutex mutex;
+
struct list_head list;
enum smbdirect_mr_state state;
diff --git a/fs/smb/server/mgmt/user_session.c b/fs/smb/server/mgmt/user_session.c
index 6fa025374f2f..1c181ef99929 100644
--- a/fs/smb/server/mgmt/user_session.c
+++ b/fs/smb/server/mgmt/user_session.c
@@ -147,14 +147,11 @@ void ksmbd_session_rpc_close(struct ksmbd_session *sess, int id)
int ksmbd_session_rpc_method(struct ksmbd_session *sess, int id)
{
struct ksmbd_session_rpc *entry;
- int method;
- down_read(&sess->rpc_lock);
+ lockdep_assert_held(&sess->rpc_lock);
entry = xa_load(&sess->rpc_handle_list, id);
- method = entry ? entry->method : 0;
- up_read(&sess->rpc_lock);
- return method;
+ return entry ? entry->method : 0;
}
void ksmbd_session_destroy(struct ksmbd_session *sess)
diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c
index ab1d45fcebde..f901ae18e68a 100644
--- a/fs/smb/server/smb2pdu.c
+++ b/fs/smb/server/smb2pdu.c
@@ -1806,6 +1806,7 @@ int smb2_sess_setup(struct ksmbd_work *work)
if (ksmbd_conn_need_reconnect(conn)) {
rc = -EFAULT;
+ ksmbd_user_session_put(sess);
sess = NULL;
goto out_err;
}
@@ -4625,8 +4626,15 @@ static int smb2_get_info_file_pipe(struct ksmbd_session *sess,
* pipe without opening it, checking error condition here
*/
id = req->VolatileFileId;
- if (!ksmbd_session_rpc_method(sess, id))
+
+ lockdep_assert_not_held(&sess->rpc_lock);
+
+ down_read(&sess->rpc_lock);
+ if (!ksmbd_session_rpc_method(sess, id)) {
+ up_read(&sess->rpc_lock);
return -ENOENT;
+ }
+ up_read(&sess->rpc_lock);
ksmbd_debug(SMB, "FileInfoClass %u, FileId 0x%llx\n",
req->FileInfoClass, req->VolatileFileId);
@@ -6824,6 +6832,7 @@ int smb2_read(struct ksmbd_work *work)
nbytes = ksmbd_vfs_read(work, fp, length, &offset, aux_payload_buf);
if (nbytes < 0) {
+ kvfree(aux_payload_buf);
err = nbytes;
goto out;
}
diff --git a/fs/smb/server/smb_common.h b/fs/smb/server/smb_common.h
index d742ba754348..863716207a0d 100644
--- a/fs/smb/server/smb_common.h
+++ b/fs/smb/server/smb_common.h
@@ -10,6 +10,7 @@
#include "glob.h"
#include "nterr.h"
+#include "../common/cifsglob.h"
#include "../common/smb2pdu.h"
#include "smb2pdu.h"
@@ -26,16 +27,8 @@
#define SMB311_PROT 6
#define BAD_PROT 0xFFFF
-#define SMB1_VERSION_STRING "1.0"
-#define SMB20_VERSION_STRING "2.0"
-#define SMB21_VERSION_STRING "2.1"
-#define SMB30_VERSION_STRING "3.0"
-#define SMB302_VERSION_STRING "3.02"
-#define SMB311_VERSION_STRING "3.1.1"
-
#define SMB_ECHO_INTERVAL (60 * HZ)
-#define CIFS_DEFAULT_IOSIZE (64 * 1024)
#define MAX_CIFS_SMALL_BUFFER_SIZE 448 /* big enough for most */
#define MAX_STREAM_PROT_LEN 0x00FFFFFF
@@ -464,9 +457,4 @@ static inline unsigned int get_rfc1002_len(void *buf)
{
return be32_to_cpu(*((__be32 *)buf)) & 0xffffff;
}
-
-static inline void inc_rfc1001_len(void *buf, int count)
-{
- be32_add_cpu((__be32 *)buf, count);
-}
#endif /* __SMB_COMMON_H__ */
diff --git a/fs/smb/server/transport_ipc.c b/fs/smb/server/transport_ipc.c
index 2aa1b29bea08..2c08cccfa680 100644
--- a/fs/smb/server/transport_ipc.c
+++ b/fs/smb/server/transport_ipc.c
@@ -263,10 +263,16 @@ static void ipc_msg_handle_free(int handle)
static int handle_response(int type, void *payload, size_t sz)
{
- unsigned int handle = *(unsigned int *)payload;
+ unsigned int handle;
struct ipc_msg_table_entry *entry;
int ret = 0;
+ /* Prevent 4-byte read beyond declared payload size */
+ if (sz < sizeof(unsigned int))
+ return -EINVAL;
+
+ handle = *(unsigned int *)payload;
+
ipc_update_last_active();
down_read(&ipc_msg_table_lock);
hash_for_each_possible(ipc_msg_table, entry, ipc_table_hlist, handle) {
@@ -825,6 +831,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle
if (!msg)
return NULL;
+ lockdep_assert_not_held(&sess->rpc_lock);
+
+ down_read(&sess->rpc_lock);
msg->type = KSMBD_EVENT_RPC_REQUEST;
req = (struct ksmbd_rpc_command *)msg->payload;
req->handle = handle;
@@ -833,6 +842,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_write(struct ksmbd_session *sess, int handle
req->flags |= KSMBD_RPC_WRITE_METHOD;
req->payload_sz = payload_sz;
memcpy(req->payload, payload, payload_sz);
+ up_read(&sess->rpc_lock);
resp = ipc_msg_send_request(msg, req->handle);
ipc_msg_free(msg);
@@ -849,6 +859,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle)
if (!msg)
return NULL;
+ lockdep_assert_not_held(&sess->rpc_lock);
+
+ down_read(&sess->rpc_lock);
msg->type = KSMBD_EVENT_RPC_REQUEST;
req = (struct ksmbd_rpc_command *)msg->payload;
req->handle = handle;
@@ -856,6 +869,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_read(struct ksmbd_session *sess, int handle)
req->flags |= rpc_context_flags(sess);
req->flags |= KSMBD_RPC_READ_METHOD;
req->payload_sz = 0;
+ up_read(&sess->rpc_lock);
resp = ipc_msg_send_request(msg, req->handle);
ipc_msg_free(msg);
@@ -876,6 +890,9 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle
if (!msg)
return NULL;
+ lockdep_assert_not_held(&sess->rpc_lock);
+
+ down_read(&sess->rpc_lock);
msg->type = KSMBD_EVENT_RPC_REQUEST;
req = (struct ksmbd_rpc_command *)msg->payload;
req->handle = handle;
@@ -884,6 +901,7 @@ struct ksmbd_rpc_command *ksmbd_rpc_ioctl(struct ksmbd_session *sess, int handle
req->flags |= KSMBD_RPC_IOCTL_METHOD;
req->payload_sz = payload_sz;
memcpy(req->payload, payload, payload_sz);
+ up_read(&sess->rpc_lock);
resp = ipc_msg_send_request(msg, req->handle);
ipc_msg_free(msg);
diff --git a/fs/smb/server/transport_rdma.c b/fs/smb/server/transport_rdma.c
index b3077766d6ec..7d86553fcc7c 100644
--- a/fs/smb/server/transport_rdma.c
+++ b/fs/smb/server/transport_rdma.c
@@ -219,6 +219,7 @@ static void smb_direct_disconnect_wake_up_all(struct smbdirect_socket *sc)
* in order to notice the broken connection.
*/
wake_up_all(&sc->status_wait);
+ wake_up_all(&sc->send_io.lcredits.wait_queue);
wake_up_all(&sc->send_io.credits.wait_queue);
wake_up_all(&sc->send_io.pending.zero_wait_queue);
wake_up_all(&sc->recv_io.reassembly.wait_queue);
@@ -417,9 +418,6 @@ static struct smb_direct_transport *alloc_transport(struct rdma_cm_id *cm_id)
sc->ib.dev = sc->rdma.cm_id->device;
- INIT_WORK(&sc->recv_io.posted.refill_work,
- smb_direct_post_recv_credits);
- INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work);
INIT_DELAYED_WORK(&sc->idle.timer_work, smb_direct_idle_connection_timer);
conn = ksmbd_conn_alloc();
@@ -450,11 +448,10 @@ static void free_transport(struct smb_direct_transport *t)
struct smbdirect_recv_io *recvmsg;
disable_work_sync(&sc->disconnect_work);
- if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) {
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING)
smb_direct_disconnect_rdma_work(&sc->disconnect_work);
- wait_event_interruptible(sc->status_wait,
- sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
- }
+ if (sc->status < SMBDIRECT_SOCKET_DISCONNECTED)
+ wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED);
/*
* Wake up all waiters in all wait queues
@@ -469,9 +466,11 @@ static void free_transport(struct smb_direct_transport *t)
disable_delayed_work_sync(&sc->idle.timer_work);
disable_work_sync(&sc->idle.immediate_work);
+ if (sc->rdma.cm_id)
+ rdma_lock_handler(sc->rdma.cm_id);
+
if (sc->ib.qp) {
ib_drain_qp(sc->ib.qp);
- ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs);
sc->ib.qp = NULL;
rdma_destroy_qp(sc->rdma.cm_id);
}
@@ -498,8 +497,10 @@ static void free_transport(struct smb_direct_transport *t)
ib_free_cq(sc->ib.recv_cq);
if (sc->ib.pd)
ib_dealloc_pd(sc->ib.pd);
- if (sc->rdma.cm_id)
+ if (sc->rdma.cm_id) {
+ rdma_unlock_handler(sc->rdma.cm_id);
rdma_destroy_id(sc->rdma.cm_id);
+ }
smb_direct_destroy_pools(sc);
ksmbd_conn_free(KSMBD_TRANS(t)->conn);
@@ -524,6 +525,12 @@ static void smb_direct_free_sendmsg(struct smbdirect_socket *sc,
{
int i;
+ /*
+ * The list needs to be empty!
+ * The caller should take care of it.
+ */
+ WARN_ON_ONCE(!list_empty(&msg->sibling_list));
+
if (msg->num_sge > 0) {
ib_dma_unmap_single(sc->ib.dev,
msg->sge[0].addr, msg->sge[0].length,
@@ -909,9 +916,9 @@ static void smb_direct_post_recv_credits(struct work_struct *work)
static void send_done(struct ib_cq *cq, struct ib_wc *wc)
{
- struct smbdirect_send_io *sendmsg, *sibling;
+ struct smbdirect_send_io *sendmsg, *sibling, *next;
struct smbdirect_socket *sc;
- struct list_head *pos, *prev, *end;
+ int lcredits = 0;
sendmsg = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe);
sc = sendmsg->socket;
@@ -920,27 +927,31 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc)
ib_wc_status_msg(wc->status), wc->status,
wc->opcode);
+ /*
+ * Free possible siblings and then the main send_io
+ */
+ list_for_each_entry_safe(sibling, next, &sendmsg->sibling_list, sibling_list) {
+ list_del_init(&sibling->sibling_list);
+ smb_direct_free_sendmsg(sc, sibling);
+ lcredits += 1;
+ }
+ /* Note this frees wc->wr_cqe, but not wc */
+ smb_direct_free_sendmsg(sc, sendmsg);
+ lcredits += 1;
+
if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) {
pr_err("Send error. status='%s (%d)', opcode=%d\n",
ib_wc_status_msg(wc->status), wc->status,
wc->opcode);
smb_direct_disconnect_rdma_connection(sc);
+ return;
}
+ atomic_add(lcredits, &sc->send_io.lcredits.count);
+ wake_up(&sc->send_io.lcredits.wait_queue);
+
if (atomic_dec_and_test(&sc->send_io.pending.count))
wake_up(&sc->send_io.pending.zero_wait_queue);
-
- /* iterate and free the list of messages in reverse. the list's head
- * is invalid.
- */
- for (pos = &sendmsg->sibling_list, prev = pos->prev, end = sendmsg->sibling_list.next;
- prev != end; pos = prev, prev = prev->prev) {
- sibling = container_of(pos, struct smbdirect_send_io, sibling_list);
- smb_direct_free_sendmsg(sc, sibling);
- }
-
- sibling = container_of(pos, struct smbdirect_send_io, sibling_list);
- smb_direct_free_sendmsg(sc, sibling);
}
static int manage_credits_prior_sending(struct smbdirect_socket *sc)
@@ -988,8 +999,6 @@ static int smb_direct_post_send(struct smbdirect_socket *sc,
ret = ib_post_send(sc->ib.qp, wr, NULL);
if (ret) {
pr_err("failed to post send: %d\n", ret);
- if (atomic_dec_and_test(&sc->send_io.pending.count))
- wake_up(&sc->send_io.pending.zero_wait_queue);
smb_direct_disconnect_rdma_connection(sc);
}
return ret;
@@ -1032,19 +1041,29 @@ static int smb_direct_flush_send_list(struct smbdirect_socket *sc,
last->wr.send_flags = IB_SEND_SIGNALED;
last->wr.wr_cqe = &last->cqe;
+ /*
+ * Remove last from send_ctx->msg_list
+ * and splice the rest of send_ctx->msg_list
+ * to last->sibling_list.
+ *
+ * send_ctx->msg_list is a valid empty list
+ * at the end.
+ */
+ list_del_init(&last->sibling_list);
+ list_splice_tail_init(&send_ctx->msg_list, &last->sibling_list);
+ send_ctx->wr_cnt = 0;
+
ret = smb_direct_post_send(sc, &first->wr);
- if (!ret) {
- smb_direct_send_ctx_init(send_ctx,
- send_ctx->need_invalidate_rkey,
- send_ctx->remote_key);
- } else {
- atomic_add(send_ctx->wr_cnt, &sc->send_io.credits.count);
- wake_up(&sc->send_io.credits.wait_queue);
- list_for_each_entry_safe(first, last, &send_ctx->msg_list,
- sibling_list) {
- smb_direct_free_sendmsg(sc, first);
+ if (ret) {
+ struct smbdirect_send_io *sibling, *next;
+
+ list_for_each_entry_safe(sibling, next, &last->sibling_list, sibling_list) {
+ list_del_init(&sibling->sibling_list);
+ smb_direct_free_sendmsg(sc, sibling);
}
+ smb_direct_free_sendmsg(sc, last);
}
+
return ret;
}
@@ -1070,6 +1089,23 @@ static int wait_for_credits(struct smbdirect_socket *sc,
} while (true);
}
+static int wait_for_send_lcredit(struct smbdirect_socket *sc,
+ struct smbdirect_send_batch *send_ctx)
+{
+ if (send_ctx && (atomic_read(&sc->send_io.lcredits.count) <= 1)) {
+ int ret;
+
+ ret = smb_direct_flush_send_list(sc, send_ctx, false);
+ if (ret)
+ return ret;
+ }
+
+ return wait_for_credits(sc,
+ &sc->send_io.lcredits.wait_queue,
+ &sc->send_io.lcredits.count,
+ 1);
+}
+
static int wait_for_send_credits(struct smbdirect_socket *sc,
struct smbdirect_send_batch *send_ctx)
{
@@ -1257,9 +1293,13 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc,
int data_length;
struct scatterlist sg[SMBDIRECT_SEND_IO_MAX_SGE - 1];
+ ret = wait_for_send_lcredit(sc, send_ctx);
+ if (ret)
+ goto lcredit_failed;
+
ret = wait_for_send_credits(sc, send_ctx);
if (ret)
- return ret;
+ goto credit_failed;
data_length = 0;
for (i = 0; i < niov; i++)
@@ -1267,10 +1307,8 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc,
ret = smb_direct_create_header(sc, data_length, remaining_data_length,
&msg);
- if (ret) {
- atomic_inc(&sc->send_io.credits.count);
- return ret;
- }
+ if (ret)
+ goto header_failed;
for (i = 0; i < niov; i++) {
struct ib_sge *sge;
@@ -1308,7 +1346,11 @@ static int smb_direct_post_send_data(struct smbdirect_socket *sc,
return 0;
err:
smb_direct_free_sendmsg(sc, msg);
+header_failed:
atomic_inc(&sc->send_io.credits.count);
+credit_failed:
+ atomic_inc(&sc->send_io.lcredits.count);
+lcredit_failed:
return ret;
}
@@ -1574,18 +1616,14 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
get_buf_page_count(desc_buf, desc_buf_len),
msg->sg_list, SG_CHUNK_SIZE);
if (ret) {
- kfree(msg);
ret = -ENOMEM;
- goto out;
+ goto free_msg;
}
ret = get_sg_list(desc_buf, desc_buf_len,
msg->sgt.sgl, msg->sgt.orig_nents);
- if (ret < 0) {
- sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
- kfree(msg);
- goto out;
- }
+ if (ret < 0)
+ goto free_table;
ret = rdma_rw_ctx_init(&msg->rdma_ctx, sc->ib.qp, sc->ib.qp->port,
msg->sgt.sgl,
@@ -1596,9 +1634,7 @@ static int smb_direct_rdma_xmit(struct smb_direct_transport *t,
is_read ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
if (ret < 0) {
pr_err("failed to init rdma_rw_ctx: %d\n", ret);
- sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
- kfree(msg);
- goto out;
+ goto free_table;
}
list_add_tail(&msg->list, &msg_list);
@@ -1630,6 +1666,12 @@ out:
atomic_add(credits_needed, &sc->rw_io.credits.count);
wake_up(&sc->rw_io.credits.wait_queue);
return ret;
+
+free_table:
+ sg_free_table_chained(&msg->sgt, SG_CHUNK_SIZE);
+free_msg:
+ kfree(msg);
+ goto out;
}
static int smb_direct_rdma_write(struct ksmbd_transport *t,
@@ -1687,10 +1729,10 @@ static int smb_direct_cm_handler(struct rdma_cm_id *cm_id,
}
case RDMA_CM_EVENT_DEVICE_REMOVAL:
case RDMA_CM_EVENT_DISCONNECTED: {
- ib_drain_qp(sc->ib.qp);
-
sc->status = SMBDIRECT_SOCKET_DISCONNECTED;
smb_direct_disconnect_rdma_work(&sc->disconnect_work);
+ if (sc->ib.qp)
+ ib_drain_qp(sc->ib.qp);
break;
}
case RDMA_CM_EVENT_CONNECT_ERROR: {
@@ -1864,27 +1906,17 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
goto out_err;
}
- smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work);
return 0;
out_err:
put_recvmsg(sc, recvmsg);
return ret;
}
-static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc)
-{
- return min_t(unsigned int,
- sc->ib.dev->attrs.max_fast_reg_page_list_len,
- 256);
-}
-
-static int smb_direct_init_params(struct smbdirect_socket *sc,
- struct ib_qp_cap *cap)
+static int smb_direct_init_params(struct smbdirect_socket *sc)
{
struct smbdirect_socket_parameters *sp = &sc->parameters;
- struct ib_device *device = sc->ib.dev;
- int max_send_sges, max_rw_wrs, max_send_wrs;
- unsigned int max_sge_per_wr, wrs_per_credit;
+ int max_send_sges;
+ unsigned int maxpages;
/* need 3 more sge. because a SMB_DIRECT header, SMB2 header,
* SMB2 response could be mapped.
@@ -1895,67 +1927,20 @@ static int smb_direct_init_params(struct smbdirect_socket *sc,
return -EINVAL;
}
- /* Calculate the number of work requests for RDMA R/W.
- * The maximum number of pages which can be registered
- * with one Memory region can be transferred with one
- * R/W credit. And at least 4 work requests for each credit
- * are needed for MR registration, RDMA R/W, local & remote
- * MR invalidation.
- */
- sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc);
- sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size,
- (sc->rw_io.credits.num_pages - 1) *
- PAGE_SIZE);
-
- max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
- device->attrs.max_sge_rd);
- max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
- max_send_sges);
- wrs_per_credit = max_t(unsigned int, 4,
- DIV_ROUND_UP(sc->rw_io.credits.num_pages,
- max_sge_per_wr) + 1);
- max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit;
-
- max_send_wrs = sp->send_credit_target + max_rw_wrs;
- if (max_send_wrs > device->attrs.max_cqe ||
- max_send_wrs > device->attrs.max_qp_wr) {
- pr_err("consider lowering send_credit_target = %d\n",
- sp->send_credit_target);
- pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
- device->attrs.max_cqe, device->attrs.max_qp_wr);
- return -EINVAL;
- }
+ atomic_set(&sc->send_io.lcredits.count, sp->send_credit_target);
- if (sp->recv_credit_max > device->attrs.max_cqe ||
- sp->recv_credit_max > device->attrs.max_qp_wr) {
- pr_err("consider lowering receive_credit_max = %d\n",
- sp->recv_credit_max);
- pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
- device->attrs.max_cqe, device->attrs.max_qp_wr);
- return -EINVAL;
- }
-
- if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) {
- pr_err("warning: device max_send_sge = %d too small\n",
- device->attrs.max_send_sge);
- return -EINVAL;
- }
- if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) {
- pr_err("warning: device max_recv_sge = %d too small\n",
- device->attrs.max_recv_sge);
- return -EINVAL;
- }
+ maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
+ sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
+ sc->rdma.cm_id->port_num,
+ maxpages);
+ sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
+ /* add one extra in order to handle unaligned pages */
+ sc->rw_io.credits.max += 1;
sc->recv_io.credits.target = 1;
atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);
- cap->max_send_wr = max_send_wrs;
- cap->max_recv_wr = sp->recv_credit_max;
- cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
- cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
- cap->max_inline_data = 0;
- cap->max_rdma_ctxs = sc->rw_io.credits.max;
return 0;
}
@@ -2029,13 +2014,129 @@ err:
return -ENOMEM;
}
-static int smb_direct_create_qpair(struct smbdirect_socket *sc,
- struct ib_qp_cap *cap)
+static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr)
+{
+ /*
+ * This could be split out of rdma_rw_init_qp()
+ * and be a helper function next to rdma_rw_mr_factor()
+ *
+ * We can't check unlikely(rdma_rw_force_mr) here,
+ * but that is most likely 0 anyway.
+ */
+ u32 factor;
+
+ WARN_ON_ONCE(attr->port_num == 0);
+
+ /*
+ * Each context needs at least one RDMA READ or WRITE WR.
+ *
+ * For some hardware we might need more, eventually we should ask the
+ * HCA driver for a multiplier here.
+ */
+ factor = 1;
+
+ /*
+ * If the device needs MRs to perform RDMA READ or WRITE operations,
+ * we'll need two additional MRs for the registrations and the
+ * invalidation.
+ */
+ if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd)
+ factor += 2; /* inv + reg */
+
+ return factor * attr->cap.max_rdma_ctxs;
+}
+
+static int smb_direct_create_qpair(struct smbdirect_socket *sc)
{
struct smbdirect_socket_parameters *sp = &sc->parameters;
int ret;
+ struct ib_qp_cap qp_cap;
struct ib_qp_init_attr qp_attr;
- int pages_per_rw;
+ u32 max_send_wr;
+ u32 rdma_send_wr;
+
+ /*
+ * Note that {rdma,ib}_create_qp() will call
+ * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0.
+ * It will adjust cap->max_send_wr to the required
+ * number of additional WRs for the RDMA RW operations.
+ * It will cap cap->max_send_wr to the device limit.
+ *
+ * +1 for ib_drain_qp
+ */
+ qp_cap.max_send_wr = sp->send_credit_target + 1;
+ qp_cap.max_recv_wr = sp->recv_credit_max + 1;
+ qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
+ qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
+ qp_cap.max_inline_data = 0;
+ qp_cap.max_rdma_ctxs = sc->rw_io.credits.max;
+
+ /*
+ * Find out the number of max_send_wr
+ * after rdma_rw_init_qp() adjusted it.
+ *
+ * We only do it on a temporary variable,
+ * as rdma_create_qp() will trigger
+ * rdma_rw_init_qp() again.
+ */
+ memset(&qp_attr, 0, sizeof(qp_attr));
+ qp_attr.cap = qp_cap;
+ qp_attr.port_num = sc->rdma.cm_id->port_num;
+ rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr);
+ max_send_wr = qp_cap.max_send_wr + rdma_send_wr;
+
+ if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe ||
+ qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) {
+ pr_err("Possible CQE overrun: max_send_wr %d\n",
+ qp_cap.max_send_wr);
+ pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+ IB_DEVICE_NAME_MAX,
+ sc->ib.dev->name,
+ sc->ib.dev->attrs.max_cqe,
+ sc->ib.dev->attrs.max_qp_wr);
+ pr_err("consider lowering send_credit_target = %d\n",
+ sp->send_credit_target);
+ return -EINVAL;
+ }
+
+ if (qp_cap.max_rdma_ctxs &&
+ (max_send_wr >= sc->ib.dev->attrs.max_cqe ||
+ max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) {
+ pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n",
+ rdma_send_wr, qp_cap.max_send_wr, max_send_wr);
+ pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+ IB_DEVICE_NAME_MAX,
+ sc->ib.dev->name,
+ sc->ib.dev->attrs.max_cqe,
+ sc->ib.dev->attrs.max_qp_wr);
+ pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n",
+ sp->send_credit_target, qp_cap.max_rdma_ctxs);
+ return -EINVAL;
+ }
+
+ if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe ||
+ qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) {
+ pr_err("Possible CQE overrun: max_recv_wr %d\n",
+ qp_cap.max_recv_wr);
+ pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+ IB_DEVICE_NAME_MAX,
+ sc->ib.dev->name,
+ sc->ib.dev->attrs.max_cqe,
+ sc->ib.dev->attrs.max_qp_wr);
+ pr_err("consider lowering receive_credit_max = %d\n",
+ sp->recv_credit_max);
+ return -EINVAL;
+ }
+
+ if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge ||
+ qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) {
+ pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
+ IB_DEVICE_NAME_MAX,
+ sc->ib.dev->name,
+ sc->ib.dev->attrs.max_send_sge,
+ sc->ib.dev->attrs.max_recv_sge);
+ return -EINVAL;
+ }
sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
if (IS_ERR(sc->ib.pd)) {
@@ -2046,8 +2147,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
}
sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc,
- sp->send_credit_target +
- cap->max_rdma_ctxs,
+ max_send_wr,
IB_POLL_WORKQUEUE);
if (IS_ERR(sc->ib.send_cq)) {
pr_err("Can't create RDMA send CQ\n");
@@ -2057,7 +2157,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
}
sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc,
- sp->recv_credit_max,
+ qp_cap.max_recv_wr,
IB_POLL_WORKQUEUE);
if (IS_ERR(sc->ib.recv_cq)) {
pr_err("Can't create RDMA recv CQ\n");
@@ -2066,10 +2166,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
goto err;
}
+ /*
+ * We reset completely here!
+ * As the above use was just temporary
+ * to calc max_send_wr and rdma_send_wr.
+ *
+ * rdma_create_qp() will trigger rdma_rw_init_qp()
+ * again if max_rdma_ctxs is not 0.
+ */
memset(&qp_attr, 0, sizeof(qp_attr));
qp_attr.event_handler = smb_direct_qpair_handler;
qp_attr.qp_context = sc;
- qp_attr.cap = *cap;
+ qp_attr.cap = qp_cap;
qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
qp_attr.qp_type = IB_QPT_RC;
qp_attr.send_cq = sc->ib.send_cq;
@@ -2085,18 +2193,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
sc->ib.qp = sc->rdma.cm_id->qp;
sc->rdma.cm_id->event_handler = smb_direct_cm_handler;
- pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1;
- if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) {
- ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs,
- sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG,
- sc->rw_io.credits.num_pages, 0);
- if (ret) {
- pr_err("failed to init mr pool count %zu pages %zu\n",
- sc->rw_io.credits.max, sc->rw_io.credits.num_pages);
- goto err;
- }
- }
-
return 0;
err:
if (sc->ib.qp) {
@@ -2154,8 +2250,8 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
return -ECONNABORTED;
ret = smb_direct_check_recvmsg(recvmsg);
- if (ret == -ECONNABORTED)
- goto out;
+ if (ret)
+ goto put;
req = (struct smbdirect_negotiate_req *)recvmsg->packet;
sp->max_recv_size = min_t(int, sp->max_recv_size,
@@ -2170,23 +2266,46 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max);
sc->recv_io.credits.target = max_t(u16, sc->recv_io.credits.target, 1);
- ret = smb_direct_send_negotiate_response(sc, ret);
-out:
+put:
spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags);
sc->recv_io.reassembly.queue_length--;
list_del(&recvmsg->list);
spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags);
put_recvmsg(sc, recvmsg);
+ if (ret == -ECONNABORTED)
+ return ret;
+
+ if (ret)
+ goto respond;
+
+ /*
+ * We negotiated with success, so we need to refill the recv queue.
+ * We do that with sc->idle.immediate_work still being disabled
+ * via smbdirect_socket_init(), so that queue_work(sc->workqueue,
+ * &sc->idle.immediate_work) in smb_direct_post_recv_credits()
+ * is a no-op.
+ *
+ * The message that grants the credits to the client is
+ * the negotiate response.
+ */
+ INIT_WORK(&sc->recv_io.posted.refill_work, smb_direct_post_recv_credits);
+ smb_direct_post_recv_credits(&sc->recv_io.posted.refill_work);
+ if (unlikely(sc->first_error))
+ return sc->first_error;
+ INIT_WORK(&sc->idle.immediate_work, smb_direct_send_immediate_work);
+
+respond:
+ ret = smb_direct_send_negotiate_response(sc, ret);
+
return ret;
}
static int smb_direct_connect(struct smbdirect_socket *sc)
{
- struct ib_qp_cap qp_cap;
int ret;
- ret = smb_direct_init_params(sc, &qp_cap);
+ ret = smb_direct_init_params(sc);
if (ret) {
pr_err("Can't configure RDMA parameters\n");
return ret;
@@ -2198,7 +2317,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc)
return ret;
}
- ret = smb_direct_create_qpair(sc, &qp_cap);
+ ret = smb_direct_create_qpair(sc);
if (ret) {
pr_err("Can't accept RDMA client: %d\n", ret);
return ret;
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index 2d78e94072a0..e142bac4f9f8 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -498,17 +498,26 @@ int compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj,
}
EXPORT_SYMBOL_GPL(compat_only_sysfs_link_entry_to_kobj);
-static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn,
+static int sysfs_group_attrs_change_owner(struct kobject *kobj,
+ struct kernfs_node *grp_kn,
const struct attribute_group *grp,
struct iattr *newattrs)
{
struct kernfs_node *kn;
- int error;
+ int error, i;
+ umode_t mode;
if (grp->attrs) {
struct attribute *const *attr;
- for (attr = grp->attrs; *attr; attr++) {
+ for (i = 0, attr = grp->attrs; *attr; i++, attr++) {
+ if (grp->is_visible) {
+ mode = grp->is_visible(kobj, *attr, i);
+ if (mode & SYSFS_GROUP_INVISIBLE)
+ break;
+ if (!mode)
+ continue;
+ }
kn = kernfs_find_and_get(grp_kn, (*attr)->name);
if (!kn)
return -ENOENT;
@@ -523,7 +532,14 @@ static int sysfs_group_attrs_change_owner(struct kernfs_node *grp_kn,
if (grp->bin_attrs) {
const struct bin_attribute *const *bin_attr;
- for (bin_attr = grp->bin_attrs; *bin_attr; bin_attr++) {
+ for (i = 0, bin_attr = grp->bin_attrs; *bin_attr; i++, bin_attr++) {
+ if (grp->is_bin_visible) {
+ mode = grp->is_bin_visible(kobj, *bin_attr, i);
+ if (mode & SYSFS_GROUP_INVISIBLE)
+ break;
+ if (!mode)
+ continue;
+ }
kn = kernfs_find_and_get(grp_kn, (*bin_attr)->attr.name);
if (!kn)
return -ENOENT;
@@ -573,7 +589,7 @@ int sysfs_group_change_owner(struct kobject *kobj,
error = kernfs_setattr(grp_kn, &newattrs);
if (!error)
- error = sysfs_group_attrs_change_owner(grp_kn, grp, &newattrs);
+ error = sysfs_group_attrs_change_owner(kobj, grp_kn, grp, &newattrs);
kernfs_put(grp_kn);
diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index 8930d5254e1d..b99da294e9a3 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -119,6 +119,15 @@ config XFS_RT
See the xfs man page in section 5 for additional information.
+ This option is mandatory to support zoned block devices. For these
+ devices, the realtime subvolume must be backed by a zoned block
+ device and a regular block device used as the main device (for
+ metadata). If the zoned block device is a host-managed SMR hard-disk
+ containing conventional zones at the beginning of its address space,
+ XFS will use the disk conventional zones as the main device and the
+ remaining sequential write required zones as the backing storage for
+ the realtime subvolume.
+
If unsure, say N.
config XFS_DRAIN_INTENTS
@@ -156,7 +165,7 @@ config XFS_ONLINE_SCRUB_STATS
bool "XFS online metadata check usage data collection"
default y
depends on XFS_ONLINE_SCRUB
- select DEBUG_FS
+ depends on DEBUG_FS
help
If you say Y here, the kernel will gather usage data about
the online metadata check subsystem. This includes the number
diff --git a/fs/xfs/libxfs/xfs_rtgroup.h b/fs/xfs/libxfs/xfs_rtgroup.h
index d36a6ae0abe5..d4fcf591e63d 100644
--- a/fs/xfs/libxfs/xfs_rtgroup.h
+++ b/fs/xfs/libxfs/xfs_rtgroup.h
@@ -50,6 +50,12 @@ struct xfs_rtgroup {
uint8_t *rtg_rsum_cache;
struct xfs_open_zone *rtg_open_zone;
};
+
+ /*
+ * Count of outstanding GC operations for zoned XFS. Any RTG with a
+ * non-zero rtg_gccount will not be picked as new GC victim.
+ */
+ atomic_t rtg_gccount;
};
/*
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 26721fab5cab..091c79e432e5 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -376,6 +376,36 @@ out_incomplete:
return error;
}
+static uint
+xchk_nlinks_ilock_dir(
+ struct xfs_inode *ip)
+{
+ uint lock_mode = XFS_ILOCK_SHARED;
+
+ /*
+ * We're going to scan the directory entries, so we must be ready to
+ * pull the data fork mappings into memory if they aren't already.
+ */
+ if (xfs_need_iread_extents(&ip->i_df))
+ lock_mode = XFS_ILOCK_EXCL;
+
+ /*
+ * We're going to scan the parent pointers, so we must be ready to
+ * pull the attr fork mappings into memory if they aren't already.
+ */
+ if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) &&
+ xfs_need_iread_extents(&ip->i_af))
+ lock_mode = XFS_ILOCK_EXCL;
+
+ /*
+ * Take the IOLOCK so that other threads cannot start a directory
+ * update while we're scanning.
+ */
+ lock_mode |= XFS_IOLOCK_SHARED;
+ xfs_ilock(ip, lock_mode);
+ return lock_mode;
+}
+
/* Walk a directory to bump the observed link counts of the children. */
STATIC int
xchk_nlinks_collect_dir(
@@ -394,8 +424,7 @@ xchk_nlinks_collect_dir(
return 0;
/* Prevent anyone from changing this directory while we walk it. */
- xfs_ilock(dp, XFS_IOLOCK_SHARED);
- lock_mode = xfs_ilock_data_map_shared(dp);
+ lock_mode = xchk_nlinks_ilock_dir(dp);
/*
* The dotdot entry of an unlinked directory still points to the last
@@ -452,7 +481,6 @@ out_abort:
xchk_iscan_abort(&xnc->collect_iscan);
out_unlock:
xfs_iunlock(dp, lock_mode);
- xfs_iunlock(dp, XFS_IOLOCK_SHARED);
return error;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 773d959965dc..47edf3041631 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1751,7 +1751,7 @@ xfs_init_buftarg(
const char *descr)
{
/* The maximum size of the buftarg is only known once the sb is read. */
- btp->bt_nr_sectors = (xfs_daddr_t)-1;
+ btp->bt_nr_sectors = XFS_BUF_DADDR_MAX;
/* Set up device logical sector size mask */
btp->bt_logical_sectorsize = logical_sectorsize;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 8fa7bdf59c91..e25cd2a160f3 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -22,6 +22,7 @@ extern struct kmem_cache *xfs_buf_cache;
*/
struct xfs_buf;
+#define XFS_BUF_DADDR_MAX ((xfs_daddr_t) S64_MAX)
#define XFS_BUF_DADDR_NULL ((xfs_daddr_t) (-1LL))
#define XBF_READ (1u << 0) /* buffer intended for reading from device */
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index f046d1215b04..b871dfde372b 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -236,7 +236,6 @@ typedef struct xfs_mount {
bool m_update_sb; /* sb needs update in mount */
unsigned int m_max_open_zones;
unsigned int m_zonegc_low_space;
- struct xfs_mru_cache *m_zone_cache; /* Inode to open zone cache */
/* max_atomic_write mount option value */
unsigned long long m_awu_max_bytes;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e85a156dc17d..1067ebb3b001 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -102,7 +102,7 @@ static const struct constant_table dax_param_enums[] = {
* Table driven mount option parser.
*/
enum {
- Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
+ Op_deprecated, Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev,
Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
Opt_allocsize, Opt_norecovery, Opt_inode64, Opt_inode32,
@@ -114,7 +114,21 @@ enum {
Opt_lifetime, Opt_nolifetime, Opt_max_atomic_write,
};
+#define fsparam_dead(NAME) \
+ __fsparam(NULL, (NAME), Op_deprecated, fs_param_deprecated, NULL)
+
static const struct fs_parameter_spec xfs_fs_parameters[] = {
+ /*
+ * These mount options were supposed to be deprecated in September 2025
+ * but the deprecation warning was buggy, so not all users were
+ * notified. The deprecation is now obnoxiously loud and postponed to
+ * September 2030.
+ */
+ fsparam_dead("attr2"),
+ fsparam_dead("noattr2"),
+ fsparam_dead("ikeep"),
+ fsparam_dead("noikeep"),
+
fsparam_u32("logbufs", Opt_logbufs),
fsparam_string("logbsize", Opt_logbsize),
fsparam_string("logdev", Opt_logdev),
@@ -786,6 +800,12 @@ xfs_fs_evict_inode(
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
+
+ if (IS_ENABLED(CONFIG_XFS_RT) &&
+ S_ISREG(inode->i_mode) && inode->i_private) {
+ xfs_open_zone_put(inode->i_private);
+ inode->i_private = NULL;
+ }
}
static void
@@ -1373,16 +1393,25 @@ suffix_kstrtoull(
static inline void
xfs_fs_warn_deprecated(
struct fs_context *fc,
- struct fs_parameter *param,
- uint64_t flag,
- bool value)
+ struct fs_parameter *param)
{
- /* Don't print the warning if reconfiguring and current mount point
- * already had the flag set
+ /*
+ * Always warn about someone passing in a deprecated mount option.
+ * Previously we wouldn't print the warning if we were reconfiguring
+ * and current mount point already had the flag set, but that was not
+ * the right thing to do.
+ *
+ * Many distributions mount the root filesystem with no options in the
+ * initramfs and rely on mount -a to remount the root fs with the
+ * options in fstab. However, the old behavior meant that there would
+ * never be a warning about deprecated mount options for the root fs in
+ * /etc/fstab. On a single-fs system, that means no warning at all.
+ *
+ * Compounding this problem are distribution scripts that copy
+ * /proc/mounts to fstab, which means that we can't remove mount
+ * options unless we're 100% sure they have only ever been advertised
+ * in /proc/mounts in response to explicitly provided mount options.
*/
- if ((fc->purpose & FS_CONTEXT_FOR_RECONFIGURE) &&
- !!(XFS_M(fc->root->d_sb)->m_features & flag) == value)
- return;
xfs_warn(fc->s_fs_info, "%s mount option is deprecated.", param->key);
}
@@ -1408,6 +1437,9 @@ xfs_fs_parse_param(
return opt;
switch (opt) {
+ case Op_deprecated:
+ xfs_fs_warn_deprecated(fc, param);
+ return 0;
case Opt_logbufs:
parsing_mp->m_logbufs = result.uint_32;
return 0;
@@ -1528,7 +1560,6 @@ xfs_fs_parse_param(
xfs_mount_set_dax_mode(parsing_mp, result.uint_32);
return 0;
#endif
- /* Following mount options will be removed in September 2025 */
case Opt_max_open_zones:
parsing_mp->m_max_open_zones = result.uint_32;
return 0;
@@ -2221,7 +2252,7 @@ xfs_init_fs_context(
struct xfs_mount *mp;
int i;
- mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
+ mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
if (!mp)
return -ENOMEM;
diff --git a/fs/xfs/xfs_zone_alloc.c b/fs/xfs/xfs_zone_alloc.c
index 1147bacb2da8..040402240807 100644
--- a/fs/xfs/xfs_zone_alloc.c
+++ b/fs/xfs/xfs_zone_alloc.c
@@ -26,14 +26,22 @@
#include "xfs_trace.h"
#include "xfs_mru_cache.h"
+static void
+xfs_open_zone_free_rcu(
+ struct callback_head *cb)
+{
+ struct xfs_open_zone *oz = container_of(cb, typeof(*oz), oz_rcu);
+
+ xfs_rtgroup_rele(oz->oz_rtg);
+ kfree(oz);
+}
+
void
xfs_open_zone_put(
struct xfs_open_zone *oz)
{
- if (atomic_dec_and_test(&oz->oz_ref)) {
- xfs_rtgroup_rele(oz->oz_rtg);
- kfree(oz);
- }
+ if (atomic_dec_and_test(&oz->oz_ref))
+ call_rcu(&oz->oz_rcu, xfs_open_zone_free_rcu);
}
static inline uint32_t
@@ -238,6 +246,14 @@ xfs_zoned_map_extent(
* If a data write raced with this GC write, keep the existing data in
* the data fork, mark our newly written GC extent as reclaimable, then
* move on to the next extent.
+ *
+ * Note that this can also happen when racing with operations that do
+ * not actually invalidate the data, but just move it to a different
+ * inode (XFS_IOC_EXCHANGE_RANGE), or to a different offset inside the
+ * inode (FALLOC_FL_COLLAPSE_RANGE / FALLOC_FL_INSERT_RANGE). If the
+ * data was just moved around, GC fails to free the zone, but the zone
+ * becomes a GC candidate again as soon as all previous GC I/O has
+ * finished and these blocks will be moved out eventually.
*/
if (old_startblock != NULLFSBLOCK &&
old_startblock != data.br_startblock)
@@ -614,14 +630,25 @@ static inline enum rw_hint xfs_inode_write_hint(struct xfs_inode *ip)
}
/*
- * Try to pack inodes that are written back after they were closed tight instead
- * of trying to open new zones for them or spread them to the least recently
- * used zone. This optimizes the data layout for workloads that untar or copy
- * a lot of small files. Right now this does not separate multiple such
+ * Try to tightly pack small files that are written back after they were closed
+ * instead of trying to open new zones for them or spread them to the least
+ * recently used zone. This optimizes the data layout for workloads that untar
+ * or copy a lot of small files. Right now this does not separate multiple such
* streams.
*/
static inline bool xfs_zoned_pack_tight(struct xfs_inode *ip)
{
+ struct xfs_mount *mp = ip->i_mount;
+ size_t zone_capacity =
+ XFS_FSB_TO_B(mp, mp->m_groups[XG_TYPE_RTG].blocks);
+
+ /*
+ * Do not pack write files that are already using a full zone to avoid
+ * fragmentation.
+ */
+ if (i_size_read(VFS_I(ip)) >= zone_capacity)
+ return false;
+
return !inode_is_open_for_write(VFS_I(ip)) &&
!(ip->i_diflags & XFS_DIFLAG_APPEND);
}
@@ -746,97 +773,54 @@ xfs_mark_rtg_boundary(
}
/*
- * Cache the last zone written to for an inode so that it is considered first
- * for subsequent writes.
- */
-struct xfs_zone_cache_item {
- struct xfs_mru_cache_elem mru;
- struct xfs_open_zone *oz;
-};
-
-static inline struct xfs_zone_cache_item *
-xfs_zone_cache_item(struct xfs_mru_cache_elem *mru)
-{
- return container_of(mru, struct xfs_zone_cache_item, mru);
-}
-
-static void
-xfs_zone_cache_free_func(
- void *data,
- struct xfs_mru_cache_elem *mru)
-{
- struct xfs_zone_cache_item *item = xfs_zone_cache_item(mru);
-
- xfs_open_zone_put(item->oz);
- kfree(item);
-}
-
-/*
* Check if we have a cached last open zone available for the inode and
* if yes return a reference to it.
*/
static struct xfs_open_zone *
-xfs_cached_zone(
- struct xfs_mount *mp,
- struct xfs_inode *ip)
+xfs_get_cached_zone(
+ struct xfs_inode *ip)
{
- struct xfs_mru_cache_elem *mru;
- struct xfs_open_zone *oz;
+ struct xfs_open_zone *oz;
- mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
- if (!mru)
- return NULL;
- oz = xfs_zone_cache_item(mru)->oz;
+ rcu_read_lock();
+ oz = VFS_I(ip)->i_private;
if (oz) {
/*
* GC only steals open zones at mount time, so no GC zones
* should end up in the cache.
*/
ASSERT(!oz->oz_is_gc);
- ASSERT(atomic_read(&oz->oz_ref) > 0);
- atomic_inc(&oz->oz_ref);
+ if (!atomic_inc_not_zero(&oz->oz_ref))
+ oz = NULL;
}
- xfs_mru_cache_done(mp->m_zone_cache);
+ rcu_read_unlock();
+
return oz;
}
/*
- * Update the last used zone cache for a given inode.
+ * Stash our zone in the inode so that is is reused for future allocations.
*
- * The caller must have a reference on the open zone.
+ * The open_zone structure will be pinned until either the inode is freed or
+ * until the cached open zone is replaced with a different one because the
+ * current one was full when we tried to use it. This means we keep any
+ * open zone around forever as long as any inode that used it for the last
+ * write is cached, which slightly increases the memory use of cached inodes
+ * that were every written to, but significantly simplifies the cached zone
+ * lookup. Because the open_zone is clearly marked as full when all data
+ * in the underlying RTG was written, the caching is always safe.
*/
static void
-xfs_zone_cache_create_association(
- struct xfs_inode *ip,
- struct xfs_open_zone *oz)
+xfs_set_cached_zone(
+ struct xfs_inode *ip,
+ struct xfs_open_zone *oz)
{
- struct xfs_mount *mp = ip->i_mount;
- struct xfs_zone_cache_item *item = NULL;
- struct xfs_mru_cache_elem *mru;
+ struct xfs_open_zone *old_oz;
- ASSERT(atomic_read(&oz->oz_ref) > 0);
atomic_inc(&oz->oz_ref);
-
- mru = xfs_mru_cache_lookup(mp->m_zone_cache, ip->i_ino);
- if (mru) {
- /*
- * If we have an association already, update it to point to the
- * new zone.
- */
- item = xfs_zone_cache_item(mru);
- xfs_open_zone_put(item->oz);
- item->oz = oz;
- xfs_mru_cache_done(mp->m_zone_cache);
- return;
- }
-
- item = kmalloc(sizeof(*item), GFP_KERNEL);
- if (!item) {
- xfs_open_zone_put(oz);
- return;
- }
- item->oz = oz;
- xfs_mru_cache_insert(mp->m_zone_cache, ip->i_ino, &item->mru);
+ old_oz = xchg(&VFS_I(ip)->i_private, oz);
+ if (old_oz)
+ xfs_open_zone_put(old_oz);
}
static void
@@ -880,15 +864,14 @@ xfs_zone_alloc_and_submit(
* the inode is still associated with a zone and use that if so.
*/
if (!*oz)
- *oz = xfs_cached_zone(mp, ip);
+ *oz = xfs_get_cached_zone(ip);
if (!*oz) {
select_zone:
*oz = xfs_select_zone(mp, write_hint, pack_tight);
if (!*oz)
goto out_error;
-
- xfs_zone_cache_create_association(ip, *oz);
+ xfs_set_cached_zone(ip, *oz);
}
alloc_len = xfs_zone_alloc_blocks(*oz, XFS_B_TO_FSB(mp, ioend->io_size),
@@ -966,6 +949,12 @@ xfs_free_open_zones(
xfs_open_zone_put(oz);
}
spin_unlock(&zi->zi_open_zones_lock);
+
+ /*
+ * Wait for all open zones to be freed so that they drop the group
+ * references:
+ */
+ rcu_barrier();
}
struct xfs_init_zones {
@@ -1279,14 +1268,6 @@ xfs_mount_zones(
error = xfs_zone_gc_mount(mp);
if (error)
goto out_free_zone_info;
-
- /*
- * Set up a mru cache to track inode to open zone for data placement
- * purposes. The magic values for group count and life time is the
- * same as the defaults for file streams, which seems sane enough.
- */
- xfs_mru_cache_create(&mp->m_zone_cache, mp,
- 5000, 10, xfs_zone_cache_free_func);
return 0;
out_free_zone_info:
@@ -1300,5 +1281,4 @@ xfs_unmount_zones(
{
xfs_zone_gc_unmount(mp);
xfs_free_zone_info(mp->m_zone_info);
- xfs_mru_cache_destroy(mp->m_zone_cache);
}
diff --git a/fs/xfs/xfs_zone_gc.c b/fs/xfs/xfs_zone_gc.c
index 064cd1a857a0..4ade54445532 100644
--- a/fs/xfs/xfs_zone_gc.c
+++ b/fs/xfs/xfs_zone_gc.c
@@ -114,6 +114,8 @@ struct xfs_gc_bio {
/* Open Zone being written to */
struct xfs_open_zone *oz;
+ struct xfs_rtgroup *victim_rtg;
+
/* Bio used for reads and writes, including the bvec used by it */
struct bio_vec bv;
struct bio bio; /* must be last */
@@ -264,6 +266,7 @@ xfs_zone_gc_iter_init(
iter->rec_count = 0;
iter->rec_idx = 0;
iter->victim_rtg = victim_rtg;
+ atomic_inc(&victim_rtg->rtg_gccount);
}
/*
@@ -362,6 +365,7 @@ xfs_zone_gc_query(
return 0;
done:
+ atomic_dec(&iter->victim_rtg->rtg_gccount);
xfs_rtgroup_rele(iter->victim_rtg);
iter->victim_rtg = NULL;
return 0;
@@ -451,6 +455,20 @@ xfs_zone_gc_pick_victim_from(
if (!rtg)
continue;
+ /*
+ * If the zone is already undergoing GC, don't pick it again.
+ *
+ * This prevents us from picking one of the zones for which we
+ * already submitted GC I/O, but for which the remapping hasn't
+ * concluded yet. This won't cause data corruption, but
+ * increases write amplification and slows down GC, so this is
+ * a bad thing.
+ */
+ if (atomic_read(&rtg->rtg_gccount)) {
+ xfs_rtgroup_rele(rtg);
+ continue;
+ }
+
/* skip zones that are just waiting for a reset */
if (rtg_rmap(rtg)->i_used_blocks == 0 ||
rtg_rmap(rtg)->i_used_blocks >= victim_used) {
@@ -491,21 +509,6 @@ xfs_zone_gc_select_victim(
struct xfs_rtgroup *victim_rtg = NULL;
unsigned int bucket;
- if (xfs_is_shutdown(mp))
- return false;
-
- if (iter->victim_rtg)
- return true;
-
- /*
- * Don't start new work if we are asked to stop or park.
- */
- if (kthread_should_stop() || kthread_should_park())
- return false;
-
- if (!xfs_zoned_need_gc(mp))
- return false;
-
spin_lock(&zi->zi_used_buckets_lock);
for (bucket = 0; bucket < XFS_ZONE_USED_BUCKETS; bucket++) {
victim_rtg = xfs_zone_gc_pick_victim_from(mp, bucket);
@@ -703,6 +706,9 @@ xfs_zone_gc_start_chunk(
chunk->scratch = &data->scratch[data->scratch_idx];
chunk->data = data;
chunk->oz = oz;
+ chunk->victim_rtg = iter->victim_rtg;
+ atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+ atomic_inc(&chunk->victim_rtg->rtg_gccount);
bio->bi_iter.bi_sector = xfs_rtb_to_daddr(mp, chunk->old_startblock);
bio->bi_end_io = xfs_zone_gc_end_io;
@@ -725,6 +731,8 @@ static void
xfs_zone_gc_free_chunk(
struct xfs_gc_bio *chunk)
{
+ atomic_dec(&chunk->victim_rtg->rtg_gccount);
+ xfs_rtgroup_rele(chunk->victim_rtg);
list_del(&chunk->entry);
xfs_open_zone_put(chunk->oz);
xfs_irele(chunk->ip);
@@ -785,6 +793,10 @@ xfs_zone_gc_split_write(
split_chunk->oz = chunk->oz;
atomic_inc(&chunk->oz->oz_ref);
+ split_chunk->victim_rtg = chunk->victim_rtg;
+ atomic_inc(&chunk->victim_rtg->rtg_group.xg_active_ref);
+ atomic_inc(&chunk->victim_rtg->rtg_gccount);
+
chunk->offset += split_len;
chunk->len -= split_len;
chunk->old_startblock += XFS_B_TO_FSB(data->mp, split_len);
@@ -975,6 +987,27 @@ xfs_zone_gc_reset_zones(
} while (next);
}
+static bool
+xfs_zone_gc_should_start_new_work(
+ struct xfs_zone_gc_data *data)
+{
+ if (xfs_is_shutdown(data->mp))
+ return false;
+ if (!xfs_zone_gc_space_available(data))
+ return false;
+
+ if (!data->iter.victim_rtg) {
+ if (kthread_should_stop() || kthread_should_park())
+ return false;
+ if (!xfs_zoned_need_gc(data->mp))
+ return false;
+ if (!xfs_zone_gc_select_victim(data))
+ return false;
+ }
+
+ return true;
+}
+
/*
* Handle the work to read and write data for GC and to reset the zones,
* including handling all completions.
@@ -982,7 +1015,7 @@ xfs_zone_gc_reset_zones(
* Note that the order of the chunks is preserved so that we don't undo the
* optimal order established by xfs_zone_gc_query().
*/
-static bool
+static void
xfs_zone_gc_handle_work(
struct xfs_zone_gc_data *data)
{
@@ -996,30 +1029,22 @@ xfs_zone_gc_handle_work(
zi->zi_reset_list = NULL;
spin_unlock(&zi->zi_reset_list_lock);
- if (!xfs_zone_gc_select_victim(data) ||
- !xfs_zone_gc_space_available(data)) {
- if (list_empty(&data->reading) &&
- list_empty(&data->writing) &&
- list_empty(&data->resetting) &&
- !reset_list)
- return false;
- }
-
- __set_current_state(TASK_RUNNING);
- try_to_freeze();
-
- if (reset_list)
+ if (reset_list) {
+ set_current_state(TASK_RUNNING);
xfs_zone_gc_reset_zones(data, reset_list);
+ }
list_for_each_entry_safe(chunk, next, &data->resetting, entry) {
if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
break;
+ set_current_state(TASK_RUNNING);
xfs_zone_gc_finish_reset(chunk);
}
list_for_each_entry_safe(chunk, next, &data->writing, entry) {
if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
break;
+ set_current_state(TASK_RUNNING);
xfs_zone_gc_finish_chunk(chunk);
}
@@ -1027,15 +1052,18 @@ xfs_zone_gc_handle_work(
list_for_each_entry_safe(chunk, next, &data->reading, entry) {
if (READ_ONCE(chunk->state) != XFS_GC_BIO_DONE)
break;
+ set_current_state(TASK_RUNNING);
xfs_zone_gc_write_chunk(chunk);
}
blk_finish_plug(&plug);
- blk_start_plug(&plug);
- while (xfs_zone_gc_start_chunk(data))
- ;
- blk_finish_plug(&plug);
- return true;
+ if (xfs_zone_gc_should_start_new_work(data)) {
+ set_current_state(TASK_RUNNING);
+ blk_start_plug(&plug);
+ while (xfs_zone_gc_start_chunk(data))
+ ;
+ blk_finish_plug(&plug);
+ }
}
/*
@@ -1059,8 +1087,18 @@ xfs_zoned_gcd(
for (;;) {
set_current_state(TASK_INTERRUPTIBLE | TASK_FREEZABLE);
xfs_set_zonegc_running(mp);
- if (xfs_zone_gc_handle_work(data))
+
+ xfs_zone_gc_handle_work(data);
+
+ /*
+ * Only sleep if nothing set the state to running. Else check for
+ * work again as someone might have queued up more work and woken
+ * us in the meantime.
+ */
+ if (get_current_state() == TASK_RUNNING) {
+ try_to_freeze();
continue;
+ }
if (list_empty(&data->reading) &&
list_empty(&data->writing) &&
diff --git a/fs/xfs/xfs_zone_priv.h b/fs/xfs/xfs_zone_priv.h
index 35e6de3d25ed..4322e26dd99a 100644
--- a/fs/xfs/xfs_zone_priv.h
+++ b/fs/xfs/xfs_zone_priv.h
@@ -44,6 +44,8 @@ struct xfs_open_zone {
* the life time of an open zone.
*/
struct xfs_rtgroup *oz_rtg;
+
+ struct rcu_head oz_rcu;
};
/*