diff options
Diffstat (limited to 'fs/smb/client')
-rw-r--r-- | fs/smb/client/Kconfig | 1 | ||||
-rw-r--r-- | fs/smb/client/cached_dir.c | 87 | ||||
-rw-r--r-- | fs/smb/client/cached_dir.h | 17 | ||||
-rw-r--r-- | fs/smb/client/cifs_debug.c | 122 | ||||
-rw-r--r-- | fs/smb/client/cifs_spnego.c | 12 | ||||
-rw-r--r-- | fs/smb/client/cifs_swn.c | 20 | ||||
-rw-r--r-- | fs/smb/client/cifsencrypt.c | 8 | ||||
-rw-r--r-- | fs/smb/client/cifsfs.c | 80 | ||||
-rw-r--r-- | fs/smb/client/cifsfs.h | 4 | ||||
-rw-r--r-- | fs/smb/client/cifsglob.h | 14 | ||||
-rw-r--r-- | fs/smb/client/cifssmb.c | 4 | ||||
-rw-r--r-- | fs/smb/client/dir.c | 86 | ||||
-rw-r--r-- | fs/smb/client/file.c | 121 | ||||
-rw-r--r-- | fs/smb/client/fs_context.c | 11 | ||||
-rw-r--r-- | fs/smb/client/inode.c | 155 | ||||
-rw-r--r-- | fs/smb/client/readdir.c | 40 | ||||
-rw-r--r-- | fs/smb/client/smb1ops.c | 69 | ||||
-rw-r--r-- | fs/smb/client/smb2inode.c | 24 | ||||
-rw-r--r-- | fs/smb/client/smb2ops.c | 364 | ||||
-rw-r--r-- | fs/smb/client/smb2pdu.c | 22 | ||||
-rw-r--r-- | fs/smb/client/smb2pdu.h | 16 | ||||
-rw-r--r-- | fs/smb/client/smb2transport.c | 5 | ||||
-rw-r--r-- | fs/smb/client/smbdirect.c | 1199 | ||||
-rw-r--r-- | fs/smb/client/smbdirect.h | 102 | ||||
-rw-r--r-- | fs/smb/client/trace.h | 2 | ||||
-rw-r--r-- | fs/smb/client/transport.c | 13 |
26 files changed, 1566 insertions, 1032 deletions
diff --git a/fs/smb/client/Kconfig b/fs/smb/client/Kconfig index 9f05f94e265a..a4c02199fef4 100644 --- a/fs/smb/client/Kconfig +++ b/fs/smb/client/Kconfig @@ -15,6 +15,7 @@ config CIFS select CRYPTO_GCM select CRYPTO_ECB select CRYPTO_AES + select CRYPTO_LIB_ARC4 select KEYS select DNS_RESOLVER select ASN1 diff --git a/fs/smb/client/cached_dir.c b/fs/smb/client/cached_dir.c index b69daeb1301b..b8ac7b7faf61 100644 --- a/fs/smb/client/cached_dir.c +++ b/fs/smb/client/cached_dir.c @@ -36,9 +36,8 @@ static struct cached_fid *find_or_create_cached_dir(struct cached_fids *cfids, * fully cached or it may be in the process of * being deleted due to a lease break. */ - if (!cfid->time || !cfid->has_lease) { + if (!is_valid_cached_dir(cfid)) return NULL; - } kref_get(&cfid->refcount); return cfid; } @@ -194,7 +193,7 @@ replay_again: * Otherwise, it is either a new entry or laundromat worker removed it * from @cfids->entries. Caller will put last reference if the latter. */ - if (cfid->has_lease && cfid->time) { + if (is_valid_cached_dir(cfid)) { cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); *ret_cfid = cfid; @@ -233,7 +232,7 @@ replay_again: list_for_each_entry(parent_cfid, &cfids->entries, entry) { if (parent_cfid->dentry == dentry->d_parent) { cifs_dbg(FYI, "found a parent cached file handle\n"); - if (parent_cfid->has_lease && parent_cfid->time) { + if (is_valid_cached_dir(parent_cfid)) { lease_flags |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE; memcpy(pfid->parent_lease_key, @@ -417,12 +416,18 @@ int open_cached_dir_by_dentry(struct cifs_tcon *tcon, if (cfids == NULL) return -EOPNOTSUPP; + if (!dentry) + return -ENOENT; + spin_lock(&cfids->cfid_list_lock); list_for_each_entry(cfid, &cfids->entries, entry) { - if (dentry && cfid->dentry == dentry) { + if (cfid->dentry == dentry) { + if (!is_valid_cached_dir(cfid)) + break; cifs_dbg(FYI, "found a cached file handle by dentry\n"); kref_get(&cfid->refcount); *ret_cfid = cfid; + cfid->last_access_time = jiffies; spin_unlock(&cfids->cfid_list_lock); return 0; } @@ -522,10 +527,9 @@ void close_all_cached_dirs(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); goto done; } - spin_lock(&cfid->fid_lock); + tmp_list->dentry = cfid->dentry; cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); list_add_tail(&tmp_list->entry, &entry); } @@ -558,8 +562,8 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) /* * Mark all the cfids as closed, and move them to the cfids->dying list. - * They'll be cleaned up later by cfids_invalidation_worker. Take - * a reference to each cfid during this process. + * They'll be cleaned up by laundromat. Take a reference to each cfid + * during this process. */ spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { @@ -576,12 +580,11 @@ void invalidate_all_cached_dirs(struct cifs_tcon *tcon) } else kref_get(&cfid->refcount); } - /* - * Queue dropping of the dentries once locks have been dropped - */ - if (!list_empty(&cfids->dying)) - queue_work(cfid_put_wq, &cfids->invalidation_work); spin_unlock(&cfids->cfid_list_lock); + + /* run laundromat unconditionally now as there might have been previously queued work */ + mod_delayed_work(cfid_put_wq, &cfids->laundromat_work, 0); + flush_delayed_work(&cfids->laundromat_work); } static void @@ -608,14 +611,9 @@ static void cached_dir_put_work(struct work_struct *work) { struct cached_fid *cfid = container_of(work, struct cached_fid, put_work); - struct dentry *dentry; - - spin_lock(&cfid->fid_lock); - dentry = cfid->dentry; + dput(cfid->dentry); cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); - dput(dentry); queue_work(serverclose_wq, &cfid->close_work); } @@ -673,7 +671,6 @@ static struct cached_fid *init_cached_dir(const char *path) INIT_LIST_HEAD(&cfid->entry); INIT_LIST_HEAD(&cfid->dirents.entries); mutex_init(&cfid->dirents.de_mutex); - spin_lock_init(&cfid->fid_lock); kref_init(&cfid->refcount); return cfid; } @@ -697,40 +694,38 @@ static void free_cached_dir(struct cached_fid *cfid) kfree(dirent); } + /* adjust tcon-level counters and reset per-dir accounting */ + if (cfid->cfids) { + if (cfid->dirents.entries_count) + atomic_long_sub((long)cfid->dirents.entries_count, + &cfid->cfids->total_dirents_entries); + if (cfid->dirents.bytes_used) { + atomic64_sub((long long)cfid->dirents.bytes_used, + &cfid->cfids->total_dirents_bytes); + atomic64_sub((long long)cfid->dirents.bytes_used, + &cifs_dircache_bytes_used); + } + } + cfid->dirents.entries_count = 0; + cfid->dirents.bytes_used = 0; + kfree(cfid->path); cfid->path = NULL; kfree(cfid); } -static void cfids_invalidation_worker(struct work_struct *work) -{ - struct cached_fids *cfids = container_of(work, struct cached_fids, - invalidation_work); - struct cached_fid *cfid, *q; - LIST_HEAD(entry); - - spin_lock(&cfids->cfid_list_lock); - /* move cfids->dying to the local list */ - list_cut_before(&entry, &cfids->dying, &cfids->dying); - spin_unlock(&cfids->cfid_list_lock); - - list_for_each_entry_safe(cfid, q, &entry, entry) { - list_del(&cfid->entry); - /* Drop the ref-count acquired in invalidate_all_cached_dirs */ - kref_put(&cfid->refcount, smb2_close_cached_fid); - } -} - static void cfids_laundromat_worker(struct work_struct *work) { struct cached_fids *cfids; struct cached_fid *cfid, *q; - struct dentry *dentry; LIST_HEAD(entry); cfids = container_of(work, struct cached_fids, laundromat_work.work); spin_lock(&cfids->cfid_list_lock); + /* move cfids->dying to the local list */ + list_cut_before(&entry, &cfids->dying, &cfids->dying); + list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { if (cfid->last_access_time && time_after(jiffies, cfid->last_access_time + HZ * dir_cache_timeout)) { @@ -752,12 +747,9 @@ static void cfids_laundromat_worker(struct work_struct *work) list_for_each_entry_safe(cfid, q, &entry, entry) { list_del(&cfid->entry); - spin_lock(&cfid->fid_lock); - dentry = cfid->dentry; + dput(cfid->dentry); cfid->dentry = NULL; - spin_unlock(&cfid->fid_lock); - dput(dentry); if (cfid->is_open) { spin_lock(&cifs_tcp_ses_lock); ++cfid->tcon->tc_count; @@ -787,11 +779,13 @@ struct cached_fids *init_cached_dirs(void) INIT_LIST_HEAD(&cfids->entries); INIT_LIST_HEAD(&cfids->dying); - INIT_WORK(&cfids->invalidation_work, cfids_invalidation_worker); INIT_DELAYED_WORK(&cfids->laundromat_work, cfids_laundromat_worker); queue_delayed_work(cfid_put_wq, &cfids->laundromat_work, dir_cache_timeout * HZ); + atomic_long_set(&cfids->total_dirents_entries, 0); + atomic64_set(&cfids->total_dirents_bytes, 0); + return cfids; } @@ -808,7 +802,6 @@ void free_cached_dirs(struct cached_fids *cfids) return; cancel_delayed_work_sync(&cfids->laundromat_work); - cancel_work_sync(&cfids->invalidation_work); spin_lock(&cfids->cfid_list_lock); list_for_each_entry_safe(cfid, q, &cfids->entries, entry) { diff --git a/fs/smb/client/cached_dir.h b/fs/smb/client/cached_dir.h index 46b5a2fdf15b..1e383db7c337 100644 --- a/fs/smb/client/cached_dir.h +++ b/fs/smb/client/cached_dir.h @@ -27,6 +27,9 @@ struct cached_dirents { struct mutex de_mutex; loff_t pos; /* Expected ctx->pos */ struct list_head entries; + /* accounting for cached entries in this directory */ + unsigned long entries_count; + unsigned long bytes_used; }; struct cached_fid { @@ -41,7 +44,6 @@ struct cached_fid { unsigned long last_access_time; /* jiffies of when last accessed */ struct kref refcount; struct cifs_fid fid; - spinlock_t fid_lock; struct cifs_tcon *tcon; struct dentry *dentry; struct work_struct put_work; @@ -60,10 +62,21 @@ struct cached_fids { int num_entries; struct list_head entries; struct list_head dying; - struct work_struct invalidation_work; struct delayed_work laundromat_work; + /* aggregate accounting for all cached dirents under this tcon */ + atomic_long_t total_dirents_entries; + atomic64_t total_dirents_bytes; }; +/* Module-wide directory cache accounting (defined in cifsfs.c) */ +extern atomic64_t cifs_dircache_bytes_used; /* bytes across all mounts */ + +static inline bool +is_valid_cached_dir(struct cached_fid *cfid) +{ + return cfid->time && cfid->has_lease; +} + extern struct cached_fids *init_cached_dirs(void); extern void free_cached_dirs(struct cached_fids *cfids); extern int open_cached_dir(unsigned int xid, struct cifs_tcon *tcon, diff --git a/fs/smb/client/cifs_debug.c b/fs/smb/client/cifs_debug.c index 2337cf795db3..1fb71d2d31b5 100644 --- a/fs/smb/client/cifs_debug.c +++ b/fs/smb/client/cifs_debug.c @@ -24,6 +24,7 @@ #endif #ifdef CONFIG_CIFS_SMB_DIRECT #include "smbdirect.h" +#include "../common/smbdirect/smbdirect_pdu.h" #endif #include "cifs_swn.h" #include "cached_dir.h" @@ -239,14 +240,18 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) struct cifs_ses *ses; struct cifs_tcon *tcon; struct cifsFileInfo *cfile; + struct inode *inode; + struct cifsInodeInfo *cinode; + char lease[4]; + int n; seq_puts(m, "# Version:1\n"); seq_puts(m, "# Format:\n"); seq_puts(m, "# <tree id> <ses id> <persistent fid> <flags> <count> <pid> <uid>"); #ifdef CONFIG_CIFS_DEBUG2 - seq_printf(m, " <filename> <mid>\n"); + seq_puts(m, " <filename> <lease> <mid>\n"); #else - seq_printf(m, " <filename>\n"); + seq_puts(m, " <filename> <lease>\n"); #endif /* CIFS_DEBUG2 */ spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { @@ -266,11 +271,30 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v) cfile->pid, from_kuid(&init_user_ns, cfile->uid), cfile->dentry); + + /* Append lease/oplock caching state as RHW letters */ + inode = d_inode(cfile->dentry); + n = 0; + if (inode) { + cinode = CIFS_I(inode); + if (CIFS_CACHE_READ(cinode)) + lease[n++] = 'R'; + if (CIFS_CACHE_HANDLE(cinode)) + lease[n++] = 'H'; + if (CIFS_CACHE_WRITE(cinode)) + lease[n++] = 'W'; + } + lease[n] = '\0'; + seq_puts(m, " "); + if (n) + seq_printf(m, "%s", lease); + else + seq_puts(m, "NONE"); + #ifdef CONFIG_CIFS_DEBUG2 - seq_printf(m, " %llu\n", cfile->fid.mid); -#else + seq_printf(m, " %llu", cfile->fid.mid); +#endif /* CONFIG_CIFS_DEBUG2 */ seq_printf(m, "\n"); -#endif /* CIFS_DEBUG2 */ } spin_unlock(&tcon->open_file_lock); } @@ -307,7 +331,10 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) if (!cfids) continue; spin_lock(&cfids->cfid_list_lock); /* check lock ordering */ - seq_printf(m, "Num entries: %d\n", cfids->num_entries); + seq_printf(m, "Num entries: %d, cached_dirents: %lu entries, %llu bytes\n", + cfids->num_entries, + (unsigned long)atomic_long_read(&cfids->total_dirents_entries), + (unsigned long long)atomic64_read(&cfids->total_dirents_bytes)); list_for_each_entry(cfid, &cfids->entries, entry) { seq_printf(m, "0x%x 0x%llx 0x%llx %s", tcon->tid, @@ -318,6 +345,9 @@ static int cifs_debug_dirs_proc_show(struct seq_file *m, void *v) seq_printf(m, "\tvalid file info"); if (cfid->dirents.is_valid) seq_printf(m, ", valid dirents"); + if (!list_empty(&cfid->dirents.entries)) + seq_printf(m, ", dirents: %lu entries, %lu bytes", + cfid->dirents.entries_count, cfid->dirents.bytes_used); seq_printf(m, "\n"); } spin_unlock(&cfids->cfid_list_lock); @@ -456,57 +486,55 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v) sc = &server->smbd_conn->socket; sp = &sc->parameters; - seq_printf(m, "\nSMBDirect (in hex) protocol version: %x " - "transport status: %x", - server->smbd_conn->protocol, - server->smbd_conn->socket.status); - seq_printf(m, "\nConn receive_credit_max: %x " - "send_credit_target: %x max_send_size: %x", + seq_printf(m, "\nSMBDirect protocol version: 0x%x " + "transport status: %s (%u)", + SMBDIRECT_V1, + smbdirect_socket_status_string(sc->status), + sc->status); + seq_printf(m, "\nConn receive_credit_max: %u " + "send_credit_target: %u max_send_size: %u", sp->recv_credit_max, sp->send_credit_target, sp->max_send_size); - seq_printf(m, "\nConn max_fragmented_recv_size: %x " - "max_fragmented_send_size: %x max_receive_size:%x", + seq_printf(m, "\nConn max_fragmented_recv_size: %u " + "max_fragmented_send_size: %u max_receive_size:%u", sp->max_fragmented_recv_size, sp->max_fragmented_send_size, sp->max_recv_size); - seq_printf(m, "\nConn keep_alive_interval: %x " - "max_readwrite_size: %x rdma_readwrite_threshold: %x", + seq_printf(m, "\nConn keep_alive_interval: %u " + "max_readwrite_size: %u rdma_readwrite_threshold: %u", sp->keepalive_interval_msec * 1000, sp->max_read_write_size, - server->smbd_conn->rdma_readwrite_threshold); - seq_printf(m, "\nDebug count_get_receive_buffer: %x " - "count_put_receive_buffer: %x count_send_empty: %x", - server->smbd_conn->count_get_receive_buffer, - server->smbd_conn->count_put_receive_buffer, - server->smbd_conn->count_send_empty); - seq_printf(m, "\nRead Queue count_reassembly_queue: %x " - "count_enqueue_reassembly_queue: %x " - "count_dequeue_reassembly_queue: %x " - "reassembly_data_length: %x " - "reassembly_queue_length: %x", - server->smbd_conn->count_reassembly_queue, - server->smbd_conn->count_enqueue_reassembly_queue, - server->smbd_conn->count_dequeue_reassembly_queue, + server->rdma_readwrite_threshold); + seq_printf(m, "\nDebug count_get_receive_buffer: %llu " + "count_put_receive_buffer: %llu count_send_empty: %llu", + sc->statistics.get_receive_buffer, + sc->statistics.put_receive_buffer, + sc->statistics.send_empty); + seq_printf(m, "\nRead Queue " + "count_enqueue_reassembly_queue: %llu " + "count_dequeue_reassembly_queue: %llu " + "reassembly_data_length: %u " + "reassembly_queue_length: %u", + sc->statistics.enqueue_reassembly_queue, + sc->statistics.dequeue_reassembly_queue, sc->recv_io.reassembly.data_length, sc->recv_io.reassembly.queue_length); - seq_printf(m, "\nCurrent Credits send_credits: %x " - "receive_credits: %x receive_credit_target: %x", - atomic_read(&server->smbd_conn->send_credits), - atomic_read(&server->smbd_conn->receive_credits), - server->smbd_conn->receive_credit_target); - seq_printf(m, "\nPending send_pending: %x ", - atomic_read(&server->smbd_conn->send_pending)); - seq_printf(m, "\nReceive buffers count_receive_queue: %x ", - server->smbd_conn->count_receive_queue); - seq_printf(m, "\nMR responder_resources: %x " - "max_frmr_depth: %x mr_type: %x", - server->smbd_conn->responder_resources, - server->smbd_conn->max_frmr_depth, - server->smbd_conn->mr_type); - seq_printf(m, "\nMR mr_ready_count: %x mr_used_count: %x", - atomic_read(&server->smbd_conn->mr_ready_count), - atomic_read(&server->smbd_conn->mr_used_count)); + seq_printf(m, "\nCurrent Credits send_credits: %u " + "receive_credits: %u receive_credit_target: %u", + atomic_read(&sc->send_io.credits.count), + atomic_read(&sc->recv_io.credits.count), + sc->recv_io.credits.target); + seq_printf(m, "\nPending send_pending: %u ", + atomic_read(&sc->send_io.pending.count)); + seq_printf(m, "\nMR responder_resources: %u " + "max_frmr_depth: %u mr_type: 0x%x", + sp->responder_resources, + sp->max_frmr_depth, + sc->mr_io.type); + seq_printf(m, "\nMR mr_ready_count: %u mr_used_count: %u", + atomic_read(&sc->mr_io.ready.count), + atomic_read(&sc->mr_io.used.count)); skip_rdma: #endif seq_printf(m, "\nNumber of credits: %d,%d,%d Dialect 0x%x", diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 43b86fa4d695..9891f55bac1e 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -24,20 +24,14 @@ static const struct cred *spnego_cred; static int cifs_spnego_key_instantiate(struct key *key, struct key_preparsed_payload *prep) { - char *payload; - int ret; + char *payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); - ret = -ENOMEM; - payload = kmemdup(prep->data, prep->datalen, GFP_KERNEL); if (!payload) - goto error; + return -ENOMEM; /* attach the data */ key->payload.data[0] = payload; - ret = 0; - -error: - return ret; + return 0; } static void diff --git a/fs/smb/client/cifs_swn.c b/fs/smb/client/cifs_swn.c index 7233c6a7e6d7..68a1f87c446d 100644 --- a/fs/smb/client/cifs_swn.c +++ b/fs/smb/client/cifs_swn.c @@ -82,10 +82,8 @@ static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg) int ret; skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (skb == NULL) { - ret = -ENOMEM; - goto fail; - } + if (!skb) + return -ENOMEM; hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_REGISTER); if (hdr == NULL) { @@ -172,7 +170,6 @@ static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg) nlmsg_fail: genlmsg_cancel(skb, hdr); nlmsg_free(skb); -fail: return ret; } @@ -313,17 +310,15 @@ static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon) reg = cifs_find_swn_reg(tcon); if (!IS_ERR(reg)) { kref_get(®->ref_count); - mutex_unlock(&cifs_swnreg_idr_mutex); - return reg; + goto unlock; } else if (PTR_ERR(reg) != -EEXIST) { - mutex_unlock(&cifs_swnreg_idr_mutex); - return reg; + goto unlock; } reg = kmalloc(sizeof(struct cifs_swn_reg), GFP_ATOMIC); if (reg == NULL) { - mutex_unlock(&cifs_swnreg_idr_mutex); - return ERR_PTR(-ENOMEM); + ret = -ENOMEM; + goto fail_unlock; } kref_init(®->ref_count); @@ -354,7 +349,7 @@ static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon) reg->ip_notify = (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT); reg->tcon = tcon; - +unlock: mutex_unlock(&cifs_swnreg_idr_mutex); return reg; @@ -365,6 +360,7 @@ fail_idr: idr_remove(&cifs_swnreg_idr, reg->id); fail: kfree(reg); +fail_unlock: mutex_unlock(&cifs_swnreg_idr_mutex); return ERR_PTR(ret); } diff --git a/fs/smb/client/cifsencrypt.c b/fs/smb/client/cifsencrypt.c index 3cc686246908..7b7c8c38fdd0 100644 --- a/fs/smb/client/cifsencrypt.c +++ b/fs/smb/client/cifsencrypt.c @@ -22,8 +22,8 @@ #include <linux/highmem.h> #include <linux/fips.h> #include <linux/iov_iter.h> -#include "../common/arc4.h" #include <crypto/aead.h> +#include <crypto/arc4.h> static size_t cifs_shash_step(void *iter_base, size_t progress, size_t len, void *priv, void *priv2) @@ -725,9 +725,9 @@ calc_seckey(struct cifs_ses *ses) return -ENOMEM; } - cifs_arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); - cifs_arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, - CIFS_CPHTXT_SIZE); + arc4_setkey(ctx_arc4, ses->auth_key.response, CIFS_SESS_KEY_SIZE); + arc4_crypt(ctx_arc4, ses->ntlmssp->ciphertext, sec_key, + CIFS_CPHTXT_SIZE); /* make secondary_key/nonce as session key */ memcpy(ses->auth_key.response, sec_key, CIFS_SESS_KEY_SIZE); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index e1848276bab4..05b1fa76e8cc 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -121,6 +121,46 @@ unsigned int dir_cache_timeout = 30; module_param(dir_cache_timeout, uint, 0644); MODULE_PARM_DESC(dir_cache_timeout, "Number of seconds to cache directory contents for which we have a lease. Default: 30 " "Range: 1 to 65000 seconds, 0 to disable caching dir contents"); +/* Module-wide total cached dirents (in bytes) across all tcons */ +atomic64_t cifs_dircache_bytes_used = ATOMIC64_INIT(0); + +/* + * Write-only module parameter to drop all cached directory entries across + * all CIFS mounts. Echo a non-zero value to trigger. + */ +static void cifs_drop_all_dir_caches(void) +{ + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; + + spin_lock(&cifs_tcp_ses_lock); + list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { + list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { + if (cifs_ses_exiting(ses)) + continue; + list_for_each_entry(tcon, &ses->tcon_list, tcon_list) + invalidate_all_cached_dirs(tcon); + } + } + spin_unlock(&cifs_tcp_ses_lock); +} + +static int cifs_param_set_drop_dir_cache(const char *val, const struct kernel_param *kp) +{ + bool bv; + int rc = kstrtobool(val, &bv); + + if (rc) + return rc; + if (bv) + cifs_drop_all_dir_caches(); + return 0; +} + +module_param_call(drop_dir_cache, cifs_param_set_drop_dir_cache, NULL, NULL, 0200); +MODULE_PARM_DESC(drop_dir_cache, "Write 1 to drop all cached directory entries across all CIFS mounts"); + #ifdef CONFIG_CIFS_STATS2 unsigned int slow_rsp_threshold = 1; module_param(slow_rsp_threshold, uint, 0644); @@ -352,11 +392,27 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len) struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct TCP_Server_Info *server = tcon->ses->server; + struct inode *inode = file_inode(file); + int rc; + + if (!server->ops->fallocate) + return -EOPNOTSUPP; - if (server->ops->fallocate) - return server->ops->fallocate(file, tcon, mode, off, len); + rc = inode_lock_killable(inode); + if (rc) + return rc; + + netfs_wait_for_outstanding_io(inode); - return -EOPNOTSUPP; + rc = file_modified(file); + if (rc) + goto out_unlock; + + rc = server->ops->fallocate(file, tcon, mode, off, len); + +out_unlock: + inode_unlock(inode); + return rc; } static int cifs_permission(struct mnt_idmap *idmap, @@ -857,7 +913,7 @@ static int cifs_drop_inode(struct inode *inode) /* no serverino => unconditional eviction */ return !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) || - generic_drop_inode(inode); + inode_generic_drop(inode); } static const struct super_operations cifs_super_ops = { @@ -1895,7 +1951,9 @@ init_cifs(void) cifs_dbg(VFS, "dir_cache_timeout set to max of 65000 seconds\n"); } - cifsiod_wq = alloc_workqueue("cifsiod", WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + cifsiod_wq = alloc_workqueue("cifsiod", + WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!cifsiod_wq) { rc = -ENOMEM; goto out_clean_proc; @@ -1923,28 +1981,32 @@ init_cifs(void) } cifsoplockd_wq = alloc_workqueue("cifsoplockd", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!cifsoplockd_wq) { rc = -ENOMEM; goto out_destroy_fileinfo_put_wq; } deferredclose_wq = alloc_workqueue("deferredclose", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!deferredclose_wq) { rc = -ENOMEM; goto out_destroy_cifsoplockd_wq; } serverclose_wq = alloc_workqueue("serverclose", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!serverclose_wq) { rc = -ENOMEM; goto out_destroy_deferredclose_wq; } cfid_put_wq = alloc_workqueue("cfid_put_wq", - WQ_FREEZABLE|WQ_MEM_RECLAIM, 0); + WQ_FREEZABLE | WQ_MEM_RECLAIM | WQ_PERCPU, + 0); if (!cfid_put_wq) { rc = -ENOMEM; goto out_destroy_serverclose_wq; diff --git a/fs/smb/client/cifsfs.h b/fs/smb/client/cifsfs.h index 3ce7c614ccc0..e9534258d1ef 100644 --- a/fs/smb/client/cifsfs.h +++ b/fs/smb/client/cifsfs.h @@ -145,6 +145,6 @@ extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ /* when changing internal version - update following two lines at same time */ -#define SMB3_PRODUCT_BUILD 56 -#define CIFS_VERSION "2.56" +#define SMB3_PRODUCT_BUILD 57 +#define CIFS_VERSION "2.57" #endif /* _CIFSFS_H */ diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0fae95cf81c4..8f6f567d7474 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -814,6 +814,13 @@ struct TCP_Server_Info { unsigned int max_read; unsigned int max_write; unsigned int min_offload; + /* + * If payload is less than or equal to the threshold, + * use RDMA send/recv to send upper layer I/O. + * If payload is more than the threshold, + * use RDMA read/write through memory registration for I/O. + */ + unsigned int rdma_readwrite_threshold; unsigned int retrans; struct { bool requested; /* "compress" mount option set*/ @@ -1540,7 +1547,7 @@ struct cifs_io_subrequest { struct kvec iov[2]; struct TCP_Server_Info *server; #ifdef CONFIG_CIFS_SMB_DIRECT - struct smbd_mr *mr; + struct smbdirect_mr_io *mr; #endif struct cifs_credits credits; }; @@ -1559,6 +1566,11 @@ struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file); void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_hdlr, bool offload); void cifsFileInfo_put(struct cifsFileInfo *cifs_file); +int cifs_file_flush(const unsigned int xid, struct inode *inode, + struct cifsFileInfo *cfile); +int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, + const char *full_path, struct cifsFileInfo *open_file, + loff_t size); #define CIFS_CACHE_READ_FLG 1 #define CIFS_CACHE_HANDLE_FLG 2 diff --git a/fs/smb/client/cifssmb.c b/fs/smb/client/cifssmb.c index d20766f664c4..2881efcbe09a 100644 --- a/fs/smb/client/cifssmb.c +++ b/fs/smb/client/cifssmb.c @@ -1163,7 +1163,7 @@ OldOpenRetry: cpu_to_le64(le32_to_cpu(pSMBr->EndOfFile)); pfile_info->EndOfFile = pfile_info->AllocationSize; pfile_info->NumberOfLinks = cpu_to_le32(1); - pfile_info->DeletePending = 0; + pfile_info->DeletePending = 0; /* successful open = not delete pending */ } } @@ -1288,7 +1288,7 @@ openRetry: buf->AllocationSize = rsp->AllocationSize; buf->EndOfFile = rsp->EndOfFile; buf->NumberOfLinks = cpu_to_le32(1); - buf->DeletePending = 0; + buf->DeletePending = 0; /* successful open = not delete pending */ } cifs_buf_release(req); diff --git a/fs/smb/client/dir.c b/fs/smb/client/dir.c index 5223edf6d11a..da5597dbf5b9 100644 --- a/fs/smb/client/dir.c +++ b/fs/smb/client/dir.c @@ -200,8 +200,8 @@ static int cifs_do_create(struct inode *inode, struct dentry *direntry, unsigned full_path = build_path_from_dentry(direntry, page); if (IS_ERR(full_path)) { - free_dentry_path(page); - return PTR_ERR(full_path); + rc = PTR_ERR(full_path); + goto out; } /* If we're caching, we need to be able to fill in around partial writes. */ @@ -322,13 +322,14 @@ retry_open: list_for_each_entry(parent_cfid, &tcon->cfids->entries, entry) { if (parent_cfid->dentry == direntry->d_parent) { cifs_dbg(FYI, "found a parent cached file handle\n"); - if (parent_cfid->has_lease && parent_cfid->time) { + if (is_valid_cached_dir(parent_cfid)) { lease_flags |= SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE; memcpy(fid->parent_lease_key, parent_cfid->fid.lease_key, SMB2_LEASE_KEY_SIZE); parent_cfid->dirents.is_valid = false; + parent_cfid->dirents.is_failed = true; } break; } @@ -484,8 +485,6 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, * in network traffic in the other paths. */ if (!(oflags & O_CREAT)) { - struct dentry *res; - /* * Check for hashed negative dentry. We have already revalidated * the dentry and it is fine. No need to perform another lookup. @@ -493,11 +492,7 @@ cifs_atomic_open(struct inode *inode, struct dentry *direntry, if (!d_in_lookup(direntry)) return -ENOENT; - res = cifs_lookup(inode, direntry, 0); - if (IS_ERR(res)) - return PTR_ERR(res); - - return finish_no_open(file, res); + return finish_no_open(file, cifs_lookup(inode, direntry, 0)); } xid = get_xid(); @@ -683,6 +678,7 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, const char *full_path; void *page; int retry_count = 0; + struct dentry *de; xid = get_xid(); @@ -694,16 +690,15 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, cifs_sb = CIFS_SB(parent_dir_inode->i_sb); tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { - free_xid(xid); - return ERR_CAST(tlink); + de = ERR_CAST(tlink); + goto free_xid; } pTcon = tlink_tcon(tlink); rc = check_name(direntry, pTcon); if (unlikely(rc)) { - cifs_put_tlink(tlink); - free_xid(xid); - return ERR_PTR(rc); + de = ERR_PTR(rc); + goto put_tlink; } /* can not grab the rename sem here since it would @@ -712,16 +707,38 @@ cifs_lookup(struct inode *parent_dir_inode, struct dentry *direntry, page = alloc_dentry_path(); full_path = build_path_from_dentry(direntry, page); if (IS_ERR(full_path)) { - cifs_put_tlink(tlink); - free_xid(xid); - free_dentry_path(page); - return ERR_CAST(full_path); + de = ERR_CAST(full_path); + goto free_dentry_path; } if (d_really_is_positive(direntry)) { cifs_dbg(FYI, "non-NULL inode in lookup\n"); } else { + struct cached_fid *cfid = NULL; + cifs_dbg(FYI, "NULL inode in lookup\n"); + + /* + * We can only rely on negative dentries having the same + * spelling as the cached dirent if case insensitivity is + * forced on mount. + * + * XXX: if servers correctly announce Case Sensitivity Search + * on GetInfo of FileFSAttributeInformation, then we can take + * correct action even if case insensitive is not forced on + * mount. + */ + if (pTcon->nocase && !open_cached_dir_by_dentry(pTcon, direntry->d_parent, &cfid)) { + /* + * dentry is negative and parent is fully cached: + * we can assume file does not exist + */ + if (cfid->dirents.is_valid) { + close_cached_dir(cfid); + goto out; + } + close_cached_dir(cfid); + } } cifs_dbg(FYI, "Full path: %s inode = 0x%p\n", full_path, d_inode(direntry)); @@ -755,24 +772,29 @@ again: } newInode = ERR_PTR(rc); } + +out: + de = d_splice_alias(newInode, direntry); +free_dentry_path: free_dentry_path(page); +put_tlink: cifs_put_tlink(tlink); +free_xid: free_xid(xid); - return d_splice_alias(newInode, direntry); + return de; } static int cifs_d_revalidate(struct inode *dir, const struct qstr *name, struct dentry *direntry, unsigned int flags) { - struct inode *inode; - int rc; - if (flags & LOOKUP_RCU) return -ECHILD; if (d_really_is_positive(direntry)) { - inode = d_inode(direntry); + int rc; + struct inode *inode = d_inode(direntry); + if ((flags & LOOKUP_REVAL) && !CIFS_CACHE_READ(CIFS_I(inode))) CIFS_I(inode)->time = 0; /* force reval */ @@ -812,6 +834,22 @@ cifs_d_revalidate(struct inode *dir, const struct qstr *name, return 1; } + } else { + struct cifs_sb_info *cifs_sb = CIFS_SB(dir->i_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + struct cached_fid *cfid; + + if (!open_cached_dir_by_dentry(tcon, direntry->d_parent, &cfid)) { + /* + * dentry is negative and parent is fully cached: + * we can assume file does not exist + */ + if (cfid->dirents.is_valid) { + close_cached_dir(cfid); + return 1; + } + close_cached_dir(cfid); + } } /* diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c index cb907e18cc35..474dadeb1593 100644 --- a/fs/smb/client/file.c +++ b/fs/smb/client/file.c @@ -97,8 +97,12 @@ retry: cifs_trace_rw_credits_write_prepare); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->smbd_conn) - stream->sreq_max_segs = server->smbd_conn->max_frmr_depth; + if (server->smbd_conn) { + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); + + stream->sreq_max_segs = sp->max_frmr_depth; + } #endif } @@ -187,8 +191,12 @@ static int cifs_prepare_read(struct netfs_io_subrequest *subreq) cifs_trace_rw_credits_read_submit); #ifdef CONFIG_CIFS_SMB_DIRECT - if (server->smbd_conn) - rreq->io_streams[0].sreq_max_segs = server->smbd_conn->max_frmr_depth; + if (server->smbd_conn) { + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); + + rreq->io_streams[0].sreq_max_segs = sp->max_frmr_depth; + } #endif return 0; } @@ -944,6 +952,66 @@ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, } } +int cifs_file_flush(const unsigned int xid, struct inode *inode, + struct cifsFileInfo *cfile) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifs_tcon *tcon; + int rc; + + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC) + return 0; + + if (cfile && (OPEN_FMODE(cfile->f_flags) & FMODE_WRITE)) { + tcon = tlink_tcon(cfile->tlink); + return tcon->ses->server->ops->flush(xid, tcon, + &cfile->fid); + } + rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile); + if (!rc) { + tcon = tlink_tcon(cfile->tlink); + rc = tcon->ses->server->ops->flush(xid, tcon, &cfile->fid); + cifsFileInfo_put(cfile); + } else if (rc == -EBADF) { + rc = 0; + } + return rc; +} + +static int cifs_do_truncate(const unsigned int xid, struct dentry *dentry) +{ + struct cifsInodeInfo *cinode = CIFS_I(d_inode(dentry)); + struct inode *inode = d_inode(dentry); + struct cifsFileInfo *cfile = NULL; + struct TCP_Server_Info *server; + struct cifs_tcon *tcon; + int rc; + + rc = filemap_write_and_wait(inode->i_mapping); + if (is_interrupt_error(rc)) + return -ERESTARTSYS; + mapping_set_error(inode->i_mapping, rc); + + cfile = find_writable_file(cinode, FIND_WR_FSUID_ONLY); + rc = cifs_file_flush(xid, inode, cfile); + if (!rc) { + if (cfile) { + tcon = tlink_tcon(cfile->tlink); + server = tcon->ses->server; + rc = server->ops->set_file_size(xid, tcon, + cfile, 0, false); + } + if (!rc) { + netfs_resize_file(&cinode->netfs, 0, true); + cifs_setsize(inode, 0); + inode->i_blocks = 0; + } + } + if (cfile) + cifsFileInfo_put(cfile); + return rc; +} + int cifs_open(struct inode *inode, struct file *file) { @@ -996,6 +1064,12 @@ int cifs_open(struct inode *inode, struct file *file) file->f_op = &cifs_file_direct_ops; } + if (file->f_flags & O_TRUNC) { + rc = cifs_do_truncate(xid, file_dentry(file)); + if (rc) + goto out; + } + /* Get the cached handle as SMB2 close is deferred */ if (OPEN_FMODE(file->f_flags) & FMODE_WRITE) { rc = cifs_get_writable_path(tcon, full_path, @@ -2677,13 +2751,10 @@ cifs_get_readable_path(struct cifs_tcon *tcon, const char *name, int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, int datasync) { - unsigned int xid; - int rc = 0; - struct cifs_tcon *tcon; - struct TCP_Server_Info *server; struct cifsFileInfo *smbfile = file->private_data; struct inode *inode = file_inode(file); - struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + unsigned int xid; + int rc; rc = file_write_and_wait_range(file, start, end); if (rc) { @@ -2691,39 +2762,15 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end, return rc; } - xid = get_xid(); - - cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n", - file, datasync); + cifs_dbg(FYI, "%s: name=%pD datasync=0x%x\n", __func__, file, datasync); if (!CIFS_CACHE_READ(CIFS_I(inode))) { rc = cifs_zap_mapping(inode); - if (rc) { - cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc); - rc = 0; /* don't care about it in fsync */ - } + cifs_dbg(FYI, "%s: invalidate mapping: rc = %d\n", __func__, rc); } - tcon = tlink_tcon(smbfile->tlink); - if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { - server = tcon->ses->server; - if (server->ops->flush == NULL) { - rc = -ENOSYS; - goto strict_fsync_exit; - } - - if ((OPEN_FMODE(smbfile->f_flags) & FMODE_WRITE) == 0) { - smbfile = find_writable_file(CIFS_I(inode), FIND_WR_ANY); - if (smbfile) { - rc = server->ops->flush(xid, tcon, &smbfile->fid); - cifsFileInfo_put(smbfile); - } else - cifs_dbg(FYI, "ignore fsync for file not open for write\n"); - } else - rc = server->ops->flush(xid, tcon, &smbfile->fid); - } - -strict_fsync_exit: + xid = get_xid(); + rc = cifs_file_flush(xid, inode, smbfile); free_xid(xid); return rc; } diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 072383899e81..e60927b2a7c8 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -773,16 +773,14 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc, } - len = 0; value = strchr(key, '='); if (value) { if (value == key) continue; *value++ = 0; - len = strlen(value); } - ret = vfs_parse_fs_string(fc, key, value, len); + ret = vfs_parse_fs_string(fc, key, value); if (ret < 0) break; } @@ -1820,6 +1818,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } + /* + * Multichannel is not meaningful if max_channels is 1. + * Force multichannel to false to ensure consistent configuration. + */ + if (ctx->multichannel && ctx->max_channels == 1) + ctx->multichannel = false; + return 0; cifs_parse_mount_err: diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 0f0d2dae6283..239dd84a336f 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -2704,7 +2704,7 @@ cifs_dentry_needs_reval(struct dentry *dentry) return true; if (!open_cached_dir_by_dentry(tcon, dentry->d_parent, &cfid)) { - if (cfid->time && cifs_i->time > cfid->time) { + if (cifs_i->time > cfid->time) { close_cached_dir(cfid); return false; } @@ -2844,7 +2844,7 @@ int cifs_revalidate_dentry_attr(struct dentry *dentry) } cifs_dbg(FYI, "Update attributes: %s inode 0x%p count %d dentry: 0x%p d_time %ld jiffies %ld\n", - full_path, inode, inode->i_count.counter, + full_path, inode, icount_read(inode), dentry, cifs_get_time(dentry), jiffies); again: @@ -3007,28 +3007,25 @@ int cifs_fiemap(struct inode *inode, struct fiemap_extent_info *fei, u64 start, void cifs_setsize(struct inode *inode, loff_t offset) { - struct cifsInodeInfo *cifs_i = CIFS_I(inode); - spin_lock(&inode->i_lock); i_size_write(inode, offset); spin_unlock(&inode->i_lock); - - /* Cached inode must be refreshed on truncate */ - cifs_i->time = 0; + inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); truncate_pagecache(inode, offset); + netfs_wait_for_outstanding_io(inode); } -static int -cifs_set_file_size(struct inode *inode, struct iattr *attrs, - unsigned int xid, const char *full_path, struct dentry *dentry) +int cifs_file_set_size(const unsigned int xid, struct dentry *dentry, + const char *full_path, struct cifsFileInfo *open_file, + loff_t size) { - int rc; - struct cifsFileInfo *open_file; - struct cifsInodeInfo *cifsInode = CIFS_I(inode); + struct inode *inode = d_inode(dentry); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsInodeInfo *cifsInode = CIFS_I(inode); struct tcon_link *tlink = NULL; struct cifs_tcon *tcon = NULL; struct TCP_Server_Info *server; + int rc = -EINVAL; /* * To avoid spurious oplock breaks from server, in the case of @@ -3039,19 +3036,25 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, * writebehind data than the SMB timeout for the SetPathInfo * request would allow */ - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); - if (open_file) { + if (open_file && (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE)) { tcon = tlink_tcon(open_file->tlink); server = tcon->ses->server; - if (server->ops->set_file_size) - rc = server->ops->set_file_size(xid, tcon, open_file, - attrs->ia_size, false); - else - rc = -ENOSYS; - cifsFileInfo_put(open_file); - cifs_dbg(FYI, "SetFSize for attrs rc = %d\n", rc); - } else - rc = -EINVAL; + rc = server->ops->set_file_size(xid, tcon, + open_file, + size, false); + cifs_dbg(FYI, "%s: set_file_size: rc = %d\n", __func__, rc); + } else { + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + if (open_file) { + tcon = tlink_tcon(open_file->tlink); + server = tcon->ses->server; + rc = server->ops->set_file_size(xid, tcon, + open_file, + size, false); + cifs_dbg(FYI, "%s: set_file_size: rc = %d\n", __func__, rc); + cifsFileInfo_put(open_file); + } + } if (!rc) goto set_size_out; @@ -3069,20 +3072,15 @@ cifs_set_file_size(struct inode *inode, struct iattr *attrs, * valid, writeable file handle for it was found or because there was * an error setting it by handle. */ - if (server->ops->set_path_size) - rc = server->ops->set_path_size(xid, tcon, full_path, - attrs->ia_size, cifs_sb, false, dentry); - else - rc = -ENOSYS; - cifs_dbg(FYI, "SetEOF by path (setattrs) rc = %d\n", rc); - - if (tlink) - cifs_put_tlink(tlink); + rc = server->ops->set_path_size(xid, tcon, full_path, size, + cifs_sb, false, dentry); + cifs_dbg(FYI, "%s: SetEOF by path (setattrs) rc = %d\n", __func__, rc); + cifs_put_tlink(tlink); set_size_out: if (rc == 0) { - netfs_resize_file(&cifsInode->netfs, attrs->ia_size, true); - cifs_setsize(inode, attrs->ia_size); + netfs_resize_file(&cifsInode->netfs, size, true); + cifs_setsize(inode, size); /* * i_blocks is not related to (i_size / i_blksize), but instead * 512 byte (2**9) size is required for calculating num blocks. @@ -3090,15 +3088,7 @@ set_size_out: * this is best estimate we have for blocks allocated for a file * Number of blocks must be rounded up so size 1 is not 0 blocks */ - inode->i_blocks = (512 - 1 + attrs->ia_size) >> 9; - - /* - * The man page of truncate says if the size changed, - * then the st_ctime and st_mtime fields for the file - * are updated. - */ - attrs->ia_ctime = attrs->ia_mtime = current_time(inode); - attrs->ia_valid |= ATTR_CTIME | ATTR_MTIME; + inode->i_blocks = (512 - 1 + size) >> 9; } return rc; @@ -3118,7 +3108,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) struct tcon_link *tlink; struct cifs_tcon *pTcon; struct cifs_unix_set_info_args *args = NULL; - struct cifsFileInfo *open_file; + struct cifsFileInfo *open_file = NULL; cifs_dbg(FYI, "setattr_unix on file %pd attrs->ia_valid=0x%x\n", direntry, attrs->ia_valid); @@ -3132,6 +3122,9 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) if (rc < 0) goto out; + if (attrs->ia_valid & ATTR_FILE) + open_file = attrs->ia_file->private_data; + full_path = build_path_from_dentry(direntry, page); if (IS_ERR(full_path)) { rc = PTR_ERR(full_path); @@ -3159,9 +3152,16 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) rc = 0; if (attrs->ia_valid & ATTR_SIZE) { - rc = cifs_set_file_size(inode, attrs, xid, full_path, direntry); + rc = cifs_file_set_size(xid, direntry, full_path, + open_file, attrs->ia_size); if (rc != 0) goto out; + /* + * Avoid setting timestamps on the server for ftruncate(2) to + * prevent it from disabling automatic timestamp updates as per + * MS-FSA 2.1.4.17. + */ + attrs->ia_valid &= ~(ATTR_CTIME | ATTR_MTIME); } /* skip mode change if it's just for clearing setuid/setgid */ @@ -3206,14 +3206,24 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) args->ctime = NO_CHANGE_64; args->device = 0; - open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); - if (open_file) { - u16 nfid = open_file->fid.netfid; - u32 npid = open_file->pid; + rc = -EINVAL; + if (open_file && (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE)) { pTcon = tlink_tcon(open_file->tlink); - rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, nfid, npid); - cifsFileInfo_put(open_file); + rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, + open_file->fid.netfid, + open_file->pid); } else { + open_file = find_writable_file(cifsInode, FIND_WR_FSUID_ONLY); + if (open_file) { + pTcon = tlink_tcon(open_file->tlink); + rc = CIFSSMBUnixSetFileInfo(xid, pTcon, args, + open_file->fid.netfid, + open_file->pid); + cifsFileInfo_put(open_file); + } + } + + if (rc) { tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) { rc = PTR_ERR(tlink); @@ -3221,8 +3231,8 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs) } pTcon = tlink_tcon(tlink); rc = CIFSSMBUnixSetPathInfo(xid, pTcon, full_path, args, - cifs_sb->local_nls, - cifs_remap(cifs_sb)); + cifs_sb->local_nls, + cifs_remap(cifs_sb)); cifs_put_tlink(tlink); } @@ -3264,8 +3274,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) struct inode *inode = d_inode(direntry); struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); struct cifsInodeInfo *cifsInode = CIFS_I(inode); - struct cifsFileInfo *wfile; - struct cifs_tcon *tcon; + struct cifsFileInfo *cfile = NULL; const char *full_path; void *page = alloc_dentry_path(); int rc = -EACCES; @@ -3285,6 +3294,9 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) if (rc < 0) goto cifs_setattr_exit; + if (attrs->ia_valid & ATTR_FILE) + cfile = attrs->ia_file->private_data; + full_path = build_path_from_dentry(direntry, page); if (IS_ERR(full_path)) { rc = PTR_ERR(full_path); @@ -3311,25 +3323,23 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs) rc = 0; - if ((attrs->ia_valid & ATTR_MTIME) && - !(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) { - rc = cifs_get_writable_file(cifsInode, FIND_WR_ANY, &wfile); - if (!rc) { - tcon = tlink_tcon(wfile->tlink); - rc = tcon->ses->server->ops->flush(xid, tcon, &wfile->fid); - cifsFileInfo_put(wfile); - if (rc) - goto cifs_setattr_exit; - } else if (rc != -EBADF) + if (attrs->ia_valid & ATTR_MTIME) { + rc = cifs_file_flush(xid, inode, cfile); + if (rc) goto cifs_setattr_exit; - else - rc = 0; } if (attrs->ia_valid & ATTR_SIZE) { - rc = cifs_set_file_size(inode, attrs, xid, full_path, direntry); + rc = cifs_file_set_size(xid, direntry, full_path, + cfile, attrs->ia_size); if (rc != 0) goto cifs_setattr_exit; + /* + * Avoid setting timestamps on the server for ftruncate(2) to + * prevent it from disabling automatic timestamp updates as per + * MS-FSA 2.1.4.17. + */ + attrs->ia_valid &= ~(ATTR_CTIME | ATTR_MTIME); } if (attrs->ia_valid & ATTR_UID) @@ -3459,6 +3469,13 @@ cifs_setattr(struct mnt_idmap *idmap, struct dentry *direntry, if (unlikely(cifs_forced_shutdown(cifs_sb))) return -EIO; + /* + * Avoid setting [cm]time with O_TRUNC to prevent the server from + * disabling automatic timestamp updates as specified in + * MS-FSA 2.1.4.17. + */ + if (attrs->ia_valid & ATTR_OPEN) + return 0; do { #ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY diff --git a/fs/smb/client/readdir.c b/fs/smb/client/readdir.c index 4e5460206397..f0ce26622a14 100644 --- a/fs/smb/client/readdir.c +++ b/fs/smb/client/readdir.c @@ -874,39 +874,42 @@ static void finished_cached_dirents_count(struct cached_dirents *cde, cde->is_valid = 1; } -static void add_cached_dirent(struct cached_dirents *cde, - struct dir_context *ctx, - const char *name, int namelen, - struct cifs_fattr *fattr, - struct file *file) +static bool add_cached_dirent(struct cached_dirents *cde, + struct dir_context *ctx, const char *name, + int namelen, struct cifs_fattr *fattr, + struct file *file) { struct cached_dirent *de; if (cde->file != file) - return; + return false; if (cde->is_valid || cde->is_failed) - return; + return false; if (ctx->pos != cde->pos) { cde->is_failed = 1; - return; + return false; } de = kzalloc(sizeof(*de), GFP_ATOMIC); if (de == NULL) { cde->is_failed = 1; - return; + return false; } de->namelen = namelen; de->name = kstrndup(name, namelen, GFP_ATOMIC); if (de->name == NULL) { kfree(de); cde->is_failed = 1; - return; + return false; } de->pos = ctx->pos; memcpy(&de->fattr, fattr, sizeof(struct cifs_fattr)); list_add_tail(&de->entry, &cde->entries); + /* update accounting */ + cde->entries_count++; + cde->bytes_used += sizeof(*de) + (size_t)namelen + 1; + return true; } static bool cifs_dir_emit(struct dir_context *ctx, @@ -915,7 +918,8 @@ static bool cifs_dir_emit(struct dir_context *ctx, struct cached_fid *cfid, struct file *file) { - bool rc; + size_t delta_bytes = 0; + bool rc, added = false; ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); rc = dir_emit(ctx, name, namelen, ino, fattr->cf_dtype); @@ -923,10 +927,20 @@ static bool cifs_dir_emit(struct dir_context *ctx, return rc; if (cfid) { + /* Cost of this entry */ + delta_bytes = sizeof(struct cached_dirent) + (size_t)namelen + 1; + mutex_lock(&cfid->dirents.de_mutex); - add_cached_dirent(&cfid->dirents, ctx, name, namelen, - fattr, file); + added = add_cached_dirent(&cfid->dirents, ctx, name, namelen, + fattr, file); mutex_unlock(&cfid->dirents.de_mutex); + + if (added) { + /* per-tcon then global for consistency with free path */ + atomic64_add((long long)delta_bytes, &cfid->cfids->total_dirents_bytes); + atomic_long_inc(&cfid->cfids->total_dirents_entries); + atomic64_add((long long)delta_bytes, &cifs_dircache_bytes_used); + } } return rc; diff --git a/fs/smb/client/smb1ops.c b/fs/smb/client/smb1ops.c index a02d41d1ce4a..ca8f3dd7ff63 100644 --- a/fs/smb/client/smb1ops.c +++ b/fs/smb/client/smb1ops.c @@ -652,13 +652,71 @@ static int cifs_query_path_info(const unsigned int xid, #ifdef CONFIG_CIFS_XATTR /* + * For non-symlink WSL reparse points it is required to fetch + * EA $LXMOD which contains in its S_DT part the mandatory file type. + */ + if (!rc && data->reparse_point) { + struct smb2_file_full_ea_info *ea; + u32 next = 0; + + ea = (struct smb2_file_full_ea_info *)data->wsl.eas; + do { + ea = (void *)((u8 *)ea + next); + next = le32_to_cpu(ea->next_entry_offset); + } while (next); + if (le16_to_cpu(ea->ea_value_length)) { + ea->next_entry_offset = cpu_to_le32(ALIGN(sizeof(*ea) + + ea->ea_name_length + 1 + + le16_to_cpu(ea->ea_value_length), 4)); + ea = (void *)((u8 *)ea + le32_to_cpu(ea->next_entry_offset)); + } + + rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_MODE, + &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1], + SMB2_WSL_XATTR_MODE_SIZE, cifs_sb); + if (rc == SMB2_WSL_XATTR_MODE_SIZE) { + ea->next_entry_offset = cpu_to_le32(0); + ea->flags = 0; + ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN; + ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_MODE_SIZE); + memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_MODE, SMB2_WSL_XATTR_NAME_LEN + 1); + data->wsl.eas_len += ALIGN(sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 + + SMB2_WSL_XATTR_MODE_SIZE, 4); + rc = 0; + } else if (rc >= 0) { + /* It is an error if EA $LXMOD has wrong size. */ + rc = -EINVAL; + } else { + /* + * In all other cases ignore error if fetching + * of EA $LXMOD failed. It is needed only for + * non-symlink WSL reparse points and wsl_to_fattr() + * handle the case when EA is missing. + */ + rc = 0; + } + } + + /* * For WSL CHR and BLK reparse points it is required to fetch * EA $LXDEV which contains major and minor device numbers. */ if (!rc && data->reparse_point) { struct smb2_file_full_ea_info *ea; + u32 next = 0; ea = (struct smb2_file_full_ea_info *)data->wsl.eas; + do { + ea = (void *)((u8 *)ea + next); + next = le32_to_cpu(ea->next_entry_offset); + } while (next); + if (le16_to_cpu(ea->ea_value_length)) { + ea->next_entry_offset = cpu_to_le32(ALIGN(sizeof(*ea) + + ea->ea_name_length + 1 + + le16_to_cpu(ea->ea_value_length), 4)); + ea = (void *)((u8 *)ea + le32_to_cpu(ea->next_entry_offset)); + } + rc = CIFSSMBQAllEAs(xid, tcon, full_path, SMB2_WSL_XATTR_DEV, &ea->ea_data[SMB2_WSL_XATTR_NAME_LEN + 1], SMB2_WSL_XATTR_DEV_SIZE, cifs_sb); @@ -668,8 +726,8 @@ static int cifs_query_path_info(const unsigned int xid, ea->ea_name_length = SMB2_WSL_XATTR_NAME_LEN; ea->ea_value_length = cpu_to_le16(SMB2_WSL_XATTR_DEV_SIZE); memcpy(&ea->ea_data[0], SMB2_WSL_XATTR_DEV, SMB2_WSL_XATTR_NAME_LEN + 1); - data->wsl.eas_len = sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 + - SMB2_WSL_XATTR_DEV_SIZE; + data->wsl.eas_len += ALIGN(sizeof(*ea) + SMB2_WSL_XATTR_NAME_LEN + 1 + + SMB2_WSL_XATTR_MODE_SIZE, 4); rc = 0; } else if (rc >= 0) { /* It is an error if EA $LXDEV has wrong size. */ @@ -818,6 +876,11 @@ cifs_mkdir_setinfo(struct inode *inode, const char *full_path, info.Attributes = cpu_to_le32(dosattrs); rc = CIFSSMBSetPathInfo(xid, tcon, full_path, &info, cifs_sb->local_nls, cifs_sb); + if (rc == -EOPNOTSUPP || rc == -EINVAL) + rc = SMBSetInformation(xid, tcon, full_path, + info.Attributes, + 0 /* do not change write time */, + cifs_sb->local_nls, cifs_sb); if (rc == 0) cifsInode->cifsAttrs = dosattrs; } @@ -974,7 +1037,7 @@ smb_set_file_info(struct inode *inode, const char *full_path, .tcon = tcon, .cifs_sb = cifs_sb, .desired_access = SYNCHRONIZE | FILE_WRITE_ATTRIBUTES, - .create_options = cifs_create_options(cifs_sb, CREATE_NOT_DIR), + .create_options = cifs_create_options(cifs_sb, 0), .disposition = FILE_OPEN, .path = full_path, .fid = &fid, diff --git a/fs/smb/client/smb2inode.c b/fs/smb/client/smb2inode.c index 0985db9f86e5..09e3fc81d7cb 100644 --- a/fs/smb/client/smb2inode.c +++ b/fs/smb/client/smb2inode.c @@ -676,7 +676,7 @@ finished: idata->fi.EndOfFile = create_rsp->EndofFile; if (le32_to_cpu(idata->fi.NumberOfLinks) == 0) idata->fi.NumberOfLinks = cpu_to_le32(1); /* dummy value */ - idata->fi.DeletePending = 0; + idata->fi.DeletePending = 0; /* successful open = not delete pending */ idata->fi.Directory = !!(le32_to_cpu(create_rsp->FileAttributes) & ATTR_DIRECTORY); /* smb2_parse_contexts() fills idata->fi.IndexNumber */ @@ -1382,31 +1382,33 @@ int smb2_set_file_info(struct inode *inode, const char *full_path, FILE_BASIC_INFO *buf, const unsigned int xid) { - struct cifs_open_parms oparms; + struct kvec in_iov = { .iov_base = buf, .iov_len = sizeof(*buf), }; struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb); + struct cifsFileInfo *cfile = NULL; + struct cifs_open_parms oparms; struct tcon_link *tlink; struct cifs_tcon *tcon; - struct cifsFileInfo *cfile; - struct kvec in_iov = { .iov_base = buf, .iov_len = sizeof(*buf), }; - int rc; - - if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && - (buf->LastWriteTime == 0) && (buf->ChangeTime == 0) && - (buf->Attributes == 0)) - return 0; /* would be a no op, no sense sending this */ + int rc = 0; tlink = cifs_sb_tlink(cifs_sb); if (IS_ERR(tlink)) return PTR_ERR(tlink); tcon = tlink_tcon(tlink); - cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + if ((buf->CreationTime == 0) && (buf->LastAccessTime == 0) && + (buf->LastWriteTime == 0) && (buf->ChangeTime == 0)) { + if (buf->Attributes == 0) + goto out; /* would be a no op, no sense sending this */ + cifs_get_writable_path(tcon, full_path, FIND_WR_ANY, &cfile); + } + oparms = CIFS_OPARMS(cifs_sb, tcon, full_path, FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, ACL_NO_MODE); rc = smb2_compound_op(xid, tcon, cifs_sb, full_path, &oparms, &in_iov, &(int){SMB2_OP_SET_INFO}, 1, cfile, NULL, NULL, NULL); +out: cifs_put_tlink(tlink); return rc; } diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index e586f3f4b5c9..7c392cf5940b 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -504,8 +504,8 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) wsize = min_t(unsigned int, wsize, server->max_write); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { - struct smbdirect_socket_parameters *sp = - &server->smbd_conn->socket.parameters; + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); if (server->sign) /* @@ -555,8 +555,8 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) rsize = min_t(unsigned int, rsize, server->max_read); #ifdef CONFIG_CIFS_SMB_DIRECT if (server->rdma) { - struct smbdirect_socket_parameters *sp = - &server->smbd_conn->socket.parameters; + const struct smbdirect_socket_parameters *sp = + smbd_get_parameters(server->smbd_conn); if (server->sign) /* @@ -954,11 +954,8 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon, rc = open_cached_dir(xid, tcon, full_path, cifs_sb, true, &cfid); if (!rc) { - if (cfid->has_lease) { - close_cached_dir(cfid); - return 0; - } close_cached_dir(cfid); + return 0; } utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb); @@ -1806,140 +1803,226 @@ free_vars: return rc; } +/** + * calc_chunk_count - calculates the number chunks to be filled in the Chunks[] + * array of struct copychunk_ioctl + * + * @tcon: destination file tcon + * @bytes_left: how many bytes are left to copy + * + * Return: maximum number of chunks with which Chunks[] can be filled. + */ +static inline u32 +calc_chunk_count(struct cifs_tcon *tcon, u64 bytes_left) +{ + u32 max_chunks = READ_ONCE(tcon->max_chunks); + u32 max_bytes_copy = READ_ONCE(tcon->max_bytes_copy); + u32 max_bytes_chunk = READ_ONCE(tcon->max_bytes_chunk); + u64 need; + u32 allowed; + + if (!max_bytes_chunk || !max_bytes_copy || !max_chunks) + return 0; + + /* chunks needed for the remaining bytes */ + need = DIV_ROUND_UP_ULL(bytes_left, max_bytes_chunk); + /* chunks allowed per cc request */ + allowed = DIV_ROUND_UP(max_bytes_copy, max_bytes_chunk); + + return (u32)umin(need, umin(max_chunks, allowed)); +} + +/** + * smb2_copychunk_range - server-side copy of data range + * + * @xid: transaction id + * @src_file: source file + * @dst_file: destination file + * @src_off: source file byte offset + * @len: number of bytes to copy + * @dst_off: destination file byte offset + * + * Obtains a resume key for @src_file and issues FSCTL_SRV_COPYCHUNK_WRITE + * IOCTLs, splitting the request into chunks limited by tcon->max_*. + * + * Return: @len on success; negative errno on failure. + */ static ssize_t smb2_copychunk_range(const unsigned int xid, - struct cifsFileInfo *srcfile, - struct cifsFileInfo *trgtfile, u64 src_off, - u64 len, u64 dest_off) + struct cifsFileInfo *src_file, + struct cifsFileInfo *dst_file, + u64 src_off, + u64 len, + u64 dst_off) { - int rc; - unsigned int ret_data_len; - struct copychunk_ioctl *pcchunk; - struct copychunk_ioctl_rsp *retbuf = NULL; + int rc = 0; + unsigned int ret_data_len = 0; + struct copychunk_ioctl *cc_req = NULL; + struct copychunk_ioctl_rsp *cc_rsp = NULL; struct cifs_tcon *tcon; - int chunks_copied = 0; - bool chunk_sizes_updated = false; - ssize_t bytes_written, total_bytes_written = 0; + struct copychunk *chunk; + u32 chunks, chunk_count, chunk_bytes; + u32 copy_bytes, copy_bytes_left; + u32 chunks_written, bytes_written; + u64 total_bytes_left = len; + u64 src_off_prev, dst_off_prev; + u32 retries = 0; + + tcon = tlink_tcon(dst_file->tlink); + + trace_smb3_copychunk_enter(xid, src_file->fid.volatile_fid, + dst_file->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dst_off, len); + +retry: + chunk_count = calc_chunk_count(tcon, total_bytes_left); + if (!chunk_count) { + rc = -EOPNOTSUPP; + goto out; + } - pcchunk = kmalloc(sizeof(struct copychunk_ioctl), GFP_KERNEL); - if (pcchunk == NULL) - return -ENOMEM; + cc_req = kzalloc(struct_size(cc_req, Chunks, chunk_count), GFP_KERNEL); + if (!cc_req) { + rc = -ENOMEM; + goto out; + } - cifs_dbg(FYI, "%s: about to call request res key\n", __func__); /* Request a key from the server to identify the source of the copy */ - rc = SMB2_request_res_key(xid, tlink_tcon(srcfile->tlink), - srcfile->fid.persistent_fid, - srcfile->fid.volatile_fid, pcchunk); + rc = SMB2_request_res_key(xid, + tlink_tcon(src_file->tlink), + src_file->fid.persistent_fid, + src_file->fid.volatile_fid, + cc_req); - /* Note: request_res_key sets res_key null only if rc !=0 */ + /* Note: request_res_key sets res_key null only if rc != 0 */ if (rc) - goto cchunk_out; + goto out; + + while (total_bytes_left > 0) { - /* For now array only one chunk long, will make more flexible later */ - pcchunk->ChunkCount = cpu_to_le32(1); - pcchunk->Reserved = 0; - pcchunk->Reserved2 = 0; + /* Store previous offsets to allow rewind */ + src_off_prev = src_off; + dst_off_prev = dst_off; - tcon = tlink_tcon(trgtfile->tlink); + chunks = 0; + copy_bytes = 0; + copy_bytes_left = umin(total_bytes_left, tcon->max_bytes_copy); + while (copy_bytes_left > 0 && chunks < chunk_count) { + chunk = &cc_req->Chunks[chunks++]; - trace_smb3_copychunk_enter(xid, srcfile->fid.volatile_fid, - trgtfile->fid.volatile_fid, tcon->tid, - tcon->ses->Suid, src_off, dest_off, len); + chunk->SourceOffset = cpu_to_le64(src_off); + chunk->TargetOffset = cpu_to_le64(dst_off); - while (len > 0) { - pcchunk->SourceOffset = cpu_to_le64(src_off); - pcchunk->TargetOffset = cpu_to_le64(dest_off); - pcchunk->Length = - cpu_to_le32(min_t(u64, len, tcon->max_bytes_chunk)); + chunk_bytes = umin(copy_bytes_left, tcon->max_bytes_chunk); + + chunk->Length = cpu_to_le32(chunk_bytes); + /* Buffer is zeroed, no need to set chunk->Reserved = 0 */ + + src_off += chunk_bytes; + dst_off += chunk_bytes; + + copy_bytes_left -= chunk_bytes; + copy_bytes += chunk_bytes; + } + + cc_req->ChunkCount = cpu_to_le32(chunks); + /* Buffer is zeroed, no need to set cc_req->Reserved = 0 */ /* Request server copy to target from src identified by key */ - kfree(retbuf); - retbuf = NULL; - rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, - trgtfile->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, - (char *)pcchunk, sizeof(struct copychunk_ioctl), - CIFSMaxBufSize, (char **)&retbuf, &ret_data_len); + kfree(cc_rsp); + cc_rsp = NULL; + rc = SMB2_ioctl(xid, tcon, dst_file->fid.persistent_fid, + dst_file->fid.volatile_fid, FSCTL_SRV_COPYCHUNK_WRITE, + (char *)cc_req, struct_size(cc_req, Chunks, chunks), + CIFSMaxBufSize, (char **)&cc_rsp, &ret_data_len); + + if (rc && rc != -EINVAL) + goto out; + + if (unlikely(ret_data_len != sizeof(*cc_rsp))) { + cifs_tcon_dbg(VFS, "Copychunk invalid response: size %u/%zu\n", + ret_data_len, sizeof(*cc_rsp)); + rc = -EIO; + goto out; + } + + bytes_written = le32_to_cpu(cc_rsp->TotalBytesWritten); + chunks_written = le32_to_cpu(cc_rsp->ChunksWritten); + chunk_bytes = le32_to_cpu(cc_rsp->ChunkBytesWritten); + if (rc == 0) { - if (ret_data_len != - sizeof(struct copychunk_ioctl_rsp)) { - cifs_tcon_dbg(VFS, "Invalid cchunk response size\n"); - rc = -EIO; - goto cchunk_out; - } - if (retbuf->TotalBytesWritten == 0) { - cifs_dbg(FYI, "no bytes copied\n"); + /* Check if server claimed to write more than we asked */ + if (unlikely(!bytes_written || bytes_written > copy_bytes || + !chunks_written || chunks_written > chunks)) { + cifs_tcon_dbg(VFS, "Copychunk invalid response: bytes written %u/%u, chunks written %u/%u\n", + bytes_written, copy_bytes, chunks_written, chunks); rc = -EIO; - goto cchunk_out; - } - /* - * Check if server claimed to write more than we asked - */ - if (le32_to_cpu(retbuf->TotalBytesWritten) > - le32_to_cpu(pcchunk->Length)) { - cifs_tcon_dbg(VFS, "Invalid copy chunk response\n"); - rc = -EIO; - goto cchunk_out; + goto out; } - if (le32_to_cpu(retbuf->ChunksWritten) != 1) { - cifs_tcon_dbg(VFS, "Invalid num chunks written\n"); - rc = -EIO; - goto cchunk_out; + + /* Partial write: rewind */ + if (bytes_written < copy_bytes) { + u32 delta = copy_bytes - bytes_written; + + src_off -= delta; + dst_off -= delta; } - chunks_copied++; - - bytes_written = le32_to_cpu(retbuf->TotalBytesWritten); - src_off += bytes_written; - dest_off += bytes_written; - len -= bytes_written; - total_bytes_written += bytes_written; - - cifs_dbg(FYI, "Chunks %d PartialChunk %d Total %zu\n", - le32_to_cpu(retbuf->ChunksWritten), - le32_to_cpu(retbuf->ChunkBytesWritten), - bytes_written); - trace_smb3_copychunk_done(xid, srcfile->fid.volatile_fid, - trgtfile->fid.volatile_fid, tcon->tid, - tcon->ses->Suid, src_off, dest_off, len); - } else if (rc == -EINVAL) { - if (ret_data_len != sizeof(struct copychunk_ioctl_rsp)) - goto cchunk_out; - - cifs_dbg(FYI, "MaxChunks %d BytesChunk %d MaxCopy %d\n", - le32_to_cpu(retbuf->ChunksWritten), - le32_to_cpu(retbuf->ChunkBytesWritten), - le32_to_cpu(retbuf->TotalBytesWritten)); - /* - * Check if this is the first request using these sizes, - * (ie check if copy succeed once with original sizes - * and check if the server gave us different sizes after - * we already updated max sizes on previous request). - * if not then why is the server returning an error now - */ - if ((chunks_copied != 0) || chunk_sizes_updated) - goto cchunk_out; - - /* Check that server is not asking us to grow size */ - if (le32_to_cpu(retbuf->ChunkBytesWritten) < - tcon->max_bytes_chunk) - tcon->max_bytes_chunk = - le32_to_cpu(retbuf->ChunkBytesWritten); - else - goto cchunk_out; /* server gave us bogus size */ + total_bytes_left -= bytes_written; + continue; + } - /* No need to change MaxChunks since already set to 1 */ - chunk_sizes_updated = true; - } else - goto cchunk_out; + /* + * Check if server is not asking us to reduce size. + * + * Note: As per MS-SMB2 2.2.32.1, the values returned + * in cc_rsp are not strictly lower than what existed + * before. + */ + if (bytes_written < tcon->max_bytes_copy) { + cifs_tcon_dbg(FYI, "Copychunk MaxBytesCopy updated: %u -> %u\n", + tcon->max_bytes_copy, bytes_written); + tcon->max_bytes_copy = bytes_written; + } + + if (chunks_written < tcon->max_chunks) { + cifs_tcon_dbg(FYI, "Copychunk MaxChunks updated: %u -> %u\n", + tcon->max_chunks, chunks_written); + tcon->max_chunks = chunks_written; + } + + if (chunk_bytes < tcon->max_bytes_chunk) { + cifs_tcon_dbg(FYI, "Copychunk MaxBytesChunk updated: %u -> %u\n", + tcon->max_bytes_chunk, chunk_bytes); + tcon->max_bytes_chunk = chunk_bytes; + } + + /* reset to last offsets */ + if (retries++ < 2) { + src_off = src_off_prev; + dst_off = dst_off_prev; + kfree(cc_req); + cc_req = NULL; + goto retry; + } + + break; } -cchunk_out: - kfree(pcchunk); - kfree(retbuf); - if (rc) +out: + kfree(cc_req); + kfree(cc_rsp); + if (rc) { + trace_smb3_copychunk_err(xid, src_file->fid.volatile_fid, + dst_file->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dst_off, len, rc); return rc; - else - return total_bytes_written; + } else { + trace_smb3_copychunk_done(xid, src_file->fid.volatile_fid, + dst_file->fid.volatile_fid, tcon->tid, + tcon->ses->Suid, src_off, dst_off, len); + return len; + } } static int @@ -3284,7 +3367,6 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, trace_smb3_zero_enter(xid, cfile->fid.persistent_fid, tcon->tid, ses->Suid, offset, len); - inode_lock(inode); filemap_invalidate_lock(inode->i_mapping); i_size = i_size_read(inode); @@ -3302,6 +3384,7 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, * first, otherwise the data may be inconsistent with the server. */ truncate_pagecache_range(inode, offset, offset + len - 1); + netfs_wait_for_outstanding_io(inode); /* if file not oplocked can't be sure whether asking to extend size */ rc = -EOPNOTSUPP; @@ -3330,7 +3413,6 @@ static long smb3_zero_range(struct file *file, struct cifs_tcon *tcon, zero_range_exit: filemap_invalidate_unlock(inode->i_mapping); - inode_unlock(inode); free_xid(xid); if (rc) trace_smb3_zero_err(xid, cfile->fid.persistent_fid, tcon->tid, @@ -3354,7 +3436,6 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode_lock(inode); /* Need to make file sparse, if not already, before freeing range. */ /* Consider adding equivalent for compressed since it could also work */ if (!smb2_set_sparse(xid, tcon, cfile, inode, set_sparse)) { @@ -3368,6 +3449,7 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, * caches first, otherwise the data may be inconsistent with the server. */ truncate_pagecache_range(inode, offset, offset + len - 1); + netfs_wait_for_outstanding_io(inode); cifs_dbg(FYI, "Offset %lld len %lld\n", offset, len); @@ -3402,7 +3484,6 @@ static long smb3_punch_hole(struct file *file, struct cifs_tcon *tcon, unlock: filemap_invalidate_unlock(inode->i_mapping); out: - inode_unlock(inode); free_xid(xid); return rc; } @@ -3666,8 +3747,6 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode_lock(inode); - old_eof = i_size_read(inode); if ((off >= old_eof) || off + len >= old_eof) { @@ -3682,6 +3761,7 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, truncate_pagecache_range(inode, off, old_eof); ictx->zero_point = old_eof; + netfs_wait_for_outstanding_io(inode); rc = smb2_copychunk_range(xid, cfile, cfile, off + len, old_eof - off - len, off); @@ -3702,8 +3782,7 @@ static long smb3_collapse_range(struct file *file, struct cifs_tcon *tcon, fscache_resize_cookie(cifs_inode_cookie(inode), new_eof); out_2: filemap_invalidate_unlock(inode->i_mapping); - out: - inode_unlock(inode); +out: free_xid(xid); return rc; } @@ -3720,8 +3799,6 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, xid = get_xid(); - inode_lock(inode); - old_eof = i_size_read(inode); if (off >= old_eof) { rc = -EINVAL; @@ -3736,6 +3813,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, if (rc < 0) goto out_2; truncate_pagecache_range(inode, off, old_eof); + netfs_wait_for_outstanding_io(inode); rc = SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid, cfile->fid.volatile_fid, cfile->pid, new_eof); @@ -3758,8 +3836,7 @@ static long smb3_insert_range(struct file *file, struct cifs_tcon *tcon, rc = 0; out_2: filemap_invalidate_unlock(inode->i_mapping); - out: - inode_unlock(inode); +out: free_xid(xid); return rc; } @@ -4219,7 +4296,7 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len, static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst *rqst, int num_rqst, const u8 *sig, u8 **iv, struct aead_request **req, struct sg_table *sgt, - unsigned int *num_sgs, size_t *sensitive_size) + unsigned int *num_sgs) { unsigned int req_size = sizeof(**req) + crypto_aead_reqsize(tfm); unsigned int iv_size = crypto_aead_ivsize(tfm); @@ -4236,9 +4313,8 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst len += req_size; len = ALIGN(len, __alignof__(struct scatterlist)); len += array_size(*num_sgs, sizeof(struct scatterlist)); - *sensitive_size = len; - p = kvzalloc(len, GFP_NOFS); + p = kzalloc(len, GFP_NOFS); if (!p) return ERR_PTR(-ENOMEM); @@ -4252,16 +4328,14 @@ static void *smb2_aead_req_alloc(struct crypto_aead *tfm, const struct smb_rqst static void *smb2_get_aead_req(struct crypto_aead *tfm, struct smb_rqst *rqst, int num_rqst, const u8 *sig, u8 **iv, - struct aead_request **req, struct scatterlist **sgl, - size_t *sensitive_size) + struct aead_request **req, struct scatterlist **sgl) { struct sg_table sgtable = {}; unsigned int skip, num_sgs, i, j; ssize_t rc; void *p; - p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable, - &num_sgs, sensitive_size); + p = smb2_aead_req_alloc(tfm, rqst, num_rqst, sig, iv, req, &sgtable, &num_sgs); if (IS_ERR(p)) return ERR_CAST(p); @@ -4350,7 +4424,6 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, DECLARE_CRYPTO_WAIT(wait); unsigned int crypt_len = le32_to_cpu(tr_hdr->OriginalMessageSize); void *creq; - size_t sensitive_size; rc = smb2_get_enc_key(server, le64_to_cpu(tr_hdr->SessionId), enc, key); if (rc) { @@ -4376,8 +4449,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, return rc; } - creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg, - &sensitive_size); + creq = smb2_get_aead_req(tfm, rqst, num_rqst, sign, &iv, &req, &sg); if (IS_ERR(creq)) return PTR_ERR(creq); @@ -4407,7 +4479,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, if (!rc && enc) memcpy(&tr_hdr->Signature, sign, SMB2_SIGNATURE_SIZE); - kvfree_sensitive(creq, sensitive_size); + kfree_sensitive(creq); return rc; } @@ -4658,7 +4730,7 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, unsigned int pad_len; struct cifs_io_subrequest *rdata = mid->callback_data; struct smb2_hdr *shdr = (struct smb2_hdr *)buf; - int length; + size_t copied; bool use_rdma_mr = false; if (shdr->Command != SMB2_READ) { @@ -4771,10 +4843,10 @@ handle_read_data(struct TCP_Server_Info *server, struct mid_q_entry *mid, } else if (buf_len >= data_offset + data_len) { /* read response payload is in buf */ WARN_ONCE(buffer, "read data can be either in buf or in buffer"); - length = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); - if (length < 0) - return length; - rdata->got_bytes = data_len; + copied = copy_to_iter(buf + data_offset, data_len, &rdata->subreq.io_iter); + if (copied == 0) + return -EIO; + rdata->got_bytes = copied; } else { /* read response payload cannot be in both buf and pages */ WARN_ONCE(1, "buf can not contain only a part of read data"); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index c3b9d3f6210f..b0739a2661bf 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -240,8 +240,8 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, */ if (smb2_command != SMB2_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); - cifs_dbg(FYI, "can not send cmd %d while umounting\n", - smb2_command); + cifs_tcon_dbg(FYI, "can not send cmd %d while umounting\n", + smb2_command); return -ENODEV; } } @@ -296,9 +296,9 @@ again: return 0; } spin_unlock(&ses->chan_lock); - cifs_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d", - tcon->ses->chans_need_reconnect, - tcon->need_reconnect); + cifs_tcon_dbg(FYI, "sess reconnect mask: 0x%lx, tcon reconnect: %d\n", + tcon->ses->chans_need_reconnect, + tcon->need_reconnect); mutex_lock(&ses->session_mutex); /* @@ -392,11 +392,11 @@ skip_sess_setup: rc = cifs_tree_connect(0, tcon); - cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); + cifs_tcon_dbg(FYI, "reconnect tcon rc = %d\n", rc); if (rc) { /* If sess reconnected but tcon didn't, something strange ... */ mutex_unlock(&ses->session_mutex); - cifs_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); + cifs_tcon_dbg(VFS, "reconnect tcon failed rc = %d\n", rc); goto out; } @@ -442,8 +442,8 @@ skip_sess_setup: from_reconnect); goto skip_add_channels; } else if (rc) - cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", - __func__, rc); + cifs_tcon_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); if (ses->chan_max > ses->chan_count && ses->iface_count && @@ -3277,7 +3277,7 @@ replay_again: buf->EndOfFile = rsp->EndofFile; buf->Attributes = rsp->FileAttributes; buf->NumberOfLinks = cpu_to_le32(1); - buf->DeletePending = 0; + buf->DeletePending = 0; /* successful open = not delete pending */ } @@ -4411,7 +4411,7 @@ static inline bool smb3_use_rdma_offload(struct cifs_io_parms *io_parms) return false; /* offload also has its overhead, so only do it if desired */ - if (io_parms->length < server->smbd_conn->rdma_readwrite_threshold) + if (io_parms->length < server->rdma_readwrite_threshold) return false; return true; diff --git a/fs/smb/client/smb2pdu.h b/fs/smb/client/smb2pdu.h index 3c09a58dfd07..101024f8f725 100644 --- a/fs/smb/client/smb2pdu.h +++ b/fs/smb/client/smb2pdu.h @@ -201,16 +201,20 @@ struct resume_key_req { char Context[]; /* ignored, Windows sets to 4 bytes of zero */ } __packed; + +struct copychunk { + __le64 SourceOffset; + __le64 TargetOffset; + __le32 Length; + __le32 Reserved; +} __packed; + /* this goes in the ioctl buffer when doing a copychunk request */ struct copychunk_ioctl { char SourceKey[COPY_CHUNK_RES_KEY_SIZE]; - __le32 ChunkCount; /* we are only sending 1 */ + __le32 ChunkCount; __le32 Reserved; - /* array will only be one chunk long for us */ - __le64 SourceOffset; - __le64 TargetOffset; - __le32 Length; /* how many bytes to copy */ - __u32 Reserved2; + struct copychunk Chunks[]; } __packed; struct copychunk_ioctl_rsp { diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index bc0e92eb2b64..33f33013b392 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -240,11 +240,6 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) return NULL; } tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); - if (!tcon) { - spin_unlock(&cifs_tcp_ses_lock); - cifs_put_smb_ses(ses); - return NULL; - } spin_unlock(&cifs_tcp_ses_lock); /* tcon already has a ref to ses, so we don't need ses anymore */ cifs_put_smb_ses(ses); diff --git a/fs/smb/client/smbdirect.c b/fs/smb/client/smbdirect.c index e0fce5033004..316f398c70f4 100644 --- a/fs/smb/client/smbdirect.c +++ b/fs/smb/client/smbdirect.c @@ -13,28 +13,35 @@ #include "cifsproto.h" #include "smb2proto.h" +const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn) +{ + struct smbdirect_socket *sc = &conn->socket; + + return &sc->parameters; +} + static struct smbdirect_recv_io *get_receive_buffer( - struct smbd_connection *info); + struct smbdirect_socket *sc); static void put_receive_buffer( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response); -static int allocate_receive_buffers(struct smbd_connection *info, int num_buf); -static void destroy_receive_buffers(struct smbd_connection *info); +static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf); +static void destroy_receive_buffers(struct smbdirect_socket *sc); static void enqueue_reassembly( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response, int data_length); static struct smbdirect_recv_io *_get_first_reassembly( - struct smbd_connection *info); + struct smbdirect_socket *sc); static int smbd_post_recv( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response); -static int smbd_post_send_empty(struct smbd_connection *info); +static int smbd_post_send_empty(struct smbdirect_socket *sc); -static void destroy_mr_list(struct smbd_connection *info); -static int allocate_mr_list(struct smbd_connection *info); +static void destroy_mr_list(struct smbdirect_socket *sc); +static int allocate_mr_list(struct smbdirect_socket *sc); struct smb_extract_to_rdma { struct ib_sge *sge; @@ -57,6 +64,9 @@ static ssize_t smb_extract_iter_to_rdma(struct iov_iter *iter, size_t len, /* SMBD negotiation timeout in seconds */ #define SMBD_NEGOTIATE_TIMEOUT 120 +/* The timeout to wait for a keepalive message from peer in seconds */ +#define KEEPALIVE_RECV_TIMEOUT 5 + /* SMBD minimum receive size and fragmented sized defined in [MS-SMBD] */ #define SMBD_MIN_RECEIVE_SIZE 128 #define SMBD_MIN_FRAGMENTED_SIZE 131072 @@ -155,65 +165,277 @@ do { \ #define log_rdma_mr(level, fmt, args...) \ log_rdma(level, LOG_RDMA_MR, fmt, ##args) +static void smbd_disconnect_wake_up_all(struct smbdirect_socket *sc) +{ + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + wake_up_all(&sc->status_wait); + wake_up_all(&sc->send_io.credits.wait_queue); + wake_up_all(&sc->send_io.pending.dec_wait_queue); + wake_up_all(&sc->send_io.pending.zero_wait_queue); + wake_up_all(&sc->recv_io.reassembly.wait_queue); + wake_up_all(&sc->mr_io.ready.wait_queue); + wake_up_all(&sc->mr_io.cleanup.wait_queue); +} + static void smbd_disconnect_rdma_work(struct work_struct *work) { - struct smbd_connection *info = - container_of(work, struct smbd_connection, disconnect_work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, disconnect_work); - if (sc->status == SMBDIRECT_SOCKET_CONNECTED) { + /* + * make sure this and other work is not queued again + * but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->disconnect_work); + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_CONNECTED: + case SMBDIRECT_SOCKET_ERROR: sc->status = SMBDIRECT_SOCKET_DISCONNECTING; rdma_disconnect(sc->rdma.cm_id); + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + /* + * rdma_connect() never reached + * RDMA_CM_EVENT_ESTABLISHED + */ + sc->status = SMBDIRECT_SOCKET_DISCONNECTED; + break; + + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + break; } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + smbd_disconnect_wake_up_all(sc); } -static void smbd_disconnect_rdma_connection(struct smbd_connection *info) +static void smbd_disconnect_rdma_connection(struct smbdirect_socket *sc) { - queue_work(info->workqueue, &info->disconnect_work); + /* + * make sure other work (than disconnect_work) is + * not queued again but here we don't block and avoid + * disable[_delayed]_work_sync() + */ + disable_work(&sc->recv_io.posted.refill_work); + disable_work(&sc->mr_io.recovery_work); + disable_work(&sc->idle.immediate_work); + disable_delayed_work(&sc->idle.timer_work); + + if (sc->first_error == 0) + sc->first_error = -ECONNABORTED; + + switch (sc->status) { + case SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED: + case SMBDIRECT_SOCKET_NEGOTIATE_FAILED: + case SMBDIRECT_SOCKET_ERROR: + case SMBDIRECT_SOCKET_DISCONNECTING: + case SMBDIRECT_SOCKET_DISCONNECTED: + case SMBDIRECT_SOCKET_DESTROYED: + /* + * Keep the current error status + */ + break; + + case SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + break; + + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED: + case SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING: + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + break; + + case SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED: + case SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING: + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + break; + + case SMBDIRECT_SOCKET_NEGOTIATE_NEEDED: + case SMBDIRECT_SOCKET_NEGOTIATE_RUNNING: + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + break; + + case SMBDIRECT_SOCKET_CREATED: + case SMBDIRECT_SOCKET_CONNECTED: + sc->status = SMBDIRECT_SOCKET_ERROR; + break; + } + + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + */ + smbd_disconnect_wake_up_all(sc); + + queue_work(sc->workqueue, &sc->disconnect_work); } /* Upcall from RDMA CM */ static int smbd_conn_upcall( struct rdma_cm_id *id, struct rdma_cm_event *event) { - struct smbd_connection *info = id->context; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = id->context; + struct smbdirect_socket_parameters *sp = &sc->parameters; const char *event_name = rdma_event_msg(event->event); + u8 peer_initiator_depth; + u8 peer_responder_resources; log_rdma_event(INFO, "event=%s status=%d\n", event_name, event->status); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED; + wake_up(&sc->status_wait); + break; + case RDMA_CM_EVENT_ROUTE_RESOLVED: - info->ri_rc = 0; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED; + wake_up(&sc->status_wait); break; case RDMA_CM_EVENT_ADDR_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - info->ri_rc = -EHOSTUNREACH; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ROUTE_ERROR: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - info->ri_rc = -ENETUNREACH; - complete(&info->ri_done); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_ESTABLISHED: log_rdma_event(INFO, "connected event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_CONNECTED; - wake_up_interruptible(&info->status_wait); + + /* + * Here we work around an inconsistency between + * iWarp and other devices (at least rxe and irdma using RoCEv2) + */ + if (rdma_protocol_iwarp(id->device, id->port_num)) { + /* + * iWarp devices report the peer's values + * with the perspective of the peer here. + * Tested with siw and irdma (in iwarp mode) + * We need to change to our perspective here, + * so we need to switch the values. + */ + peer_initiator_depth = event->param.conn.responder_resources; + peer_responder_resources = event->param.conn.initiator_depth; + } else { + /* + * Non iWarp devices report the peer's values + * already changed to our perspective here. + * Tested with rxe and irdma (in roce mode). + */ + peer_initiator_depth = event->param.conn.initiator_depth; + peer_responder_resources = event->param.conn.responder_resources; + } + if (rdma_protocol_iwarp(id->device, id->port_num) && + event->param.conn.private_data_len == 8) { + /* + * Legacy clients with only iWarp MPA v1 support + * need a private blob in order to negotiate + * the IRD/ORD values. + */ + const __be32 *ird_ord_hdr = event->param.conn.private_data; + u32 ird32 = be32_to_cpu(ird_ord_hdr[0]); + u32 ord32 = be32_to_cpu(ird_ord_hdr[1]); + + /* + * cifs.ko sends the legacy IRD/ORD negotiation + * event if iWarp MPA v2 was used. + * + * Here we check that the values match and only + * mark the client as legacy if they don't match. + */ + if ((u32)event->param.conn.initiator_depth != ird32 || + (u32)event->param.conn.responder_resources != ord32) { + /* + * There are broken clients (old cifs.ko) + * using little endian and also + * struct rdma_conn_param only uses u8 + * for initiator_depth and responder_resources, + * so we truncate the value to U8_MAX. + * + * smb_direct_accept_client() will then + * do the real negotiation in order to + * select the minimum between client and + * server. + */ + ird32 = min_t(u32, ird32, U8_MAX); + ord32 = min_t(u32, ord32, U8_MAX); + + sc->rdma.legacy_iwarp = true; + peer_initiator_depth = (u8)ird32; + peer_responder_resources = (u8)ord32; + } + } + + /* + * negotiate the value by using the minimum + * between client and server if the client provided + * non 0 values. + */ + if (peer_initiator_depth != 0) + sp->initiator_depth = + min_t(u8, sp->initiator_depth, + peer_initiator_depth); + if (peer_responder_resources != 0) + sp->responder_resources = + min_t(u8, sp->responder_resources, + peer_responder_resources); + + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_NEEDED; + wake_up(&sc->status_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: case RDMA_CM_EVENT_UNREACHABLE: case RDMA_CM_EVENT_REJECTED: log_rdma_event(ERR, "connecting failed event=%s\n", event_name); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->status_wait); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_FAILED; + smbd_disconnect_rdma_work(&sc->disconnect_work); break; case RDMA_CM_EVENT_DEVICE_REMOVAL: @@ -221,15 +443,10 @@ static int smbd_conn_upcall( /* This happens when we fail the negotiation */ if (sc->status == SMBDIRECT_SOCKET_NEGOTIATE_FAILED) { log_rdma_event(ERR, "event=%s during negotiation\n", event_name); - sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up(&info->status_wait); - break; } sc->status = SMBDIRECT_SOCKET_DISCONNECTED; - wake_up_interruptible(&info->status_wait); - wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); - wake_up_interruptible_all(&info->wait_send_queue); + smbd_disconnect_rdma_work(&sc->disconnect_work); break; default: @@ -245,15 +462,15 @@ static int smbd_conn_upcall( static void smbd_qp_async_error_upcall(struct ib_event *event, void *context) { - struct smbd_connection *info = context; + struct smbdirect_socket *sc = context; - log_rdma_event(ERR, "%s on device %s info %p\n", - ib_event_msg(event->event), event->device->name, info); + log_rdma_event(ERR, "%s on device %s socket %p\n", + ib_event_msg(event->event), event->device->name, sc); switch (event->event) { case IB_EVENT_CQ_ERR: case IB_EVENT_QP_FATAL: - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); break; default: @@ -278,11 +495,9 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) struct smbdirect_send_io *request = container_of(wc->wr_cqe, struct smbdirect_send_io, cqe); struct smbdirect_socket *sc = request->socket; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); - log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%d\n", - request, wc->status); + log_rdma_send(INFO, "smbdirect_send_io 0x%p completed wc->status=%s\n", + request, ib_wc_status_msg(wc->status)); for (i = 0; i < request->num_sge; i++) ib_dma_unmap_single(sc->ib.dev, @@ -291,17 +506,18 @@ static void send_done(struct ib_cq *cq, struct ib_wc *wc) DMA_TO_DEVICE); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_SEND) { - log_rdma_send(ERR, "wc->status=%d wc->opcode=%d\n", - wc->status, wc->opcode); + if (wc->status != IB_WC_WR_FLUSH_ERR) + log_rdma_send(ERR, "wc->status=%s wc->opcode=%d\n", + ib_wc_status_msg(wc->status), wc->opcode); mempool_free(request, sc->send_io.mem.pool); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); return; } - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); - wake_up(&info->wait_post_send); + wake_up(&sc->send_io.pending.dec_wait_queue); mempool_free(request, sc->send_io.mem.pool); } @@ -325,8 +541,6 @@ static bool process_negotiation_response( struct smbdirect_recv_io *response, int packet_length) { struct smbdirect_socket *sc = response->socket; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); struct smbdirect_socket_parameters *sp = &sc->parameters; struct smbdirect_negotiate_resp *packet = smbdirect_recv_io_payload(response); @@ -341,21 +555,19 @@ static bool process_negotiation_response( le16_to_cpu(packet->negotiated_version)); return false; } - info->protocol = le16_to_cpu(packet->negotiated_version); if (packet->credits_requested == 0) { log_rdma_event(ERR, "error: credits_requested==0\n"); return false; } - info->receive_credit_target = le16_to_cpu(packet->credits_requested); + sc->recv_io.credits.target = le16_to_cpu(packet->credits_requested); + sc->recv_io.credits.target = min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); if (packet->credits_granted == 0) { log_rdma_event(ERR, "error: credits_granted==0\n"); return false; } - atomic_set(&info->send_credits, le16_to_cpu(packet->credits_granted)); - - atomic_set(&info->receive_credits, 0); + atomic_set(&sc->send_io.credits.count, le16_to_cpu(packet->credits_granted)); if (le32_to_cpu(packet->preferred_send_size) > sp->max_recv_size) { log_rdma_event(ERR, "error: preferred_send_size=%d\n", @@ -380,16 +592,12 @@ static bool process_negotiation_response( } sp->max_fragmented_send_size = le32_to_cpu(packet->max_fragmented_size); - info->rdma_readwrite_threshold = - rdma_readwrite_threshold > sp->max_fragmented_send_size ? - sp->max_fragmented_send_size : - rdma_readwrite_threshold; sp->max_read_write_size = min_t(u32, le32_to_cpu(packet->max_readwrite_size), - info->max_frmr_depth * PAGE_SIZE); - info->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; + sp->max_frmr_depth * PAGE_SIZE); + sp->max_frmr_depth = sp->max_read_write_size / PAGE_SIZE; sc->recv_io.expected = SMBDIRECT_EXPECT_DATA_TRANSFER; return true; @@ -397,52 +605,40 @@ static bool process_negotiation_response( static void smbd_post_send_credits(struct work_struct *work) { - int ret = 0; int rc; struct smbdirect_recv_io *response; - struct smbd_connection *info = - container_of(work, struct smbd_connection, - post_send_credits_work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, recv_io.posted.refill_work); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { - wake_up(&info->wait_receive_queues); return; } - if (info->receive_credit_target > - atomic_read(&info->receive_credits)) { + if (sc->recv_io.credits.target > + atomic_read(&sc->recv_io.credits.count)) { while (true) { - response = get_receive_buffer(info); + response = get_receive_buffer(sc); if (!response) break; response->first_segment = false; - rc = smbd_post_recv(info, response); + rc = smbd_post_recv(sc, response); if (rc) { log_rdma_recv(ERR, "post_recv failed rc=%d\n", rc); - put_receive_buffer(info, response); + put_receive_buffer(sc, response); break; } - ret++; + atomic_inc(&sc->recv_io.posted.count); } } - spin_lock(&info->lock_new_credits_offered); - info->new_credits_offered += ret; - spin_unlock(&info->lock_new_credits_offered); - /* Promptly send an immediate packet as defined in [MS-SMBD] 3.1.1.1 */ - info->send_immediate = true; - if (atomic_read(&info->receive_credits) < - info->receive_credit_target - 1) { - if (info->keep_alive_requested == KEEP_ALIVE_PENDING || - info->send_immediate) { - log_keep_alive(INFO, "send an empty message\n"); - smbd_post_send_empty(info); - } + if (atomic_read(&sc->recv_io.credits.count) < + sc->recv_io.credits.target - 1) { + log_keep_alive(INFO, "schedule send of an empty message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } } @@ -454,19 +650,22 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) container_of(wc->wr_cqe, struct smbdirect_recv_io, cqe); struct smbdirect_socket *sc = response->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; - struct smbd_connection *info = - container_of(sc, struct smbd_connection, socket); + u16 old_recv_credit_target; u32 data_offset = 0; u32 data_length = 0; u32 remaining_data_length = 0; + bool negotiate_done = false; - log_rdma_recv(INFO, "response=0x%p type=%d wc status=%d wc opcode %d byte_len=%d pkey_index=%u\n", - response, sc->recv_io.expected, wc->status, wc->opcode, + log_rdma_recv(INFO, + "response=0x%p type=%d wc status=%s wc opcode %d byte_len=%d pkey_index=%u\n", + response, sc->recv_io.expected, + ib_wc_status_msg(wc->status), wc->opcode, wc->byte_len, wc->pkey_index); if (wc->status != IB_WC_SUCCESS || wc->opcode != IB_WC_RECV) { - log_rdma_recv(INFO, "wc->status=%d opcode=%d\n", - wc->status, wc->opcode); + if (wc->status != IB_WC_WR_FLUSH_ERR) + log_rdma_recv(ERR, "wc->status=%s opcode=%d\n", + ib_wc_status_msg(wc->status), wc->opcode); goto error; } @@ -476,15 +675,31 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) response->sge.length, DMA_FROM_DEVICE); + /* + * Reset timer to the keepalive interval in + * order to trigger our next keepalive message. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_NONE; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_interval_msec)); + switch (sc->recv_io.expected) { /* SMBD negotiation response */ case SMBDIRECT_EXPECT_NEGOTIATE_REP: dump_smbdirect_negotiate_resp(smbdirect_recv_io_payload(response)); sc->recv_io.reassembly.full_packet_received = true; - info->negotiate_done = + negotiate_done = process_negotiation_response(response, wc->byte_len); - put_receive_buffer(info, response); - complete(&info->negotiate_completion); + put_receive_buffer(sc, response); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING); + if (!negotiate_done) { + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; + smbd_disconnect_rdma_connection(sc); + } else { + sc->status = SMBDIRECT_SOCKET_CONNECTED; + wake_up(&sc->status_wait); + } + return; /* SMBD data transfer packet */ @@ -517,17 +732,23 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) sc->recv_io.reassembly.full_packet_received = true; } - atomic_dec(&info->receive_credits); - info->receive_credit_target = + atomic_dec(&sc->recv_io.posted.count); + atomic_dec(&sc->recv_io.credits.count); + old_recv_credit_target = sc->recv_io.credits.target; + sc->recv_io.credits.target = le16_to_cpu(data_transfer->credits_requested); + sc->recv_io.credits.target = + min_t(u16, sc->recv_io.credits.target, sp->recv_credit_max); + sc->recv_io.credits.target = + max_t(u16, sc->recv_io.credits.target, 1); if (le16_to_cpu(data_transfer->credits_granted)) { atomic_add(le16_to_cpu(data_transfer->credits_granted), - &info->send_credits); + &sc->send_io.credits.count); /* * We have new send credits granted from remote peer * If any sender is waiting for credits, unblock it */ - wake_up_interruptible(&info->wait_send_queue); + wake_up(&sc->send_io.credits.wait_queue); } log_incoming(INFO, "data flags %d data_offset %d data_length %d remaining_data_length %d\n", @@ -536,11 +757,11 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) le32_to_cpu(data_transfer->data_length), le32_to_cpu(data_transfer->remaining_data_length)); - /* Send a KEEP_ALIVE response right away if requested */ - info->keep_alive_requested = KEEP_ALIVE_NONE; + /* Send an immediate response right away if requested */ if (le16_to_cpu(data_transfer->flags) & SMBDIRECT_FLAG_RESPONSE_REQUESTED) { - info->keep_alive_requested = KEEP_ALIVE_PENDING; + log_keep_alive(INFO, "schedule send of immediate response\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } /* @@ -548,10 +769,13 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) * reassembly queue and wake up the reading thread */ if (data_length) { - enqueue_reassembly(info, response, data_length); - wake_up_interruptible(&sc->recv_io.reassembly.wait_queue); + if (sc->recv_io.credits.target > old_recv_credit_target) + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); + + enqueue_reassembly(sc, response, data_length); + wake_up(&sc->recv_io.reassembly.wait_queue); } else - put_receive_buffer(info, response); + put_receive_buffer(sc, response); return; @@ -566,19 +790,20 @@ static void recv_done(struct ib_cq *cq, struct ib_wc *wc) log_rdma_recv(ERR, "unexpected response type=%d\n", sc->recv_io.expected); WARN_ON_ONCE(sc->recv_io.expected != SMBDIRECT_EXPECT_DATA_TRANSFER); error: - put_receive_buffer(info, response); - smbd_disconnect_rdma_connection(info); + put_receive_buffer(sc, response); + smbd_disconnect_rdma_connection(sc); } static struct rdma_cm_id *smbd_create_id( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct sockaddr *dstaddr, int port) { + struct smbdirect_socket_parameters *sp = &sc->parameters; struct rdma_cm_id *id; int rc; __be16 *sport; - id = rdma_create_id(&init_net, smbd_conn_upcall, info, + id = rdma_create_id(&init_net, smbd_conn_upcall, sc, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) { rc = PTR_ERR(id); @@ -593,43 +818,57 @@ static struct rdma_cm_id *smbd_create_id( *sport = htons(port); - init_completion(&info->ri_done); - info->ri_rc = -ETIMEDOUT; - + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING; rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)dstaddr, - RDMA_RESOLVE_TIMEOUT); + sp->resolve_addr_timeout_msec); if (rc) { log_rdma_event(ERR, "rdma_resolve_addr() failed %i\n", rc); goto out; } - rc = wait_for_completion_interruptible_timeout( - &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING, + msecs_to_jiffies(sp->resolve_addr_timeout_msec)); /* e.g. if interrupted returns -ERESTARTSYS */ if (rc < 0) { log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); goto out; } - rc = info->ri_rc; - if (rc) { + if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ADDR_RUNNING) { + rc = -ETIMEDOUT; + log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); + goto out; + } + if (sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED) { + rc = -EHOSTUNREACH; log_rdma_event(ERR, "rdma_resolve_addr() completed %i\n", rc); goto out; } - info->ri_rc = -ETIMEDOUT; - rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_NEEDED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING; + rc = rdma_resolve_route(id, sp->resolve_route_timeout_msec); if (rc) { log_rdma_event(ERR, "rdma_resolve_route() failed %i\n", rc); goto out; } - rc = wait_for_completion_interruptible_timeout( - &info->ri_done, msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING, + msecs_to_jiffies(sp->resolve_route_timeout_msec)); /* e.g. if interrupted returns -ERESTARTSYS */ if (rc < 0) { log_rdma_event(ERR, "rdma_resolve_addr timeout rc: %i\n", rc); goto out; } - rc = info->ri_rc; - if (rc) { + if (sc->status == SMBDIRECT_SOCKET_RESOLVE_ROUTE_RUNNING) { + rc = -ETIMEDOUT; + log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); + goto out; + } + if (sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED) { + rc = -ENETUNREACH; log_rdma_event(ERR, "rdma_resolve_route() completed %i\n", rc); goto out; } @@ -656,13 +895,16 @@ static bool frwr_is_supported(struct ib_device_attr *attrs) } static int smbd_ia_open( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct sockaddr *dstaddr, int port) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; - sc->rdma.cm_id = smbd_create_id(info, dstaddr, port); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_CREATED); + sc->status = SMBDIRECT_SOCKET_RESOLVE_ADDR_NEEDED; + + sc->rdma.cm_id = smbd_create_id(sc, dstaddr, port); if (IS_ERR(sc->rdma.cm_id)) { rc = PTR_ERR(sc->rdma.cm_id); goto out1; @@ -677,19 +919,12 @@ static int smbd_ia_open( rc = -EPROTONOSUPPORT; goto out2; } - info->max_frmr_depth = min_t(int, - smbd_max_frmr_depth, + sp->max_frmr_depth = min_t(u32, + sp->max_frmr_depth, sc->ib.dev->attrs.max_fast_reg_page_list_len); - info->mr_type = IB_MR_TYPE_MEM_REG; + sc->mr_io.type = IB_MR_TYPE_MEM_REG; if (sc->ib.dev->attrs.kernel_cap_flags & IBK_SG_GAPS_REG) - info->mr_type = IB_MR_TYPE_SG_GAPS; - - sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); - if (IS_ERR(sc->ib.pd)) { - rc = PTR_ERR(sc->ib.pd); - log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); - goto out2; - } + sc->mr_io.type = IB_MR_TYPE_SG_GAPS; return 0; @@ -707,9 +942,8 @@ out1: * After negotiation, the transport is connected and ready for * carrying upper layer SMB payload */ -static int smbd_post_send_negotiate_req(struct smbd_connection *info) +static int smbd_post_send_negotiate_req(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc = -ENOMEM; @@ -761,18 +995,18 @@ static int smbd_post_send_negotiate_req(struct smbd_connection *info) request->sge[0].addr, request->sge[0].length, request->sge[0].lkey); - atomic_inc(&info->send_pending); + atomic_inc(&sc->send_io.pending.count); rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (!rc) return 0; /* if we reach here, post send failed */ log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - atomic_dec(&info->send_pending); + atomic_dec(&sc->send_io.pending.count); ib_dma_unmap_single(sc->ib.dev, request->sge[0].addr, request->sge[0].length, DMA_TO_DEVICE); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); dma_mapping_failed: mempool_free(request, sc->send_io.mem.pool); @@ -787,14 +1021,20 @@ dma_mapping_failed: * buffer as possible, and extend the receive credits to remote peer * return value: the new credtis being granted. */ -static int manage_credits_prior_sending(struct smbd_connection *info) +static int manage_credits_prior_sending(struct smbdirect_socket *sc) { int new_credits; - spin_lock(&info->lock_new_credits_offered); - new_credits = info->new_credits_offered; - info->new_credits_offered = 0; - spin_unlock(&info->lock_new_credits_offered); + if (atomic_read(&sc->recv_io.credits.count) >= sc->recv_io.credits.target) + return 0; + + new_credits = atomic_read(&sc->recv_io.posted.count); + if (new_credits == 0) + return 0; + + new_credits -= atomic_read(&sc->recv_io.credits.count); + if (new_credits <= 0) + return 0; return new_credits; } @@ -808,21 +1048,27 @@ static int manage_credits_prior_sending(struct smbd_connection *info) * 1 if SMBDIRECT_FLAG_RESPONSE_REQUESTED needs to be set * 0: otherwise */ -static int manage_keep_alive_before_sending(struct smbd_connection *info) +static int manage_keep_alive_before_sending(struct smbdirect_socket *sc) { - if (info->keep_alive_requested == KEEP_ALIVE_PENDING) { - info->keep_alive_requested = KEEP_ALIVE_SENT; + struct smbdirect_socket_parameters *sp = &sc->parameters; + + if (sc->idle.keepalive == SMBDIRECT_KEEPALIVE_PENDING) { + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_SENT; + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); return 1; } return 0; } /* Post the send request */ -static int smbd_post_send(struct smbd_connection *info, +static int smbd_post_send(struct smbdirect_socket *sc, struct smbdirect_send_io *request) { - struct smbdirect_socket *sc = &info->socket; - struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_send_wr send_wr; int rc, i; @@ -849,21 +1095,17 @@ static int smbd_post_send(struct smbd_connection *info, rc = ib_post_send(sc->ib.qp, &send_wr, NULL); if (rc) { log_rdma_send(ERR, "ib_post_send failed rc=%d\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); rc = -EAGAIN; - } else - /* Reset timer for idle connection after packet is sent */ - mod_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); + } return rc; } -static int smbd_post_send_iter(struct smbd_connection *info, +static int smbd_post_send_iter(struct smbdirect_socket *sc, struct iov_iter *iter, int *_remaining_data_length) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; int i, rc; int header_length; @@ -874,8 +1116,8 @@ static int smbd_post_send_iter(struct smbd_connection *info, wait_credit: /* Wait for send credits. A SMBD packet needs one credit */ - rc = wait_event_interruptible(info->wait_send_queue, - atomic_read(&info->send_credits) > 0 || + rc = wait_event_interruptible(sc->send_io.credits.wait_queue, + atomic_read(&sc->send_io.credits.count) > 0 || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) goto err_wait_credit; @@ -885,14 +1127,14 @@ wait_credit: rc = -EAGAIN; goto err_wait_credit; } - if (unlikely(atomic_dec_return(&info->send_credits) < 0)) { - atomic_inc(&info->send_credits); + if (unlikely(atomic_dec_return(&sc->send_io.credits.count) < 0)) { + atomic_inc(&sc->send_io.credits.count); goto wait_credit; } wait_send_queue: - wait_event(info->wait_post_send, - atomic_read(&info->send_pending) < sp->send_credit_target || + wait_event(sc->send_io.pending.dec_wait_queue, + atomic_read(&sc->send_io.pending.count) < sp->send_credit_target || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { @@ -901,9 +1143,9 @@ wait_send_queue: goto err_wait_send_queue; } - if (unlikely(atomic_inc_return(&info->send_pending) > + if (unlikely(atomic_inc_return(&sc->send_io.pending.count) > sp->send_credit_target)) { - atomic_dec(&info->send_pending); + atomic_dec(&sc->send_io.pending.count); goto wait_send_queue; } @@ -916,10 +1158,30 @@ wait_send_queue: request->socket = sc; memset(request->sge, 0, sizeof(request->sge)); + /* Map the packet to DMA */ + header_length = sizeof(struct smbdirect_data_transfer); + /* If this is a packet without payload, don't send padding */ + if (!iter) + header_length = offsetof(struct smbdirect_data_transfer, padding); + + packet = smbdirect_send_io_payload(request); + request->sge[0].addr = ib_dma_map_single(sc->ib.dev, + (void *)packet, + header_length, + DMA_TO_DEVICE); + if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { + rc = -EIO; + goto err_dma; + } + + request->sge[0].length = header_length; + request->sge[0].lkey = sc->ib.pd->local_dma_lkey; + request->num_sge = 1; + /* Fill in the data payload to find out how much data we can add */ if (iter) { struct smb_extract_to_rdma extract = { - .nr_sge = 1, + .nr_sge = request->num_sge, .max_sge = SMBDIRECT_SEND_IO_MAX_SGE, .sge = request->sge, .device = sc->ib.dev, @@ -938,21 +1200,17 @@ wait_send_queue: *_remaining_data_length -= data_length; } else { data_length = 0; - request->num_sge = 1; } /* Fill in the packet header */ - packet = smbdirect_send_io_payload(request); packet->credits_requested = cpu_to_le16(sp->send_credit_target); - new_credits = manage_credits_prior_sending(info); - atomic_add(new_credits, &info->receive_credits); + new_credits = manage_credits_prior_sending(sc); + atomic_add(new_credits, &sc->recv_io.credits.count); packet->credits_granted = cpu_to_le16(new_credits); - info->send_immediate = false; - packet->flags = 0; - if (manage_keep_alive_before_sending(info)) + if (manage_keep_alive_before_sending(sc)) packet->flags |= cpu_to_le16(SMBDIRECT_FLAG_RESPONSE_REQUESTED); packet->reserved = 0; @@ -971,26 +1229,7 @@ wait_send_queue: le32_to_cpu(packet->data_length), le32_to_cpu(packet->remaining_data_length)); - /* Map the packet to DMA */ - header_length = sizeof(struct smbdirect_data_transfer); - /* If this is a packet without payload, don't send padding */ - if (!data_length) - header_length = offsetof(struct smbdirect_data_transfer, padding); - - request->sge[0].addr = ib_dma_map_single(sc->ib.dev, - (void *)packet, - header_length, - DMA_TO_DEVICE); - if (ib_dma_mapping_error(sc->ib.dev, request->sge[0].addr)) { - rc = -EIO; - request->sge[0].addr = 0; - goto err_dma; - } - - request->sge[0].length = header_length; - request->sge[0].lkey = sc->ib.pd->local_dma_lkey; - - rc = smbd_post_send(info, request); + rc = smbd_post_send(sc, request); if (!rc) return 0; @@ -1003,19 +1242,16 @@ err_dma: DMA_TO_DEVICE); mempool_free(request, sc->send_io.mem.pool); - /* roll back receive credits and credits to be offered */ - spin_lock(&info->lock_new_credits_offered); - info->new_credits_offered += new_credits; - spin_unlock(&info->lock_new_credits_offered); - atomic_sub(new_credits, &info->receive_credits); + /* roll back the granted receive credits */ + atomic_sub(new_credits, &sc->recv_io.credits.count); err_alloc: - if (atomic_dec_and_test(&info->send_pending)) - wake_up(&info->wait_send_pending); + if (atomic_dec_and_test(&sc->send_io.pending.count)) + wake_up(&sc->send_io.pending.zero_wait_queue); err_wait_send_queue: /* roll back send credits and pending */ - atomic_inc(&info->send_credits); + atomic_inc(&sc->send_io.credits.count); err_wait_credit: return rc; @@ -1026,15 +1262,15 @@ err_wait_credit: * Empty message is used to extend credits to peer to for keep live * while there is no upper layer payload to send at the time */ -static int smbd_post_send_empty(struct smbd_connection *info) +static int smbd_post_send_empty(struct smbdirect_socket *sc) { int remaining_data_length = 0; - info->count_send_empty++; - return smbd_post_send_iter(info, NULL, &remaining_data_length); + sc->statistics.send_empty++; + return smbd_post_send_iter(sc, NULL, &remaining_data_length); } -static int smbd_post_send_full_iter(struct smbd_connection *info, +static int smbd_post_send_full_iter(struct smbdirect_socket *sc, struct iov_iter *iter, int *_remaining_data_length) { @@ -1047,7 +1283,7 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, */ while (iov_iter_count(iter) > 0) { - rc = smbd_post_send_iter(info, iter, _remaining_data_length); + rc = smbd_post_send_iter(sc, iter, _remaining_data_length); if (rc < 0) break; } @@ -1061,9 +1297,8 @@ static int smbd_post_send_full_iter(struct smbd_connection *info, * The interaction is controlled by send/receive credit system */ static int smbd_post_recv( - struct smbd_connection *info, struct smbdirect_recv_io *response) + struct smbdirect_socket *sc, struct smbdirect_recv_io *response) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; struct ib_recv_wr recv_wr; int rc = -EIO; @@ -1089,7 +1324,7 @@ static int smbd_post_recv( ib_dma_unmap_single(sc->ib.dev, response->sge.addr, response->sge.length, DMA_FROM_DEVICE); response->sge.length = 0; - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); log_rdma_recv(ERR, "ib_post_recv failed rc=%d\n", rc); } @@ -1097,33 +1332,36 @@ static int smbd_post_recv( } /* Perform SMBD negotiate according to [MS-SMBD] 3.1.5.2 */ -static int smbd_negotiate(struct smbd_connection *info) +static int smbd_negotiate(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int rc; - struct smbdirect_recv_io *response = get_receive_buffer(info); + struct smbdirect_recv_io *response = get_receive_buffer(sc); + + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED); + sc->status = SMBDIRECT_SOCKET_NEGOTIATE_RUNNING; sc->recv_io.expected = SMBDIRECT_EXPECT_NEGOTIATE_REP; - rc = smbd_post_recv(info, response); + rc = smbd_post_recv(sc, response); log_rdma_event(INFO, "smbd_post_recv rc=%d iov.addr=0x%llx iov.length=%u iov.lkey=0x%x\n", rc, response->sge.addr, response->sge.length, response->sge.lkey); if (rc) { - put_receive_buffer(info, response); + put_receive_buffer(sc, response); return rc; } - init_completion(&info->negotiate_completion); - info->negotiate_done = false; - rc = smbd_post_send_negotiate_req(info); + rc = smbd_post_send_negotiate_req(sc); if (rc) return rc; - rc = wait_for_completion_interruptible_timeout( - &info->negotiate_completion, SMBD_NEGOTIATE_TIMEOUT * HZ); - log_rdma_event(INFO, "wait_for_completion_timeout rc=%d\n", rc); + rc = wait_event_interruptible_timeout( + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_NEGOTIATE_RUNNING, + msecs_to_jiffies(sp->negotiate_timeout_msec)); + log_rdma_event(INFO, "wait_event_interruptible_timeout rc=%d\n", rc); - if (info->negotiate_done) + if (sc->status == SMBDIRECT_SOCKET_CONNECTED) return 0; if (rc == 0) @@ -1147,13 +1385,13 @@ static int smbd_negotiate(struct smbd_connection *info) * data_length: the size of payload in this packet */ static void enqueue_reassembly( - struct smbd_connection *info, + struct smbdirect_socket *sc, struct smbdirect_recv_io *response, int data_length) { - struct smbdirect_socket *sc = &info->socket; + unsigned long flags; - spin_lock(&sc->recv_io.reassembly.lock); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); list_add_tail(&response->list, &sc->recv_io.reassembly.list); sc->recv_io.reassembly.queue_length++; /* @@ -1164,9 +1402,8 @@ static void enqueue_reassembly( */ virt_wmb(); sc->recv_io.reassembly.data_length += data_length; - spin_unlock(&sc->recv_io.reassembly.lock); - info->count_reassembly_queue++; - info->count_enqueue_reassembly_queue++; + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); + sc->statistics.enqueue_reassembly_queue++; } /* @@ -1174,9 +1411,8 @@ static void enqueue_reassembly( * Caller is responsible for locking * return value: the first entry if any, NULL if queue is empty */ -static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *info) +static struct smbdirect_recv_io *_get_first_reassembly(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; if (!list_empty(&sc->recv_io.reassembly.list)) { @@ -1193,9 +1429,8 @@ static struct smbdirect_recv_io *_get_first_reassembly(struct smbd_connection *i * pre-allocated in advance. * return value: the receive buffer, NULL if none is available */ -static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info) +static struct smbdirect_recv_io *get_receive_buffer(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *ret = NULL; unsigned long flags; @@ -1205,8 +1440,7 @@ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&ret->list); - info->count_receive_queue--; - info->count_get_receive_buffer++; + sc->statistics.get_receive_buffer++; } spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); @@ -1220,9 +1454,8 @@ static struct smbdirect_recv_io *get_receive_buffer(struct smbd_connection *info * receive buffer is returned. */ static void put_receive_buffer( - struct smbd_connection *info, struct smbdirect_recv_io *response) + struct smbdirect_socket *sc, struct smbdirect_recv_io *response) { - struct smbdirect_socket *sc = &info->socket; unsigned long flags; if (likely(response->sge.length != 0)) { @@ -1235,31 +1468,18 @@ static void put_receive_buffer( spin_lock_irqsave(&sc->recv_io.free.lock, flags); list_add_tail(&response->list, &sc->recv_io.free.list); - info->count_receive_queue++; - info->count_put_receive_buffer++; + sc->statistics.put_receive_buffer++; spin_unlock_irqrestore(&sc->recv_io.free.lock, flags); - queue_work(info->workqueue, &info->post_send_credits_work); + queue_work(sc->workqueue, &sc->recv_io.posted.refill_work); } /* Preallocate all receive buffer on transport establishment */ -static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) +static int allocate_receive_buffers(struct smbdirect_socket *sc, int num_buf) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; int i; - INIT_LIST_HEAD(&sc->recv_io.reassembly.list); - spin_lock_init(&sc->recv_io.reassembly.lock); - sc->recv_io.reassembly.data_length = 0; - sc->recv_io.reassembly.queue_length = 0; - - INIT_LIST_HEAD(&sc->recv_io.free.list); - spin_lock_init(&sc->recv_io.free.lock); - info->count_receive_queue = 0; - - init_waitqueue_head(&info->wait_receive_queues); - for (i = 0; i < num_buf; i++) { response = mempool_alloc(sc->recv_io.mem.pool, GFP_KERNEL); if (!response) @@ -1268,7 +1488,6 @@ static int allocate_receive_buffers(struct smbd_connection *info, int num_buf) response->socket = sc; response->sge.length = 0; list_add_tail(&response->list, &sc->recv_io.free.list); - info->count_receive_queue++; } return 0; @@ -1279,45 +1498,59 @@ allocate_failed: &sc->recv_io.free.list, struct smbdirect_recv_io, list); list_del(&response->list); - info->count_receive_queue--; mempool_free(response, sc->recv_io.mem.pool); } return -ENOMEM; } -static void destroy_receive_buffers(struct smbd_connection *info) +static void destroy_receive_buffers(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_recv_io *response; - while ((response = get_receive_buffer(info))) + while ((response = get_receive_buffer(sc))) mempool_free(response, sc->recv_io.mem.pool); } +static void send_immediate_empty_message(struct work_struct *work) +{ + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.immediate_work); + + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; + + log_keep_alive(INFO, "send an empty message\n"); + smbd_post_send_empty(sc); +} + /* Implement idle connection timer [MS-SMBD] 3.1.6.2 */ static void idle_connection_timer(struct work_struct *work) { - struct smbd_connection *info = container_of( - work, struct smbd_connection, - idle_timer_work.work); - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, idle.timer_work.work); struct smbdirect_socket_parameters *sp = &sc->parameters; - if (info->keep_alive_requested != KEEP_ALIVE_NONE) { + if (sc->idle.keepalive != SMBDIRECT_KEEPALIVE_NONE) { log_keep_alive(ERR, - "error status info->keep_alive_requested=%d\n", - info->keep_alive_requested); - smbd_disconnect_rdma_connection(info); + "error status sc->idle.keepalive=%d\n", + sc->idle.keepalive); + smbd_disconnect_rdma_connection(sc); return; } - log_keep_alive(INFO, "about to send an empty idle message\n"); - smbd_post_send_empty(info); + if (sc->status != SMBDIRECT_SOCKET_CONNECTED) + return; - /* Setup the next idle timeout work */ - queue_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); + /* + * Now use the keepalive timeout (instead of keepalive interval) + * in order to wait for a response + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->keepalive_timeout_msec)); + log_keep_alive(INFO, "schedule send of empty idle message\n"); + queue_work(sc->workqueue, &sc->idle.immediate_work); } /* @@ -1329,7 +1562,6 @@ void smbd_destroy(struct TCP_Server_Info *server) { struct smbd_connection *info = server->smbd_conn; struct smbdirect_socket *sc; - struct smbdirect_socket_parameters *sp; struct smbdirect_recv_io *response; unsigned long flags; @@ -1338,19 +1570,30 @@ void smbd_destroy(struct TCP_Server_Info *server) return; } sc = &info->socket; - sp = &sc->parameters; + + log_rdma_event(INFO, "cancelling and disable disconnect_work\n"); + disable_work_sync(&sc->disconnect_work); log_rdma_event(INFO, "destroying rdma session\n"); - if (sc->status != SMBDIRECT_SOCKET_DISCONNECTED) { - rdma_disconnect(sc->rdma.cm_id); + if (sc->status < SMBDIRECT_SOCKET_DISCONNECTING) { + smbd_disconnect_rdma_work(&sc->disconnect_work); log_rdma_event(INFO, "wait for transport being disconnected\n"); wait_event_interruptible( - info->status_wait, + sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); } - log_rdma_event(INFO, "cancelling post_send_credits_work\n"); - disable_work_sync(&info->post_send_credits_work); + /* + * Wake up all waiters in all wait queues + * in order to notice the broken connection. + * + * Most likely this was already called via + * smbd_disconnect_rdma_work(), but call it again... + */ + smbd_disconnect_wake_up_all(sc); + + log_rdma_event(INFO, "cancelling recv_io.posted.refill_work\n"); + disable_work_sync(&sc->recv_io.posted.refill_work); log_rdma_event(INFO, "destroying qp\n"); ib_drain_qp(sc->ib.qp); @@ -1358,18 +1601,20 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->ib.qp = NULL; log_rdma_event(INFO, "cancelling idle timer\n"); - disable_delayed_work_sync(&info->idle_timer_work); + disable_delayed_work_sync(&sc->idle.timer_work); + log_rdma_event(INFO, "cancelling send immediate work\n"); + disable_work_sync(&sc->idle.immediate_work); /* It's not possible for upper layer to get to reassembly */ log_rdma_event(INFO, "drain the reassembly queue\n"); do { spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); - response = _get_first_reassembly(info); + response = _get_first_reassembly(sc); if (response) { list_del(&response->list); spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); - put_receive_buffer(info, response); + put_receive_buffer(sc, response); } else spin_unlock_irqrestore( &sc->recv_io.reassembly.lock, flags); @@ -1377,9 +1622,7 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->recv_io.reassembly.data_length = 0; log_rdma_event(INFO, "free receive buffers\n"); - wait_event(info->wait_receive_queues, - info->count_receive_queue == sp->recv_credit_max); - destroy_receive_buffers(info); + destroy_receive_buffers(sc); /* * For performance reasons, memory registration and deregistration @@ -1389,13 +1632,12 @@ void smbd_destroy(struct TCP_Server_Info *server) * path when sending data, and then release memory registrations. */ log_rdma_event(INFO, "freeing mr list\n"); - wake_up_interruptible_all(&info->wait_mr); - while (atomic_read(&info->mr_used_count)) { + while (atomic_read(&sc->mr_io.used.count)) { cifs_server_unlock(server); msleep(1000); cifs_server_lock(server); } - destroy_mr_list(info); + destroy_mr_list(sc); ib_free_cq(sc->ib.send_cq); ib_free_cq(sc->ib.recv_cq); @@ -1411,7 +1653,7 @@ void smbd_destroy(struct TCP_Server_Info *server) sc->status = SMBDIRECT_SOCKET_DESTROYED; - destroy_workqueue(info->workqueue); + destroy_workqueue(sc->workqueue); log_rdma_event(INFO, "rdma session destroyed\n"); kfree(info); server->smbd_conn = NULL; @@ -1453,12 +1695,9 @@ create_conn: return -ENOENT; } -static void destroy_caches_and_workqueue(struct smbd_connection *info) +static void destroy_caches(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - - destroy_receive_buffers(info); - destroy_workqueue(info->workqueue); + destroy_receive_buffers(sc); mempool_destroy(sc->recv_io.mem.pool); kmem_cache_destroy(sc->recv_io.mem.cache); mempool_destroy(sc->send_io.mem.pool); @@ -1466,9 +1705,8 @@ static void destroy_caches_and_workqueue(struct smbd_connection *info) } #define MAX_NAME_LEN 80 -static int allocate_caches_and_workqueue(struct smbd_connection *info) +static int allocate_caches(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; struct smbdirect_socket_parameters *sp = &sc->parameters; char name[MAX_NAME_LEN]; int rc; @@ -1476,7 +1714,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (WARN_ON_ONCE(sp->max_recv_size < sizeof(struct smbdirect_data_transfer))) return -ENOMEM; - scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_send_io_%p", sc); sc->send_io.mem.cache = kmem_cache_create( name, @@ -1492,7 +1730,7 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!sc->send_io.mem.pool) goto out1; - scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", info); + scnprintf(name, MAX_NAME_LEN, "smbdirect_recv_io_%p", sc); struct kmem_cache_args response_args = { .align = __alignof__(struct smbdirect_recv_io), @@ -1513,21 +1751,14 @@ static int allocate_caches_and_workqueue(struct smbd_connection *info) if (!sc->recv_io.mem.pool) goto out3; - scnprintf(name, MAX_NAME_LEN, "smbd_%p", info); - info->workqueue = create_workqueue(name); - if (!info->workqueue) - goto out4; - - rc = allocate_receive_buffers(info, sp->recv_credit_max); + rc = allocate_receive_buffers(sc, sp->recv_credit_max); if (rc) { log_rdma_event(ERR, "failed to allocate receive buffers\n"); - goto out5; + goto out4; } return 0; -out5: - destroy_workqueue(info->workqueue); out4: mempool_destroy(sc->recv_io.mem.pool); out3: @@ -1551,46 +1782,63 @@ static struct smbd_connection *_smbd_get_connection( struct ib_qp_init_attr qp_attr; struct sockaddr_in *addr_in = (struct sockaddr_in *) dstaddr; struct ib_port_immutable port_immutable; - u32 ird_ord_hdr[2]; + __be32 ird_ord_hdr[2]; + char wq_name[80]; + struct workqueue_struct *workqueue; info = kzalloc(sizeof(struct smbd_connection), GFP_KERNEL); if (!info) return NULL; sc = &info->socket; + scnprintf(wq_name, ARRAY_SIZE(wq_name), "smbd_%p", sc); + workqueue = create_workqueue(wq_name); + if (!workqueue) + goto create_wq_failed; + smbdirect_socket_init(sc); + sc->workqueue = workqueue; sp = &sc->parameters; - sc->status = SMBDIRECT_SOCKET_CONNECTING; - rc = smbd_ia_open(info, dstaddr, port); + INIT_WORK(&sc->disconnect_work, smbd_disconnect_rdma_work); + + sp->resolve_addr_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->resolve_route_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->rdma_connect_timeout_msec = RDMA_RESOLVE_TIMEOUT; + sp->negotiate_timeout_msec = SMBD_NEGOTIATE_TIMEOUT * 1000; + sp->initiator_depth = 1; + sp->responder_resources = SMBD_CM_RESPONDER_RESOURCES; + sp->recv_credit_max = smbd_receive_credit_max; + sp->send_credit_target = smbd_send_credit_target; + sp->max_send_size = smbd_max_send_size; + sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; + sp->max_recv_size = smbd_max_receive_size; + sp->max_frmr_depth = smbd_max_frmr_depth; + sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; + sp->keepalive_timeout_msec = KEEPALIVE_RECV_TIMEOUT * 1000; + + rc = smbd_ia_open(sc, dstaddr, port); if (rc) { log_rdma_event(INFO, "smbd_ia_open rc=%d\n", rc); goto create_id_failed; } - if (smbd_send_credit_target > sc->ib.dev->attrs.max_cqe || - smbd_send_credit_target > sc->ib.dev->attrs.max_qp_wr) { + if (sp->send_credit_target > sc->ib.dev->attrs.max_cqe || + sp->send_credit_target > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering send_credit_target = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - smbd_send_credit_target, + sp->send_credit_target, sc->ib.dev->attrs.max_cqe, sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - if (smbd_receive_credit_max > sc->ib.dev->attrs.max_cqe || - smbd_receive_credit_max > sc->ib.dev->attrs.max_qp_wr) { + if (sp->recv_credit_max > sc->ib.dev->attrs.max_cqe || + sp->recv_credit_max > sc->ib.dev->attrs.max_qp_wr) { log_rdma_event(ERR, "consider lowering receive_credit_max = %d. Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n", - smbd_receive_credit_max, + sp->recv_credit_max, sc->ib.dev->attrs.max_cqe, sc->ib.dev->attrs.max_qp_wr); goto config_failed; } - sp->recv_credit_max = smbd_receive_credit_max; - sp->send_credit_target = smbd_send_credit_target; - sp->max_send_size = smbd_max_send_size; - sp->max_fragmented_recv_size = smbd_max_fragmented_recv_size; - sp->max_recv_size = smbd_max_receive_size; - sp->keepalive_interval_msec = smbd_keep_alive_interval * 1000; - if (sc->ib.dev->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE || sc->ib.dev->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) { log_rdma_event(ERR, @@ -1602,8 +1850,16 @@ static struct smbd_connection *_smbd_get_connection( goto config_failed; } + sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0); + if (IS_ERR(sc->ib.pd)) { + rc = PTR_ERR(sc->ib.pd); + sc->ib.pd = NULL; + log_rdma_event(ERR, "ib_alloc_pd() returned %d\n", rc); + goto alloc_pd_failed; + } + sc->ib.send_cq = - ib_alloc_cq_any(sc->ib.dev, info, + ib_alloc_cq_any(sc->ib.dev, sc, sp->send_credit_target, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.send_cq)) { sc->ib.send_cq = NULL; @@ -1611,7 +1867,7 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.recv_cq = - ib_alloc_cq_any(sc->ib.dev, info, + ib_alloc_cq_any(sc->ib.dev, sc, sp->recv_credit_max, IB_POLL_SOFTIRQ); if (IS_ERR(sc->ib.recv_cq)) { sc->ib.recv_cq = NULL; @@ -1620,7 +1876,7 @@ static struct smbd_connection *_smbd_get_connection( memset(&qp_attr, 0, sizeof(qp_attr)); qp_attr.event_handler = smbd_qp_async_error_upcall; - qp_attr.qp_context = info; + qp_attr.qp_context = sc; qp_attr.cap.max_send_wr = sp->send_credit_target; qp_attr.cap.max_recv_wr = sp->recv_credit_max; qp_attr.cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE; @@ -1639,22 +1895,22 @@ static struct smbd_connection *_smbd_get_connection( } sc->ib.qp = sc->rdma.cm_id->qp; - memset(&conn_param, 0, sizeof(conn_param)); - conn_param.initiator_depth = 0; - - conn_param.responder_resources = - min(sc->ib.dev->attrs.max_qp_rd_atom, - SMBD_CM_RESPONDER_RESOURCES); - info->responder_resources = conn_param.responder_resources; + sp->responder_resources = + min_t(u8, sp->responder_resources, + sc->ib.dev->attrs.max_qp_rd_atom); log_rdma_mr(INFO, "responder_resources=%d\n", - info->responder_resources); + sp->responder_resources); + + memset(&conn_param, 0, sizeof(conn_param)); + conn_param.initiator_depth = sp->initiator_depth; + conn_param.responder_resources = sp->responder_resources; /* Need to send IRD/ORD in private data for iWARP */ sc->ib.dev->ops.get_port_immutable( sc->ib.dev, sc->rdma.cm_id->port_num, &port_immutable); if (port_immutable.core_cap_flags & RDMA_CORE_PORT_IWARP) { - ird_ord_hdr[0] = info->responder_resources; - ird_ord_hdr[1] = 1; + ird_ord_hdr[0] = cpu_to_be32(conn_param.responder_resources); + ird_ord_hdr[1] = cpu_to_be32(conn_param.initiator_depth); conn_param.private_data = ird_ord_hdr; conn_param.private_data_len = sizeof(ird_ord_hdr); } else { @@ -1669,8 +1925,8 @@ static struct smbd_connection *_smbd_get_connection( log_rdma_event(INFO, "connecting to IP %pI4 port %d\n", &addr_in->sin_addr, port); - init_waitqueue_head(&info->status_wait); - init_waitqueue_head(&sc->recv_io.reassembly.wait_queue); + WARN_ON_ONCE(sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_NEEDED); + sc->status = SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING; rc = rdma_connect(sc->rdma.cm_id, &conn_param); if (rc) { log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc); @@ -1678,45 +1934,42 @@ static struct smbd_connection *_smbd_get_connection( } wait_event_interruptible_timeout( - info->status_wait, - sc->status != SMBDIRECT_SOCKET_CONNECTING, - msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT)); + sc->status_wait, + sc->status != SMBDIRECT_SOCKET_RDMA_CONNECT_RUNNING, + msecs_to_jiffies(sp->rdma_connect_timeout_msec)); - if (sc->status != SMBDIRECT_SOCKET_CONNECTED) { + if (sc->status != SMBDIRECT_SOCKET_NEGOTIATE_NEEDED) { log_rdma_event(ERR, "rdma_connect failed port=%d\n", port); goto rdma_connect_failed; } log_rdma_event(INFO, "rdma_connect connected\n"); - rc = allocate_caches_and_workqueue(info); + rc = allocate_caches(sc); if (rc) { log_rdma_event(ERR, "cache allocation failed\n"); goto allocate_cache_failed; } - init_waitqueue_head(&info->wait_send_queue); - INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer); - queue_delayed_work(info->workqueue, &info->idle_timer_work, - msecs_to_jiffies(sp->keepalive_interval_msec)); - - init_waitqueue_head(&info->wait_send_pending); - atomic_set(&info->send_pending, 0); - - init_waitqueue_head(&info->wait_post_send); + INIT_WORK(&sc->idle.immediate_work, send_immediate_empty_message); + INIT_DELAYED_WORK(&sc->idle.timer_work, idle_connection_timer); + /* + * start with the negotiate timeout and SMBDIRECT_KEEPALIVE_PENDING + * so that the timer will cause a disconnect. + */ + sc->idle.keepalive = SMBDIRECT_KEEPALIVE_PENDING; + mod_delayed_work(sc->workqueue, &sc->idle.timer_work, + msecs_to_jiffies(sp->negotiate_timeout_msec)); - INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work); - INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits); - info->new_credits_offered = 0; - spin_lock_init(&info->lock_new_credits_offered); + INIT_WORK(&sc->recv_io.posted.refill_work, smbd_post_send_credits); - rc = smbd_negotiate(info); + rc = smbd_negotiate(sc); if (rc) { log_rdma_event(ERR, "smbd_negotiate rc=%d\n", rc); goto negotiation_failed; } - rc = allocate_mr_list(info); + rc = allocate_mr_list(sc); if (rc) { log_rdma_mr(ERR, "memory registration allocation failed\n"); goto allocate_mr_failed; @@ -1731,11 +1984,11 @@ allocate_mr_failed: return NULL; negotiation_failed: - disable_delayed_work_sync(&info->idle_timer_work); - destroy_caches_and_workqueue(info); + disable_delayed_work_sync(&sc->idle.timer_work); + destroy_caches(sc); sc->status = SMBDIRECT_SOCKET_NEGOTIATE_FAILED; rdma_disconnect(sc->rdma.cm_id); - wait_event(info->status_wait, + wait_event(sc->status_wait, sc->status == SMBDIRECT_SOCKET_DISCONNECTED); allocate_cache_failed: @@ -1749,11 +2002,15 @@ alloc_cq_failed: if (sc->ib.recv_cq) ib_free_cq(sc->ib.recv_cq); -config_failed: ib_dealloc_pd(sc->ib.pd); + +alloc_pd_failed: +config_failed: rdma_destroy_id(sc->rdma.cm_id); create_id_failed: + destroy_workqueue(sc->workqueue); +create_wq_failed: kfree(info); return NULL; } @@ -1762,6 +2019,7 @@ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr) { struct smbd_connection *ret; + const struct smbdirect_socket_parameters *sp; int port = SMBD_PORT; try_again: @@ -1772,6 +2030,16 @@ try_again: port = SMB_PORT; goto try_again; } + if (!ret) + return NULL; + + sp = &ret->socket.parameters; + + server->rdma_readwrite_threshold = + rdma_readwrite_threshold > sp->max_fragmented_send_size ? + sp->max_fragmented_send_size : + rdma_readwrite_threshold; + return ret; } @@ -1813,6 +2081,7 @@ again: if (sc->recv_io.reassembly.data_length >= size) { int queue_length; int queue_removed = 0; + unsigned long flags; /* * Need to make sure reassembly_data_length is read before @@ -1827,7 +2096,7 @@ again: to_read = size; offset = sc->recv_io.reassembly.first_entry_offset; while (data_read < size) { - response = _get_first_reassembly(info); + response = _get_first_reassembly(sc); data_transfer = smbdirect_recv_io_payload(response); data_length = le32_to_cpu(data_transfer->data_length); remaining_data_length = @@ -1872,16 +2141,15 @@ again: if (queue_length) list_del(&response->list); else { - spin_lock_irq( - &sc->recv_io.reassembly.lock); + spin_lock_irqsave( + &sc->recv_io.reassembly.lock, flags); list_del(&response->list); - spin_unlock_irq( - &sc->recv_io.reassembly.lock); + spin_unlock_irqrestore( + &sc->recv_io.reassembly.lock, flags); } queue_removed++; - info->count_reassembly_queue--; - info->count_dequeue_reassembly_queue++; - put_receive_buffer(info, response); + sc->statistics.dequeue_reassembly_queue++; + put_receive_buffer(sc, response); offset = 0; log_read(INFO, "put_receive_buffer offset=0\n"); } else @@ -1895,10 +2163,10 @@ again: to_read, data_read, offset); } - spin_lock_irq(&sc->recv_io.reassembly.lock); + spin_lock_irqsave(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.data_length -= data_read; sc->recv_io.reassembly.queue_length -= queue_removed; - spin_unlock_irq(&sc->recv_io.reassembly.lock); + spin_unlock_irqrestore(&sc->recv_io.reassembly.lock, flags); sc->recv_io.reassembly.first_entry_offset = offset; log_read(INFO, "returning to thread data_read=%d reassembly_data_length=%d first_entry_offset=%d\n", @@ -1983,13 +2251,13 @@ int smbd_send(struct TCP_Server_Info *server, klen += rqst->rq_iov[i].iov_len; iov_iter_kvec(&iter, ITER_SOURCE, rqst->rq_iov, rqst->rq_nvec, klen); - rc = smbd_post_send_full_iter(info, &iter, &remaining_data_length); + rc = smbd_post_send_full_iter(sc, &iter, &remaining_data_length); if (rc < 0) break; if (iov_iter_count(&rqst->rq_iter) > 0) { /* And then the data pages if there are any */ - rc = smbd_post_send_full_iter(info, &rqst->rq_iter, + rc = smbd_post_send_full_iter(sc, &rqst->rq_iter, &remaining_data_length); if (rc < 0) break; @@ -2004,8 +2272,8 @@ int smbd_send(struct TCP_Server_Info *server, * that means all the I/Os have been out and we are good to return */ - wait_event(info->wait_send_pending, - atomic_read(&info->send_pending) == 0 || + wait_event(sc->send_io.pending.zero_wait_queue, + atomic_read(&sc->send_io.pending.count) == 0 || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (sc->status != SMBDIRECT_SOCKET_CONNECTED && rc == 0) @@ -2016,14 +2284,13 @@ int smbd_send(struct TCP_Server_Info *server, static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_mr *mr; - struct ib_cqe *cqe; + struct smbdirect_mr_io *mr = + container_of(wc->wr_cqe, struct smbdirect_mr_io, cqe); + struct smbdirect_socket *sc = mr->socket; if (wc->status) { log_rdma_mr(ERR, "status=%d\n", wc->status); - cqe = wc->wr_cqe; - mr = container_of(cqe, struct smbd_mr, cqe); - smbd_disconnect_rdma_connection(mr->conn); + smbd_disconnect_rdma_connection(sc); } } @@ -2038,14 +2305,14 @@ static void register_mr_done(struct ib_cq *cq, struct ib_wc *wc) */ static void smbd_mr_recovery_work(struct work_struct *work) { - struct smbd_connection *info = - container_of(work, struct smbd_connection, mr_recovery_work); - struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *smbdirect_mr; + struct smbdirect_socket *sc = + container_of(work, struct smbdirect_socket, mr_io.recovery_work); + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *smbdirect_mr; int rc; - list_for_each_entry(smbdirect_mr, &info->mr_list, list) { - if (smbdirect_mr->state == MR_ERROR) { + list_for_each_entry(smbdirect_mr, &sc->mr_io.all.list, list) { + if (smbdirect_mr->state == SMBDIRECT_MR_ERROR) { /* recover this MR entry */ rc = ib_dereg_mr(smbdirect_mr->mr); @@ -2053,25 +2320,25 @@ static void smbd_mr_recovery_work(struct work_struct *work) log_rdma_mr(ERR, "ib_dereg_mr failed rc=%x\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); continue; } smbdirect_mr->mr = ib_alloc_mr( - sc->ib.pd, info->mr_type, - info->max_frmr_depth); + sc->ib.pd, sc->mr_io.type, + sp->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - info->mr_type, - info->max_frmr_depth); - smbd_disconnect_rdma_connection(info); + sc->mr_io.type, + sp->max_frmr_depth); + smbd_disconnect_rdma_connection(sc); continue; } } else /* This MR is being used, don't recover it */ continue; - smbdirect_mr->state = MR_READY; + smbdirect_mr->state = SMBDIRECT_MR_READY; /* smbdirect_mr->state is updated by this function * and is read and updated by I/O issuing CPUs trying @@ -2080,19 +2347,18 @@ static void smbd_mr_recovery_work(struct work_struct *work) * value is updated before waking up any calls to * get_mr() from the I/O issuing CPUs */ - if (atomic_inc_return(&info->mr_ready_count) == 1) - wake_up_interruptible(&info->wait_mr); + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); } } -static void destroy_mr_list(struct smbd_connection *info) +static void destroy_mr_list(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *mr, *tmp; + struct smbdirect_mr_io *mr, *tmp; - disable_work_sync(&info->mr_recovery_work); - list_for_each_entry_safe(mr, tmp, &info->mr_list, list) { - if (mr->state == MR_INVALIDATED) + disable_work_sync(&sc->mr_io.recovery_work); + list_for_each_entry_safe(mr, tmp, &sc->mr_io.all.list, list) { + if (mr->state == SMBDIRECT_MR_INVALIDATED) ib_dma_unmap_sg(sc->ib.dev, mr->sgt.sgl, mr->sgt.nents, mr->dir); ib_dereg_mr(mr->mr); @@ -2108,32 +2374,32 @@ static void destroy_mr_list(struct smbd_connection *info) * Recovery is done in smbd_mr_recovery_work. The content of list entry changes * as MRs are used and recovered for I/O, but the list links will not change */ -static int allocate_mr_list(struct smbd_connection *info) +static int allocate_mr_list(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket_parameters *sp = &sc->parameters; int i; - struct smbd_mr *smbdirect_mr, *tmp; - - INIT_LIST_HEAD(&info->mr_list); - init_waitqueue_head(&info->wait_mr); - spin_lock_init(&info->mr_list_lock); - atomic_set(&info->mr_ready_count, 0); - atomic_set(&info->mr_used_count, 0); - init_waitqueue_head(&info->wait_for_mr_cleanup); - INIT_WORK(&info->mr_recovery_work, smbd_mr_recovery_work); + struct smbdirect_mr_io *smbdirect_mr, *tmp; + + INIT_WORK(&sc->mr_io.recovery_work, smbd_mr_recovery_work); + + if (sp->responder_resources == 0) { + log_rdma_mr(ERR, "responder_resources negotiated as 0\n"); + return -EINVAL; + } + /* Allocate more MRs (2x) than hardware responder_resources */ - for (i = 0; i < info->responder_resources * 2; i++) { + for (i = 0; i < sp->responder_resources * 2; i++) { smbdirect_mr = kzalloc(sizeof(*smbdirect_mr), GFP_KERNEL); if (!smbdirect_mr) goto cleanup_entries; - smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, info->mr_type, - info->max_frmr_depth); + smbdirect_mr->mr = ib_alloc_mr(sc->ib.pd, sc->mr_io.type, + sp->max_frmr_depth); if (IS_ERR(smbdirect_mr->mr)) { log_rdma_mr(ERR, "ib_alloc_mr failed mr_type=%x max_frmr_depth=%x\n", - info->mr_type, info->max_frmr_depth); + sc->mr_io.type, sp->max_frmr_depth); goto out; } - smbdirect_mr->sgt.sgl = kcalloc(info->max_frmr_depth, + smbdirect_mr->sgt.sgl = kcalloc(sp->max_frmr_depth, sizeof(struct scatterlist), GFP_KERNEL); if (!smbdirect_mr->sgt.sgl) { @@ -2141,18 +2407,18 @@ static int allocate_mr_list(struct smbd_connection *info) ib_dereg_mr(smbdirect_mr->mr); goto out; } - smbdirect_mr->state = MR_READY; - smbdirect_mr->conn = info; + smbdirect_mr->state = SMBDIRECT_MR_READY; + smbdirect_mr->socket = sc; - list_add_tail(&smbdirect_mr->list, &info->mr_list); - atomic_inc(&info->mr_ready_count); + list_add_tail(&smbdirect_mr->list, &sc->mr_io.all.list); + atomic_inc(&sc->mr_io.ready.count); } return 0; out: kfree(smbdirect_mr); cleanup_entries: - list_for_each_entry_safe(smbdirect_mr, tmp, &info->mr_list, list) { + list_for_each_entry_safe(smbdirect_mr, tmp, &sc->mr_io.all.list, list) { list_del(&smbdirect_mr->list); ib_dereg_mr(smbdirect_mr->mr); kfree(smbdirect_mr->sgt.sgl); @@ -2169,14 +2435,14 @@ cleanup_entries: * issuing I/O trying to get MR at the same time, mr_list_lock is used to * protect this situation. */ -static struct smbd_mr *get_mr(struct smbd_connection *info) +static struct smbdirect_mr_io *get_mr(struct smbdirect_socket *sc) { - struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *ret; + struct smbdirect_mr_io *ret; + unsigned long flags; int rc; again: - rc = wait_event_interruptible(info->wait_mr, - atomic_read(&info->mr_ready_count) || + rc = wait_event_interruptible(sc->mr_io.ready.wait_queue, + atomic_read(&sc->mr_io.ready.count) || sc->status != SMBDIRECT_SOCKET_CONNECTED); if (rc) { log_rdma_mr(ERR, "wait_event_interruptible rc=%x\n", rc); @@ -2188,18 +2454,18 @@ again: return NULL; } - spin_lock(&info->mr_list_lock); - list_for_each_entry(ret, &info->mr_list, list) { - if (ret->state == MR_READY) { - ret->state = MR_REGISTERED; - spin_unlock(&info->mr_list_lock); - atomic_dec(&info->mr_ready_count); - atomic_inc(&info->mr_used_count); + spin_lock_irqsave(&sc->mr_io.all.lock, flags); + list_for_each_entry(ret, &sc->mr_io.all.list, list) { + if (ret->state == SMBDIRECT_MR_READY) { + ret->state = SMBDIRECT_MR_REGISTERED; + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); + atomic_dec(&sc->mr_io.ready.count); + atomic_inc(&sc->mr_io.used.count); return ret; } } - spin_unlock(&info->mr_list_lock); + spin_unlock_irqrestore(&sc->mr_io.all.lock, flags); /* * It is possible that we could fail to get MR because other processes may * try to acquire a MR at the same time. If this is the case, retry it. @@ -2210,8 +2476,7 @@ again: /* * Transcribe the pages from an iterator into an MR scatterlist. */ -static int smbd_iter_to_mr(struct smbd_connection *info, - struct iov_iter *iter, +static int smbd_iter_to_mr(struct iov_iter *iter, struct sg_table *sgt, unsigned int max_sg) { @@ -2233,25 +2498,26 @@ static int smbd_iter_to_mr(struct smbd_connection *info, * need_invalidate: true if this MR needs to be locally invalidated after I/O * return value: the MR registered, NULL if failed. */ -struct smbd_mr *smbd_register_mr(struct smbd_connection *info, +struct smbdirect_mr_io *smbd_register_mr(struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate) { struct smbdirect_socket *sc = &info->socket; - struct smbd_mr *smbdirect_mr; + struct smbdirect_socket_parameters *sp = &sc->parameters; + struct smbdirect_mr_io *smbdirect_mr; int rc, num_pages; enum dma_data_direction dir; struct ib_reg_wr *reg_wr; - num_pages = iov_iter_npages(iter, info->max_frmr_depth + 1); - if (num_pages > info->max_frmr_depth) { + num_pages = iov_iter_npages(iter, sp->max_frmr_depth + 1); + if (num_pages > sp->max_frmr_depth) { log_rdma_mr(ERR, "num_pages=%d max_frmr_depth=%d\n", - num_pages, info->max_frmr_depth); + num_pages, sp->max_frmr_depth); WARN_ON_ONCE(1); return NULL; } - smbdirect_mr = get_mr(info); + smbdirect_mr = get_mr(sc); if (!smbdirect_mr) { log_rdma_mr(ERR, "get_mr returning NULL\n"); return NULL; @@ -2264,8 +2530,8 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, smbdirect_mr->sgt.orig_nents = 0; log_rdma_mr(INFO, "num_pages=0x%x count=0x%zx depth=%u\n", - num_pages, iov_iter_count(iter), info->max_frmr_depth); - smbd_iter_to_mr(info, iter, &smbdirect_mr->sgt, info->max_frmr_depth); + num_pages, iov_iter_count(iter), sp->max_frmr_depth); + smbd_iter_to_mr(iter, &smbdirect_mr->sgt, sp->max_frmr_depth); rc = ib_dma_map_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, dir); @@ -2310,32 +2576,32 @@ struct smbd_mr *smbd_register_mr(struct smbd_connection *info, log_rdma_mr(ERR, "ib_post_send failed rc=%x reg_wr->key=%x\n", rc, reg_wr->key); - /* If all failed, attempt to recover this MR by setting it MR_ERROR*/ + /* If all failed, attempt to recover this MR by setting it SMBDIRECT_MR_ERROR*/ map_mr_error: ib_dma_unmap_sg(sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); dma_map_error: - smbdirect_mr->state = MR_ERROR; - if (atomic_dec_and_test(&info->mr_used_count)) - wake_up(&info->wait_for_mr_cleanup); + smbdirect_mr->state = SMBDIRECT_MR_ERROR; + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); return NULL; } static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) { - struct smbd_mr *smbdirect_mr; + struct smbdirect_mr_io *smbdirect_mr; struct ib_cqe *cqe; cqe = wc->wr_cqe; - smbdirect_mr = container_of(cqe, struct smbd_mr, cqe); - smbdirect_mr->state = MR_INVALIDATED; + smbdirect_mr = container_of(cqe, struct smbdirect_mr_io, cqe); + smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; if (wc->status != IB_WC_SUCCESS) { log_rdma_mr(ERR, "invalidate failed status=%x\n", wc->status); - smbdirect_mr->state = MR_ERROR; + smbdirect_mr->state = SMBDIRECT_MR_ERROR; } complete(&smbdirect_mr->invalidate_done); } @@ -2346,11 +2612,10 @@ static void local_inv_done(struct ib_cq *cq, struct ib_wc *wc) * and we have to locally invalidate the buffer to prevent data is being * modified by remote peer after upper layer consumes it */ -int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) +int smbd_deregister_mr(struct smbdirect_mr_io *smbdirect_mr) { struct ib_send_wr *wr; - struct smbd_connection *info = smbdirect_mr->conn; - struct smbdirect_socket *sc = &info->socket; + struct smbdirect_socket *sc = smbdirect_mr->socket; int rc = 0; if (smbdirect_mr->need_invalidate) { @@ -2367,36 +2632,36 @@ int smbd_deregister_mr(struct smbd_mr *smbdirect_mr) rc = ib_post_send(sc->ib.qp, wr, NULL); if (rc) { log_rdma_mr(ERR, "ib_post_send failed rc=%x\n", rc); - smbd_disconnect_rdma_connection(info); + smbd_disconnect_rdma_connection(sc); goto done; } wait_for_completion(&smbdirect_mr->invalidate_done); smbdirect_mr->need_invalidate = false; } else /* - * For remote invalidation, just set it to MR_INVALIDATED + * For remote invalidation, just set it to SMBDIRECT_MR_INVALIDATED * and defer to mr_recovery_work to recover the MR for next use */ - smbdirect_mr->state = MR_INVALIDATED; + smbdirect_mr->state = SMBDIRECT_MR_INVALIDATED; - if (smbdirect_mr->state == MR_INVALIDATED) { + if (smbdirect_mr->state == SMBDIRECT_MR_INVALIDATED) { ib_dma_unmap_sg( sc->ib.dev, smbdirect_mr->sgt.sgl, smbdirect_mr->sgt.nents, smbdirect_mr->dir); - smbdirect_mr->state = MR_READY; - if (atomic_inc_return(&info->mr_ready_count) == 1) - wake_up_interruptible(&info->wait_mr); + smbdirect_mr->state = SMBDIRECT_MR_READY; + if (atomic_inc_return(&sc->mr_io.ready.count) == 1) + wake_up(&sc->mr_io.ready.wait_queue); } else /* * Schedule the work to do MR recovery for future I/Os MR * recovery is slow and don't want it to block current I/O */ - queue_work(info->workqueue, &info->mr_recovery_work); + queue_work(sc->workqueue, &sc->mr_io.recovery_work); done: - if (atomic_dec_and_test(&info->mr_used_count)) - wake_up(&info->wait_for_mr_cleanup); + if (atomic_dec_and_test(&sc->mr_io.used.count)) + wake_up(&sc->mr_io.cleanup.wait_queue); return rc; } diff --git a/fs/smb/client/smbdirect.h b/fs/smb/client/smbdirect.h index e45aa9ddd71d..d67ac5ddaff4 100644 --- a/fs/smb/client/smbdirect.h +++ b/fs/smb/client/smbdirect.h @@ -27,12 +27,6 @@ extern int smbd_max_send_size; extern int smbd_send_credit_target; extern int smbd_receive_credit_max; -enum keep_alive_status { - KEEP_ALIVE_NONE, - KEEP_ALIVE_PENDING, - KEEP_ALIVE_SENT, -}; - /* * The context for the SMBDirect transport * Everything related to the transport is here. It has several logical parts @@ -44,79 +38,14 @@ enum keep_alive_status { */ struct smbd_connection { struct smbdirect_socket socket; - - int ri_rc; - struct completion ri_done; - wait_queue_head_t status_wait; - - struct completion negotiate_completion; - bool negotiate_done; - - struct work_struct disconnect_work; - struct work_struct post_send_credits_work; - - spinlock_t lock_new_credits_offered; - int new_credits_offered; - - /* dynamic connection parameters defined in [MS-SMBD] 3.1.1.1 */ - enum keep_alive_status keep_alive_requested; - int protocol; - atomic_t send_credits; - atomic_t receive_credits; - int receive_credit_target; - - /* Memory registrations */ - /* Maximum number of RDMA read/write outstanding on this connection */ - int responder_resources; - /* Maximum number of pages in a single RDMA write/read on this connection */ - int max_frmr_depth; - /* - * If payload is less than or equal to the threshold, - * use RDMA send/recv to send upper layer I/O. - * If payload is more than the threshold, - * use RDMA read/write through memory registration for I/O. - */ - int rdma_readwrite_threshold; - enum ib_mr_type mr_type; - struct list_head mr_list; - spinlock_t mr_list_lock; - /* The number of available MRs ready for memory registration */ - atomic_t mr_ready_count; - atomic_t mr_used_count; - wait_queue_head_t wait_mr; - struct work_struct mr_recovery_work; - /* Used by transport to wait until all MRs are returned */ - wait_queue_head_t wait_for_mr_cleanup; - - /* Activity accounting */ - atomic_t send_pending; - wait_queue_head_t wait_send_pending; - wait_queue_head_t wait_post_send; - - /* Receive queue */ - int count_receive_queue; - wait_queue_head_t wait_receive_queues; - - bool send_immediate; - - wait_queue_head_t wait_send_queue; - - struct workqueue_struct *workqueue; - struct delayed_work idle_timer_work; - - /* for debug purposes */ - unsigned int count_get_receive_buffer; - unsigned int count_put_receive_buffer; - unsigned int count_reassembly_queue; - unsigned int count_enqueue_reassembly_queue; - unsigned int count_dequeue_reassembly_queue; - unsigned int count_send_empty; }; /* Create a SMBDirect session */ struct smbd_connection *smbd_get_connection( struct TCP_Server_Info *server, struct sockaddr *dstaddr); +const struct smbdirect_socket_parameters *smbd_get_parameters(struct smbd_connection *conn); + /* Reconnect SMBDirect session */ int smbd_reconnect(struct TCP_Server_Info *server); /* Destroy SMBDirect session */ @@ -127,34 +56,11 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg); int smbd_send(struct TCP_Server_Info *server, int num_rqst, struct smb_rqst *rqst); -enum mr_state { - MR_READY, - MR_REGISTERED, - MR_INVALIDATED, - MR_ERROR -}; - -struct smbd_mr { - struct smbd_connection *conn; - struct list_head list; - enum mr_state state; - struct ib_mr *mr; - struct sg_table sgt; - enum dma_data_direction dir; - union { - struct ib_reg_wr wr; - struct ib_send_wr inv_wr; - }; - struct ib_cqe cqe; - bool need_invalidate; - struct completion invalidate_done; -}; - /* Interfaces to register and deregister MR for RDMA read/write */ -struct smbd_mr *smbd_register_mr( +struct smbdirect_mr_io *smbd_register_mr( struct smbd_connection *info, struct iov_iter *iter, bool writing, bool need_invalidate); -int smbd_deregister_mr(struct smbd_mr *mr); +int smbd_deregister_mr(struct smbdirect_mr_io *mr); #else #define cifs_rdma_enabled(server) 0 diff --git a/fs/smb/client/trace.h b/fs/smb/client/trace.h index fd650e2afc76..28e00c34df1c 100644 --- a/fs/smb/client/trace.h +++ b/fs/smb/client/trace.h @@ -266,7 +266,7 @@ DEFINE_EVENT(smb3_copy_range_err_class, smb3_##name, \ TP_ARGS(xid, src_fid, target_fid, tid, sesid, src_offset, target_offset, len, rc)) DEFINE_SMB3_COPY_RANGE_ERR_EVENT(clone_err); -/* TODO: Add SMB3_COPY_RANGE_ERR_EVENT(copychunk_err) */ +DEFINE_SMB3_COPY_RANGE_ERR_EVENT(copychunk_err); DECLARE_EVENT_CLASS(smb3_copy_range_done_class, TP_PROTO(unsigned int xid, diff --git a/fs/smb/client/transport.c b/fs/smb/client/transport.c index a61ba7f3fb86..051cd9dbba13 100644 --- a/fs/smb/client/transport.c +++ b/fs/smb/client/transport.c @@ -22,6 +22,7 @@ #include <linux/mempool.h> #include <linux/sched/signal.h> #include <linux/task_io_accounting_ops.h> +#include <linux/task_work.h> #include "cifspdu.h" #include "cifsglob.h" #include "cifsproto.h" @@ -173,9 +174,16 @@ smb_send_kvec(struct TCP_Server_Info *server, struct msghdr *smb_msg, * send a packet. In most cases if we fail to send * after the retries we will kill the socket and * reconnect which may clear the network problem. + * + * Even if regular signals are masked, EINTR might be + * propagated from sk_stream_wait_memory() to here when + * TIF_NOTIFY_SIGNAL is used for task work. For example, + * certain io_uring completions will use that. Treat + * having EINTR with pending task work the same as EAGAIN + * to avoid unnecessary reconnects. */ rc = sock_sendmsg(ssocket, smb_msg); - if (rc == -EAGAIN) { + if (rc == -EAGAIN || unlikely(rc == -EINTR && task_work_pending(current))) { retries++; if (retries >= 14 || (!server->noblocksnd && (retries > 2))) { @@ -323,8 +331,7 @@ int __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst, break; total_len += sent; } - -} + } unmask: sigprocmask(SIG_SETMASK, &oldmask, NULL); |