From f9bdad8ca8a40270576dd8751ac225febaa87f93 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 29 Oct 2018 13:23:40 -0400 Subject: NFS NFSD: defining nl4_servers structure needed by both These structures are needed by COPY_NOTIFY on the client and needed by the nfsd as well Reviewed-by: Jeff Layton Signed-off-by: Olga Kornievskaia --- include/linux/nfs4.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index fd59904a282c..5810e248c1bd 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -16,6 +16,7 @@ #include #include #include +#include enum nfs4_acl_whotype { NFS4_ACL_WHO_NAMED = 0, @@ -674,4 +675,27 @@ struct nfs4_op_map { } u; }; +struct nfs42_netaddr { + char netid[RPCBIND_MAXNETIDLEN]; + char addr[RPCBIND_MAXUADDRLEN + 1]; + u32 netid_len; + u32 addr_len; +}; + +enum netloc_type4 { + NL4_NAME = 1, + NL4_URL = 2, + NL4_NETADDR = 3, +}; + +struct nl4_server { + enum netloc_type4 nl4_type; + union { + struct { /* NL4_NAME, NL4_URL */ + int nl4_str_sz; + char nl4_str[NFS4_OPAQUE_LIMIT + 1]; + }; + struct nfs42_netaddr nl4_addr; /* NL4_NETADDR */ + } u; +}; #endif -- cgit From 0491567b51efeca807da1125a1a0d5193875e286 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 16:14:30 -0400 Subject: NFS: add COPY_NOTIFY operation Try using the delegation stateid, then the open stateid. Only NL4_NETATTR, No support for NL4_NAME and NL4_URL. Allow only one source server address to be returned for now. To distinguish between same server copy offload ("intra") and a copy between different server ("inter"), do a check of server owner identity and also make sure server is capable of doing a copy offload. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 16 ++++++++++++++++ 3 files changed, 18 insertions(+) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5810e248c1bd..5e7a5261af4e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,6 +537,7 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, + NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..e1c8748e1e82 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -276,5 +276,6 @@ struct nfs_server { #define NFS_CAP_COPY (1U << 24) #define NFS_CAP_OFFLOAD_CANCEL (1U << 25) #define NFS_CAP_LAYOUTERROR (1U << 26) +#define NFS_CAP_COPY_NOTIFY (1U << 27) #endif diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..0a7af40026d7 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1463,6 +1463,22 @@ struct nfs42_offload_status_res { int osr_status; }; +struct nfs42_copy_notify_args { + struct nfs4_sequence_args cna_seq_args; + + struct nfs_fh *cna_src_fh; + nfs4_stateid cna_src_stateid; + struct nl4_server cna_dst; +}; + +struct nfs42_copy_notify_res { + struct nfs4_sequence_res cnr_seq_res; + + struct nfstime4 cnr_lease_time; + nfs4_stateid cnr_stateid; + struct nl4_server cnr_src; +}; + struct nfs42_seek_args { struct nfs4_sequence_args seq_args; -- cgit From 1d38f3f0d70008671f4dc055697ff3c3bb44a284 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Tue, 4 Jun 2019 11:54:18 -0400 Subject: NFS: add ca_source_server<> to COPY Support only one source server address: the same address that the client and source server use. Signed-off-by: Andy Adamson Signed-off-by: Olga Kornievskaia --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0a7af40026d7..008facac8a30 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1435,6 +1435,7 @@ struct nfs42_copy_args { u64 count; bool sync; + struct nl4_server *cp_src; }; struct nfs42_write_res { -- cgit From 0e65a32c8a569db363048e17a708b1a0913adbef Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Fri, 14 Jun 2019 14:38:40 -0400 Subject: NFS: handle source server reboot When the source server reboots after a server-to-server copy was issued, we need to retry the copy from COPY_NOTIFY. We need to detect that the source server rebooted and there is a copy waiting on a destination server and wake it up. Signed-off-by: Olga Kornievskaia --- include/linux/nfs_fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 570a60c2f4f4..c06b1fd130f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -189,13 +189,15 @@ struct nfs_inode { struct nfs4_copy_state { struct list_head copies; + struct list_head src_copies; nfs4_stateid stateid; struct completion completion; uint64_t count; struct nfs_writeverf verf; int error; int flags; - struct nfs4_state *parent_state; + struct nfs4_state *parent_src_state; + struct nfs4_state *parent_dst_state; }; /* -- cgit From bf7ca707ae60045342e145c88a83bbe00f66775f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 12:58:14 -0400 Subject: SUNRPC: Add trace points to observe transport congestion control To help debug problems with RPC/RDMA credit management, replace dprintk() call sites in the transport send lock paths with trace events. Similar trace points are defined for the non-congestion paths. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/sunrpc.h | 93 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ffa3c51dbb1a..378233fe5ac7 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -777,6 +777,99 @@ TRACE_EVENT(xprt_ping, __get_str(addr), __get_str(port), __entry->status) ); +DECLARE_EVENT_CLASS(xprt_writelock_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + ), + + TP_printk("task:%u@%u snd_task:%u", + __entry->task_id, __entry->client_id, + __entry->snd_task_id) +); + +#define DEFINE_WRITELOCK_EVENT(name) \ + DEFINE_EVENT(xprt_writelock_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_WRITELOCK_EVENT(reserve_xprt); +DEFINE_WRITELOCK_EVENT(release_xprt); + +DECLARE_EVENT_CLASS(xprt_cong_event, + TP_PROTO( + const struct rpc_xprt *xprt, const struct rpc_task *task + ), + + TP_ARGS(xprt, task), + + TP_STRUCT__entry( + __field(unsigned int, task_id) + __field(unsigned int, client_id) + __field(unsigned int, snd_task_id) + __field(unsigned long, cong) + __field(unsigned long, cwnd) + __field(bool, wait) + ), + + TP_fast_assign( + if (task) { + __entry->task_id = task->tk_pid; + __entry->client_id = task->tk_client ? + task->tk_client->cl_clid : -1; + } else { + __entry->task_id = -1; + __entry->client_id = -1; + } + __entry->snd_task_id = xprt->snd_task ? + xprt->snd_task->tk_pid : -1; + __entry->cong = xprt->cong; + __entry->cwnd = xprt->cwnd; + __entry->wait = test_bit(XPRT_CWND_WAIT, &xprt->state); + ), + + TP_printk("task:%u@%u snd_task:%u cong=%lu cwnd=%lu%s", + __entry->task_id, __entry->client_id, + __entry->snd_task_id, __entry->cong, __entry->cwnd, + __entry->wait ? " (wait)" : "") +); + +#define DEFINE_CONG_EVENT(name) \ + DEFINE_EVENT(xprt_cong_event, xprt_##name, \ + TP_PROTO( \ + const struct rpc_xprt *xprt, \ + const struct rpc_task *task \ + ), \ + TP_ARGS(xprt, task)) + +DEFINE_CONG_EVENT(reserve_cong); +DEFINE_CONG_EVENT(release_cong); +DEFINE_CONG_EVENT(get_cong); +DEFINE_CONG_EVENT(put_cong); + TRACE_EVENT(xs_stream_read_data, TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total), -- cgit From 4b93dab36f28e673725e5e6123ebfccf7697f96a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 9 Oct 2019 13:07:21 -0400 Subject: xprtrdma: Add unique trace points for posting Local Invalidate WRs When adding frwr_unmap_async way back when, I re-used the existing trace_xprtrdma_post_send() trace point to record the return code of ib_post_send. Unfortunately there are some cases where re-using that trace point causes a crash. Instead, construct a trace point specific to posting Local Invalidate WRs that will always be safe to use in that context, and will act as a trace log eye-catcher for Local Invalidation. Fixes: 847568942f93 ("xprtrdma: Remove fr_state") Fixes: d8099feda483 ("xprtrdma: Reduce context switching due ... ") Signed-off-by: Chuck Lever Tested-by: Bill Baker Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index a13830616107..7fd11ec1c9a4 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -735,6 +735,31 @@ TRACE_EVENT(xprtrdma_post_recvs, ) ); +TRACE_EVENT(xprtrdma_post_linv, + TP_PROTO( + const struct rpcrdma_req *req, + int status + ), + + TP_ARGS(req, status), + + TP_STRUCT__entry( + __field(const void *, req) + __field(int, status) + __field(u32, xid) + ), + + TP_fast_assign( + __entry->req = req; + __entry->status = status; + __entry->xid = be32_to_cpu(req->rl_slot.rq_xid); + ), + + TP_printk("req=%p xid=0x%08x status=%d", + __entry->req, __entry->xid, __entry->status + ) +); + /** ** Completion events **/ -- cgit From dc15c3d5f16808f7c171b55da6a82a5c0f279647 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:35 -0400 Subject: xprtrdma: Move the rpcrdma_sendctx::sc_wr field Clean up: This field is not needed in the Send completion handler, so it can be moved to struct rpcrdma_req to reduce the size of struct rpcrdma_sendctx, and to reduce the amount of memory that is sloshed between the sending process and the Send completion process. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 7fd11ec1c9a4..f8edab91e09c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -667,9 +667,8 @@ TRACE_EVENT(xprtrdma_post_send, __entry->client_id = rqst->rq_task->tk_client ? rqst->rq_task->tk_client->cl_clid : -1; __entry->req = req; - __entry->num_sge = req->rl_sendctx->sc_wr.num_sge; - __entry->signaled = req->rl_sendctx->sc_wr.send_flags & - IB_SEND_SIGNALED; + __entry->num_sge = req->rl_wr.num_sge; + __entry->signaled = req->rl_wr.send_flags & IB_SEND_SIGNALED; __entry->status = status; ), -- cgit From 614f3c96d7e5efd1c4dc699524857130a52c6a7f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 17 Oct 2019 14:31:53 -0400 Subject: xprtrdma: Pull up sometimes On some platforms, DMA mapping part of a page is more costly than copying bytes. Restore the pull-up code and use that when we think it's going to be faster. The heuristic for now is to pull-up when the size of the RPC message body fits in the buffer underlying the head iovec. Indeed, not involving the I/O MMU can help the RPC/RDMA transport scale better for tiny I/Os across more RDMA devices. This is because interaction with the I/O MMU is eliminated, as is handling a Send completion, for each of these small I/Os. Without the explicit unmapping, the NIC no longer needs to do a costly internal TLB shoot down for buffers that are just a handful of bytes. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f8edab91e09c..213c72585a5f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -532,6 +532,8 @@ DEFINE_WRCH_EVENT(write); DEFINE_WRCH_EVENT(reply); TRACE_DEFINE_ENUM(rpcrdma_noch); +TRACE_DEFINE_ENUM(rpcrdma_noch_pullup); +TRACE_DEFINE_ENUM(rpcrdma_noch_mapped); TRACE_DEFINE_ENUM(rpcrdma_readch); TRACE_DEFINE_ENUM(rpcrdma_areadch); TRACE_DEFINE_ENUM(rpcrdma_writech); @@ -540,6 +542,8 @@ TRACE_DEFINE_ENUM(rpcrdma_replych); #define xprtrdma_show_chunktype(x) \ __print_symbolic(x, \ { rpcrdma_noch, "inline" }, \ + { rpcrdma_noch_pullup, "pullup" }, \ + { rpcrdma_noch_mapped, "mapped" }, \ { rpcrdma_readch, "read list" }, \ { rpcrdma_areadch, "*read list" }, \ { rpcrdma_writech, "write list" }, \ -- cgit From 7b020f17bbd34c219419b634d9efb9e93a3af4c2 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:01:58 -0400 Subject: xprtrdma: Report the computed connect delay For debugging, the op_connect trace point should report the computed connect delay. We can then ensure that the delay is computed at the proper times, for example. As a further clean-up, remove a few low-value "heartbeat" trace points in the connect path. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 77 ++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 213c72585a5f..99e3c61609b2 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -85,6 +85,44 @@ DECLARE_EVENT_CLASS(xprtrdma_rxprt, ), \ TP_ARGS(r_xprt)) +DECLARE_EVENT_CLASS(xprtrdma_connect_class, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt, + int rc + ), + + TP_ARGS(r_xprt, rc), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(int, rc) + __field(int, connect_status) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + __entry->r_xprt = r_xprt; + __entry->rc = rc; + __entry->connect_status = r_xprt->rx_ep.rep_connected; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->rc, __entry->connect_status + ) +); + +#define DEFINE_CONN_EVENT(name) \ + DEFINE_EVENT(xprtrdma_connect_class, xprtrdma_##name, \ + TP_PROTO( \ + const struct rpcrdma_xprt *r_xprt, \ + int rc \ + ), \ + TP_ARGS(r_xprt, rc)) + DECLARE_EVENT_CLASS(xprtrdma_rdch_event, TP_PROTO( const struct rpc_task *task, @@ -333,47 +371,44 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); -TRACE_EVENT(xprtrdma_disconnect, +DEFINE_CONN_EVENT(connect); +DEFINE_CONN_EVENT(disconnect); + +DEFINE_RXPRT_EVENT(xprtrdma_create); +DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); +DEFINE_RXPRT_EVENT(xprtrdma_remove); +DEFINE_RXPRT_EVENT(xprtrdma_reinsert); +DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); +DEFINE_RXPRT_EVENT(xprtrdma_op_close); + +TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( const struct rpcrdma_xprt *r_xprt, - int status + unsigned long delay ), - TP_ARGS(r_xprt, status), + TP_ARGS(r_xprt, delay), TP_STRUCT__entry( __field(const void *, r_xprt) - __field(int, status) - __field(int, connected) + __field(unsigned long, delay) __string(addr, rpcrdma_addrstr(r_xprt)) __string(port, rpcrdma_portstr(r_xprt)) ), TP_fast_assign( __entry->r_xprt = r_xprt; - __entry->status = status; - __entry->connected = r_xprt->rx_ep.rep_connected; + __entry->delay = delay; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: status=%d %sconnected", - __get_str(addr), __get_str(port), - __entry->r_xprt, __entry->status, - __entry->connected == 1 ? "still " : "dis" + TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->delay ) ); -DEFINE_RXPRT_EVENT(xprtrdma_conn_start); -DEFINE_RXPRT_EVENT(xprtrdma_conn_tout); -DEFINE_RXPRT_EVENT(xprtrdma_create); -DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); -DEFINE_RXPRT_EVENT(xprtrdma_remove); -DEFINE_RXPRT_EVENT(xprtrdma_reinsert); -DEFINE_RXPRT_EVENT(xprtrdma_reconnect); -DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); -DEFINE_RXPRT_EVENT(xprtrdma_op_close); -DEFINE_RXPRT_EVENT(xprtrdma_op_connect); TRACE_EVENT(xprtrdma_op_set_cto, TP_PROTO( -- cgit From d4957f01d29b2a01200117fc04b9faaa52aca4bf Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:03 -0400 Subject: xprtrdma: Refine trace_xprtrdma_fixup Slightly reduce overhead and display more useful information. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 60 +++++++++--------------------------------- 1 file changed, 13 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 99e3c61609b2..542177c5896f 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1084,66 +1084,32 @@ DEFINE_REPLY_EVENT(xprtrdma_reply_hdr); TRACE_EVENT(xprtrdma_fixup, TP_PROTO( const struct rpc_rqst *rqst, - int len, - int hdrlen + unsigned long fixup ), - TP_ARGS(rqst, len, hdrlen), + TP_ARGS(rqst, fixup), TP_STRUCT__entry( __field(unsigned int, task_id) __field(unsigned int, client_id) - __field(const void *, base) - __field(int, len) - __field(int, hdrlen) - ), - - TP_fast_assign( - __entry->task_id = rqst->rq_task->tk_pid; - __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->base = rqst->rq_rcv_buf.head[0].iov_base; - __entry->len = len; - __entry->hdrlen = hdrlen; - ), - - TP_printk("task:%u@%u base=%p len=%d hdrlen=%d", - __entry->task_id, __entry->client_id, - __entry->base, __entry->len, __entry->hdrlen - ) -); - -TRACE_EVENT(xprtrdma_fixup_pg, - TP_PROTO( - const struct rpc_rqst *rqst, - int pageno, - const void *pos, - int len, - int curlen - ), - - TP_ARGS(rqst, pageno, pos, len, curlen), - - TP_STRUCT__entry( - __field(unsigned int, task_id) - __field(unsigned int, client_id) - __field(const void *, pos) - __field(int, pageno) - __field(int, len) - __field(int, curlen) + __field(unsigned long, fixup) + __field(size_t, headlen) + __field(unsigned int, pagelen) + __field(size_t, taillen) ), TP_fast_assign( __entry->task_id = rqst->rq_task->tk_pid; __entry->client_id = rqst->rq_task->tk_client->cl_clid; - __entry->pos = pos; - __entry->pageno = pageno; - __entry->len = len; - __entry->curlen = curlen; + __entry->fixup = fixup; + __entry->headlen = rqst->rq_rcv_buf.head[0].iov_len; + __entry->pagelen = rqst->rq_rcv_buf.page_len; + __entry->taillen = rqst->rq_rcv_buf.tail[0].iov_len; ), - TP_printk("task:%u@%u pageno=%d pos=%p len=%d curlen=%d", - __entry->task_id, __entry->client_id, - __entry->pageno, __entry->pos, __entry->len, __entry->curlen + TP_printk("task:%u@%u fixup=%lu xdr=%zu/%u/%zu", + __entry->task_id, __entry->client_id, __entry->fixup, + __entry->headlen, __entry->pagelen, __entry->taillen ) ); -- cgit From f54c870d326aa02b73b68d2e0a503ec81dd3a4e4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:09 -0400 Subject: xprtrdma: Replace dprintk() in rpcrdma_update_connect_private() Clean up: Use a single trace point to record each connection's negotiated inline thresholds and the computed maximum byte size of transport headers. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 542177c5896f..0ca118bcc5ce 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -371,6 +371,42 @@ TRACE_EVENT(xprtrdma_cm_event, ) ); +TRACE_EVENT(xprtrdma_inline_thresh, + TP_PROTO( + const struct rpcrdma_xprt *r_xprt + ), + + TP_ARGS(r_xprt), + + TP_STRUCT__entry( + __field(const void *, r_xprt) + __field(unsigned int, inline_send) + __field(unsigned int, inline_recv) + __field(unsigned int, max_send) + __field(unsigned int, max_recv) + __string(addr, rpcrdma_addrstr(r_xprt)) + __string(port, rpcrdma_portstr(r_xprt)) + ), + + TP_fast_assign( + const struct rpcrdma_ep *ep = &r_xprt->rx_ep; + + __entry->r_xprt = r_xprt; + __entry->inline_send = ep->rep_inline_send; + __entry->inline_recv = ep->rep_inline_recv; + __entry->max_send = ep->rep_max_inline_send; + __entry->max_recv = ep->rep_max_inline_recv; + __assign_str(addr, rpcrdma_addrstr(r_xprt)); + __assign_str(port, rpcrdma_portstr(r_xprt)); + ), + + TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", + __get_str(addr), __get_str(port), __entry->r_xprt, + __entry->inline_send, __entry->inline_recv, + __entry->max_send, __entry->max_recv + ) +); + DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); -- cgit From a52c23b8b207d676d6cdf531af482a79fa622b9d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 23 Oct 2019 10:02:14 -0400 Subject: xprtrdma: Replace dprintk in xprt_rdma_set_port Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 0ca118bcc5ce..69a8278e5cef 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -416,6 +416,7 @@ DEFINE_RXPRT_EVENT(xprtrdma_remove); DEFINE_RXPRT_EVENT(xprtrdma_reinsert); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); +DEFINE_RXPRT_EVENT(xprtrdma_op_setport); TRACE_EVENT(xprtrdma_op_connect, TP_PROTO( -- cgit From e86d5a02874c1364c50e1b532481835b54173ed2 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 16:38:56 -0400 Subject: NFS: Convert struct nfs_fattr to use struct timespec64 NFSv4 supports 64-bit times, so we should switch to using struct timespec64 when decoding attributes. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 2 +- include/linux/nfs_xdr.h | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a87fe854f008..47266870a235 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -171,7 +171,7 @@ struct nfs_server { struct nfs_fsid fsid; __u64 maxfilesize; /* maximum file size */ - struct timespec time_delta; /* smallest time granularity */ + struct timespec64 time_delta; /* smallest time granularity */ unsigned long mount_time; /* when this fs was mounted */ struct super_block *super; /* VFS super block */ dev_t s_dev; /* superblock dev numbers */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 9b8324ec08f3..db5c01001937 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -62,14 +62,14 @@ struct nfs_fattr { struct nfs_fsid fsid; __u64 fileid; __u64 mounted_on_fileid; - struct timespec atime; - struct timespec mtime; - struct timespec ctime; + struct timespec64 atime; + struct timespec64 mtime; + struct timespec64 ctime; __u64 change_attr; /* NFSv4 change attribute */ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ __u64 pre_size; /* pre_op_attr.size */ - struct timespec pre_mtime; /* pre_op_attr.mtime */ - struct timespec pre_ctime; /* pre_op_attr.ctime */ + struct timespec64 pre_mtime; /* pre_op_attr.mtime */ + struct timespec64 pre_ctime; /* pre_op_attr.ctime */ unsigned long time_start; unsigned long gencount; struct nfs4_string *owner_name; @@ -143,7 +143,7 @@ struct nfs_fsinfo { __u32 wtmult; /* writes should be multiple of this */ __u32 dtpref; /* pref. readdir transfer size */ __u64 maxfilesize; - struct timespec time_delta; /* server time granularity */ + struct timespec64 time_delta; /* server time granularity */ __u32 lease_time; /* in seconds */ __u32 nlayouttypes; /* number of layouttypes */ __u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */ -- cgit From 6430b323ae09f146dfc26e6d17c432bfc3d11452 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 4 Oct 2019 17:00:02 -0400 Subject: NFSv3: Clean up timespec encode Simplify the struct iattr timestamp encoding by skipping the step of an intermediate struct timespec. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index db5c01001937..22bc6613474e 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -869,7 +869,7 @@ struct nfs3_sattrargs { struct nfs_fh * fh; struct iattr * sattr; unsigned int guard; - struct timespec guardtime; + struct timespec64 guardtime; }; struct nfs3_diropargs { -- cgit From 4b1b69cedf9de8c203101ea74510c07d428538f7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 3 Oct 2019 14:08:43 -0400 Subject: NFS: Add a flag to tell nfs_client to set RPC_CLNT_CREATE_NOPING Add a flag to tell the nfs_client it should set RPC_CLNT_CREATE_NOPING when creating the rpc client. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 47266870a235..a50dd432475b 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -45,6 +45,7 @@ struct nfs_client { #define NFS_CS_INFINITE_SLOTS 3 /* - don't limit TCP slots */ #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ +#define NFS_CS_NOPING 6 /* - don't ping on connect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit From 52f98f1a2ddd2bb561f2c7e3b19a81d816a63118 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 09:49:45 -0400 Subject: NFS/pnfs: Separate NFSv3 DS and MDS traffic If a NFSv3 server is being used as both a DS and as a regular NFSv3 server, we may want to keep the IO traffic on a separate TCP connection, since it will typically have very different timeout characteristics. This patch therefore sets up a flag to separate the two modes of operation for the nfs_client. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50dd432475b..69e80cef5a81 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -46,6 +46,7 @@ struct nfs_client { #define NFS_CS_NO_RETRANS_TIMEOUT 4 /* - Disable retransmit timeouts */ #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ +#define NFS_CS_DS 7 /* - Server is a DS */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ -- cgit From e6237b6feb37582fbd6bd7a8336d1256a6b4b4f9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Oct 2019 11:13:54 -0400 Subject: NFSv4.1: Don't rebind to the same source port when reconnecting to the server NFSv2, v3 and NFSv4 servers often have duplicate replay caches that look at the source port when deciding whether or not an RPC call is a replay of a previous call. This requires clients to perform strange TCP gymnastics in order to ensure that when they reconnect to the server, they bind to the same source port. NFSv4.1 and NFSv4.2 have sessions that provide proper replay semantics, that do not look at the source port of the connection. This patch therefore ensures they can ignore the rebind requirement. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/xprt.h | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 69e80cef5a81..df61ff8981e8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -47,6 +47,7 @@ struct nfs_client { #define NFS_CS_TSM_POSSIBLE 5 /* - Maybe state migration */ #define NFS_CS_NOPING 6 /* - don't ping on connect */ #define NFS_CS_DS 7 /* - Server is a DS */ +#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */ struct sockaddr_storage cl_addr; /* server identifier */ size_t cl_addrlen; char * cl_hostname; /* hostname of server */ diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index abc63bd1be2b..ec52e78d432b 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -149,6 +149,7 @@ struct rpc_add_xprt_test { #define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT (1UL << 8) #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) #define RPC_CLNT_CREATE_SOFTERR (1UL << 10) +#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11) struct rpc_clnt *rpc_create(struct rpc_create_args *args); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index d783e15ba898..ccd35cf4fc41 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -207,7 +207,8 @@ struct rpc_xprt { unsigned int min_reqs; /* min number of slots */ unsigned int num_reqs; /* total slots */ unsigned long state; /* transport state */ - unsigned char resvport : 1; /* use a reserved port */ + unsigned char resvport : 1, /* use a reserved port */ + reuseport : 1; /* reuse port on reconnect */ atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ -- cgit From 634d811c619b0dbe16dc890a53d2c978e9d055d5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 5 Nov 2019 14:59:02 -0500 Subject: nfsv4: Move NFSPROC4_CLNT_COPY_NOTIFY to end of list We shouldn't insert things into the NFSPROC4_CLNT enums, since that causes the nfsstat array to be reordered. Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 5e7a5261af4e..82d8fb422092 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -537,10 +537,11 @@ enum { NFSPROC4_CLNT_CLONE, NFSPROC4_CLNT_COPY, NFSPROC4_CLNT_OFFLOAD_CANCEL, - NFSPROC4_CLNT_COPY_NOTIFY, NFSPROC4_CLNT_LOOKUPP, NFSPROC4_CLNT_LAYOUTERROR, + + NFSPROC4_CLNT_COPY_NOTIFY, }; /* nfs41 types */ -- cgit From a264abad51d8ecb7954a2f6d9f1885b38daffc74 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 Nov 2019 16:25:52 -0500 Subject: SUNRPC: Capture completion of all RPC tasks RPC tasks on the backchannel never invoke xprt_complete_rqst(), so there is no way to report their tk_status at completion. Also, any RPC task that exits via rpc_exit_task() before it is replied to will also disappear without a trace. Introduce a trace point that is symmetrical with rpc_task_begin that captures the termination status of each RPC task. Sample trace output for callback requests initiated on the server: kworker/u8:12-448 [003] 127.025240: rpc_task_end: task:50@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [002] 127.567310: rpc_task_end: task:51@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task kworker/u8:12-448 [001] 130.506817: rpc_task_end: task:52@3 flags=ASYNC|DYNAMIC|SOFT|SOFTCONN|SENT runstate=RUNNING|ACTIVE status=0 action=rpc_exit_task Odd, though, that I never see trace_rpc_task_complete, either in the forward or backchannel. Should it be removed? Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/trace/events/sunrpc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 378233fe5ac7..205bf28bcada 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -165,6 +165,7 @@ DECLARE_EVENT_CLASS(rpc_task_running, DEFINE_RPC_RUNNING_EVENT(begin); DEFINE_RPC_RUNNING_EVENT(run_action); DEFINE_RPC_RUNNING_EVENT(complete); +DEFINE_RPC_RUNNING_EVENT(end); DECLARE_EVENT_CLASS(rpc_task_queued, -- cgit