Subject: Re: [PATCH v2 2/5] xprtrdma: Per-connection pad optimization
From: Chuck Lever
Date: Tue, 24 Jan 2017 14:16:13 -0500
To: Anna Schumaker
Cc: linux-rdma@vger.kernel.org, Linux NFS Mailing List
In-Reply-To: <19a00a6f-9502-161a-d37b-7371b07662ed@Netapp.com>
References: <20170123205159.21699.47373.stgit@manet.1015granger.net> <20170123205254.21699.44329.stgit@manet.1015granger.net> <19a00a6f-9502-161a-d37b-7371b07662ed@Netapp.com>
Message-Id: <575D5F73-2D4B-4FDA-B196-0D683B459F9D@oracle.com>

> On Jan 24, 2017, at 2:12 PM, Anna Schumaker wrote:
> 
> Hi Chuck,
> 
> On 01/23/2017 03:52 PM, Chuck Lever wrote:
>> Pad optimization is changed by echoing into
>> /proc/sys/sunrpc/rdma_pad_optimize. This is a global setting,
>> affecting all RPC-over-RDMA connections to all servers.
>>
>> The marshaling code picks up that value and uses it for decisions
>> about how to construct each RPC-over-RDMA frame. Having it change
>> suddenly in mid-operation can result in unexpected failures. And
>> some servers a client mounts might need chunk round-up, while
>> others don't.
>>
>> So instead, copy the pad_optimize setting into each connection's
>> rpcrdma_ia when the transport is created, and use that copy, which
>> cannot change during the life of the connection.
> 
> Does the rdma_pad_optimize option have documentation somewhere that
> needs to be updated to describe the new behavior?

I'm not aware of any. No documentation was changed by d5440e27d3e5.

> Anna
> 
>> 
>> This also removes a hack: rpcrdma_convert_iovs was using the
>> remote-invalidation-expected flag to predict when it could leave
>> out Write chunk padding. This is because the Linux server handles
>> implicit XDR padding on Write chunks correctly, and only Linux
>> servers can set the connection's remote-invalidation-expected flag.
>>
>> It's more sensible to use the pad optimization setting instead.
>>
>> Signed-off-by: Chuck Lever
>> ---
>>  net/sunrpc/xprtrdma/rpc_rdma.c  | 28 ++++++++++++++--------------
>>  net/sunrpc/xprtrdma/verbs.c     |  1 +
>>  net/sunrpc/xprtrdma/xprt_rdma.h |  1 +
>>  3 files changed, 16 insertions(+), 14 deletions(-)
>>
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index a524d3c..c634f0f 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -186,9 +186,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>   */
>>
>>  static int
>> -rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
>> -	enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg,
>> -	bool reminv_expected)
>> +rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
>> +	unsigned int pos, enum rpcrdma_chunktype type,
>> +	struct rpcrdma_mr_seg *seg)
>>  {
>>  	int len, n, p, page_base;
>>  	struct page **ppages;
>> @@ -229,14 +229,15 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>  	/* When encoding a Read chunk, the tail iovec contains an
>>  	 * XDR pad and may be omitted.
>>  	 */
>> -	if (type == rpcrdma_readch && xprt_rdma_pad_optimize)
>> +	if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup)
>>  		return n;
>>
>> -	/* When encoding the Write list, some servers need to see an extra
>> -	 * segment for odd-length Write chunks. The upper layer provides
>> -	 * space in the tail iovec for this purpose.
>> +	/* When encoding a Write chunk, some servers need to see an
>> +	 * extra segment for non-XDR-aligned Write chunks. The upper
>> +	 * layer provides space in the tail iovec that may be used
>> +	 * for this purpose.
>>  	 */
>> -	if (type == rpcrdma_writech && reminv_expected)
>> +	if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup)
>>  		return n;
>>
>>  	if (xdrbuf->tail[0].iov_len) {
>> @@ -291,7 +292,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>  	if (rtype == rpcrdma_areadch)
>>  		pos = 0;
>>  	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_snd_buf, pos, rtype, seg, false);
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos,
>> +				     rtype, seg);
>>  	if (nsegs < 0)
>>  		return ERR_PTR(nsegs);
>>
>> @@ -353,10 +355,9 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>  	}
>>
>>  	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf,
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf,
>>  				     rqst->rq_rcv_buf.head[0].iov_len,
>> -				     wtype, seg,
>> -				     r_xprt->rx_ia.ri_reminv_expected);
>> +				     wtype, seg);
>>  	if (nsegs < 0)
>>  		return ERR_PTR(nsegs);
>>
>> @@ -421,8 +422,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
>>  	}
>>
>>  	seg = req->rl_segments;
>> -	nsegs = rpcrdma_convert_iovs(&rqst->rq_rcv_buf, 0, wtype, seg,
>> -				     r_xprt->rx_ia.ri_reminv_expected);
>> +	nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg);
>>  	if (nsegs < 0)
>>  		return ERR_PTR(nsegs);
>>
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 11d0774..2a6a367 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -208,6 +208,7 @@
>>
>>  	/* Default settings for RPC-over-RDMA Version One */
>>  	r_xprt->rx_ia.ri_reminv_expected = false;
>> +	r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize;
>>  	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>>  	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;
>>
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index e35efd4..c137154 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -75,6 +75,7 @@ struct rpcrdma_ia {
>>  	unsigned int		ri_max_inline_write;
>>  	unsigned int		ri_max_inline_read;
>>  	bool			ri_reminv_expected;
>> +	bool			ri_implicit_roundup;
>>  	enum ib_mr_type		ri_mrtype;
>>  	struct ib_qp_attr	ri_qp_attr;
>>  	struct ib_qp_init_attr	ri_qp_init_attr;
>>

--
Chuck Lever