2010-08-03 16:44:56

by Tom Tucker

Subject: [PATCH 0/4] nfsrdma: Miscellaneous Bug Fixes

The following series fixes a number of bugs in the NFSRDMA transport.

---

Steve Wise (1):
xprtrdma: Do not truncate iova_start values in frmr registrations.

Tom Tucker (3):
rpcrdma: Fix SQ size calculation when memreg is FRMR
svcrdma: Cleanup DMA unmapping in error paths.
svcrdma: Change DMA mapping logic to avoid the page_address kernel API


net/sunrpc/xprtrdma/rpc_rdma.c | 2 +
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19 ++++---
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82 ++++++++++++++++++++++--------
net/sunrpc/xprtrdma/svc_rdma_transport.c | 41 +++++++--------
net/sunrpc/xprtrdma/verbs.c | 22 ++++++--
5 files changed, 111 insertions(+), 55 deletions(-)

--
Signed-off-by: Tom Tucker <[email protected]>


2010-08-04 02:42:14

by Myklebust, Trond

Subject: Re: [PATCH 0/4] nfsrdma: Miscellaneous Bug Fixes

On Tue, 2010-08-03 at 11:44 -0500, Tom Tucker wrote:
> The following series fixes a number of bugs in the NFSRDMA transport.
>
> ---
>
> Steve Wise (1):
> xprtrdma: Do not truncate iova_start values in frmr registrations.
>
> Tom Tucker (3):
> rpcrdma: Fix SQ size calculation when memreg is FRMR
> svcrdma: Cleanup DMA unmapping in error paths.
> svcrdma: Change DMA mapping logic to avoid the page_address kernel API
>
>
> net/sunrpc/xprtrdma/rpc_rdma.c | 2 +
> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19 ++++---
> net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82 ++++++++++++++++++++++--------
> net/sunrpc/xprtrdma/svc_rdma_transport.c | 41 +++++++--------
> net/sunrpc/xprtrdma/verbs.c | 22 ++++++--
> 5 files changed, 111 insertions(+), 55 deletions(-)

Hi Tom,

How do you want to split these patches? The first one definitely looks
like a client bug, and the next two look like server bugs, but what about
the last patch?

Cheers
Trond

2010-08-03 16:45:07

by Tom Tucker

Subject: [PATCH 2/4] svcrdma: Change DMA mapping logic to avoid the page_address kernel API

There was logic in the send path that assumed that a page containing data
to send to the client has a KVA. This is not always the case (for example,
for a highmem page) and can result in data corruption when page_address()
returns NULL and we end up DMA mapping address zero.

This patch changes the bus mapping logic to avoid page_address() where
necessary and converts all calls from ib_dma_map_single to ib_dma_map_page
in order to keep the map/unmap calls symmetric.
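
For illustration, a minimal sketch of the conversion (simplified from the
hunks below; dev, page, off, and len stand in for the actual arguments):

    dma_addr_t addr;

    /* before: needs a KVA, which a highmem page may not have */
    addr = ib_dma_map_single(dev, page_address(page) + off,
                             len, DMA_TO_DEVICE);

    /* after: maps the struct page directly, offset included; no KVA needed */
    addr = ib_dma_map_page(dev, page, off, len, DMA_TO_DEVICE);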

Signed-off-by: Tom Tucker <[email protected]>
---

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 18 ++++---
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 80 ++++++++++++++++++++++--------
net/sunrpc/xprtrdma/svc_rdma_transport.c | 18 +++----
3 files changed, 78 insertions(+), 38 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 0194de8..926bdb4 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -263,9 +263,9 @@ static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(rqstp->rq_arg.pages[page_no]),
- PAGE_SIZE, DMA_FROM_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ rqstp->rq_arg.pages[page_no], 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -309,17 +309,21 @@ static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
int count)
{
int i;
+ unsigned long off;

ctxt->count = count;
ctxt->direction = DMA_FROM_DEVICE;
for (i = 0; i < count; i++) {
ctxt->sge[i].length = 0; /* in case map fails */
if (!frmr) {
+ BUG_ON(0 == virt_to_page(vec[i].iov_base));
+ off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
ctxt->sge[i].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- vec[i].iov_base,
- vec[i].iov_len,
- DMA_FROM_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ virt_to_page(vec[i].iov_base),
+ off,
+ vec[i].iov_len,
+ DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
ctxt->sge[i].addr))
return -EINVAL;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index b15e1eb..d4f5e0e 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -70,8 +70,8 @@
* on extra page for the RPCRMDA header.
*/
static int fast_reg_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
+ struct xdr_buf *xdr,
+ struct svc_rdma_req_map *vec)
{
int sge_no;
u32 sge_bytes;
@@ -96,21 +96,25 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
vec->count = 2;
sge_no++;

- /* Build the FRMR */
+ /* Map the XDR head */
frmr->kva = frva;
frmr->direction = DMA_TO_DEVICE;
frmr->access_flags = 0;
frmr->map_len = PAGE_SIZE;
frmr->page_list_len = 1;
+ page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- (void *)xdr->head[0].iov_base,
- PAGE_SIZE, DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ virt_to_page(xdr->head[0].iov_base),
+ page_off,
+ PAGE_SIZE - page_off,
+ DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
atomic_inc(&xprt->sc_dma_used);

+ /* Map the XDR page list */
page_off = xdr->page_base;
page_bytes = xdr->page_len + page_off;
if (!page_bytes)
@@ -128,9 +132,9 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
page_bytes -= sge_bytes;

frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(page),
- PAGE_SIZE, DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ page, page_off,
+ sge_bytes, DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -166,8 +170,10 @@ static int fast_reg_xdr(struct svcxprt_rdma *xprt,
vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;

frmr->page_list->page_list[page_no] =
- ib_dma_map_single(xprt->sc_cm_id->device, va, PAGE_SIZE,
- DMA_TO_DEVICE);
+ ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
+ page_off,
+ PAGE_SIZE,
+ DMA_TO_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
frmr->page_list->page_list[page_no]))
goto fatal_err;
@@ -245,6 +251,35 @@ static int map_xdr(struct svcxprt_rdma *xprt,
return 0;
}

+static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
+ struct xdr_buf *xdr,
+ u32 xdr_off, size_t len, int dir)
+{
+ struct page *page;
+ dma_addr_t dma_addr;
+ if (xdr_off < xdr->head[0].iov_len) {
+ /* This offset is in the head */
+ xdr_off += (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
+ page = virt_to_page(xdr->head[0].iov_base);
+ } else {
+ xdr_off -= xdr->head[0].iov_len;
+ if (xdr_off < xdr->page_len) {
+ /* This offset is in the page list */
+ page = xdr->pages[xdr_off >> PAGE_SHIFT];
+ xdr_off &= ~PAGE_MASK;
+ } else {
+ /* This offset is in the tail */
+ xdr_off -= xdr->page_len;
+ xdr_off += (unsigned long)
+ xdr->tail[0].iov_base & ~PAGE_MASK;
+ page = virt_to_page(xdr->tail[0].iov_base);
+ }
+ }
+ dma_addr = ib_dma_map_page(xprt->sc_cm_id->device, page, xdr_off,
+ min_t(size_t, PAGE_SIZE, len), dir);
+ return dma_addr;
+}
+
/* Assumptions:
* - We are using FRMR
* - or -
@@ -293,10 +328,9 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge[sge_no].length = sge_bytes;
if (!vec->frmr) {
sge[sge_no].addr =
- ib_dma_map_single(xprt->sc_cm_id->device,
- (void *)
- vec->sge[xdr_sge_no].iov_base + sge_off,
- sge_bytes, DMA_TO_DEVICE);
+ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
if (ib_dma_mapping_error(xprt->sc_cm_id->device,
sge[sge_no].addr))
goto err;
@@ -494,7 +528,8 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
* In all three cases, this function prepares the RPCRDMA header in
* sge[0], the 'type' parameter indicates the type to place in the
* RPCRDMA header, and the 'byte_count' field indicates how much of
- * the XDR to include in this RDMA_SEND.
+ * the XDR to include in this RDMA_SEND. NB: The offset of the payload
+ * to send is zero in the XDR.
*/
static int send_reply(struct svcxprt_rdma *rdma,
struct svc_rqst *rqstp,
@@ -536,23 +571,24 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
ctxt->sge[0].length = svc_rdma_xdr_get_reply_hdr_len(rdma_resp);
ctxt->sge[0].addr =
- ib_dma_map_single(rdma->sc_cm_id->device, page_address(page),
- ctxt->sge[0].length, DMA_TO_DEVICE);
+ ib_dma_map_page(rdma->sc_cm_id->device, page, 0,
+ ctxt->sge[0].length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(rdma->sc_cm_id->device, ctxt->sge[0].addr))
goto err;
atomic_inc(&rdma->sc_dma_used);

ctxt->direction = DMA_TO_DEVICE;

- /* Determine how many of our SGE are to be transmitted */
+ /* Map the payload indicated by 'byte_count' */
for (sge_no = 1; byte_count && sge_no < vec->count; sge_no++) {
+ int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
if (!vec->frmr) {
ctxt->sge[sge_no].addr =
- ib_dma_map_single(rdma->sc_cm_id->device,
- vec->sge[sge_no].iov_base,
- sge_bytes, DMA_TO_DEVICE);
+ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
if (ib_dma_mapping_error(rdma->sc_cm_id->device,
ctxt->sge[sge_no].addr))
goto err;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index edea15a..23f90c3 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -120,7 +120,7 @@ void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt)
*/
if (ctxt->sge[i].lkey == xprt->sc_dma_lkey) {
atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ib_dma_unmap_page(xprt->sc_cm_id->device,
ctxt->sge[i].addr,
ctxt->sge[i].length,
ctxt->direction);
@@ -502,8 +502,8 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
BUG_ON(sge_no >= xprt->sc_max_sge);
page = svc_rdma_get_page();
ctxt->pages[sge_no] = page;
- pa = ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(page), PAGE_SIZE,
+ pa = ib_dma_map_page(xprt->sc_cm_id->device,
+ page, 0, PAGE_SIZE,
DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, pa))
goto err_put_ctxt;
@@ -798,8 +798,8 @@ static void frmr_unmap_dma(struct svcxprt_rdma *xprt,
if (ib_dma_mapping_error(frmr->mr->device, addr))
continue;
atomic_dec(&xprt->sc_dma_used);
- ib_dma_unmap_single(frmr->mr->device, addr, PAGE_SIZE,
- frmr->direction);
+ ib_dma_unmap_page(frmr->mr->device, addr, PAGE_SIZE,
+ frmr->direction);
}
}

@@ -1274,7 +1274,7 @@ int svc_rdma_send(struct svcxprt_rdma *xprt, struct ib_send_wr *wr)
atomic_read(&xprt->sc_sq_count) <
xprt->sc_sq_depth);
if (test_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags))
- return 0;
+ return -ENOTCONN;
continue;
}
/* Take a transport ref for each WR posted */
@@ -1320,8 +1320,8 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

/* Prepare SGE for local address */
- sge.addr = ib_dma_map_single(xprt->sc_cm_id->device,
- page_address(p), PAGE_SIZE, DMA_FROM_DEVICE);
+ sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
+ p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
put_page(p);
return;
@@ -1348,7 +1348,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
if (ret) {
dprintk("svcrdma: Error %d posting send for protocol error\n",
ret);
- ib_dma_unmap_single(xprt->sc_cm_id->device,
+ ib_dma_unmap_page(xprt->sc_cm_id->device,
sge.addr, PAGE_SIZE,
DMA_FROM_DEVICE);
svc_rdma_put_context(ctxt, 1);


2010-08-03 16:45:18

by Tom Tucker

Subject: [PATCH 4/4] rpcrdma: Fix SQ size calculation when memreg is FRMR

This patch updates the computation to include the worst case situation
where three FRMRs are required to map a single RPC request.
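
As a worked example (the numbers are invented for illustration; the names
are those used in the verbs.c hunk below), suppose cdata->max_requests is
16 and the device reports devattr.max_qp_wr = 100:

    max_send_wr = 16 * 7;       /* = 112: 3 reg + 3 inv + 1 send per request */
    /* 112 > max_qp_wr (100), so scale the request count down to fit: */
    max_requests = 100 / 7;     /* = 14 */
    max_send_wr  = 14 * 7;      /* = 98, within the device limit */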

Signed-off-by: Tom Tucker <[email protected]>
---

net/sunrpc/xprtrdma/rpc_rdma.c | 2 ++
net/sunrpc/xprtrdma/verbs.c | 20 ++++++++++++++++----
2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index e5e28d1..2ac3f6e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
req->rl_nchunks = nchunks;

BUG_ON(nchunks == 0);
+ BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
+ && (nchunks > 3));

/*
* finish off header. If write, marshal discrim and nchunks.
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3bdbd9f..5f4c7b3 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
- /* Add room for frmr register and invalidate WRs */
- ep->rep_attr.cap.max_send_wr *= 3;
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
- return -EINVAL;
+ /* Add room for frmr register and invalidate WRs.
+ * 1. FRMR reg WR for head
+ * 2. FRMR invalidate WR for head
+ * 3. FRMR reg WR for pagelist
+ * 4. FRMR invalidate WR for pagelist
+ * 5. FRMR reg WR for tail
+ * 6. FRMR invalidate WR for tail
+ * 7. The RDMA_SEND WR
+ */
+ ep->rep_attr.cap.max_send_wr *= 7;
+ if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
+ cdata->max_requests = devattr.max_qp_wr / 7;
+ if (!cdata->max_requests)
+ return -EINVAL;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ }
break;
case RPCRDMA_MEMWINDOWS_ASYNC:
case RPCRDMA_MEMWINDOWS:


2010-08-04 03:21:42

by Tom Tucker

Subject: Re: [PATCH 4/4] rpcrdma: Fix SQ size calculation when memreg is FRMR

Trond Myklebust wrote:
> Hi Tom
>
> On Tue, 2010-08-03 at 11:45 -0500, Tom Tucker wrote:
>
>> This patch updates the computation to include the worst case situation
>> where three FRMR are required to map a single RPC REQ.
>>
>> Signed-off-by: Tom Tucker <[email protected]>
>> ---
>>
>> net/sunrpc/xprtrdma/rpc_rdma.c | 2 ++
>> net/sunrpc/xprtrdma/verbs.c | 20 ++++++++++++++++----
>> 2 files changed, 18 insertions(+), 4 deletions(-)
>>
>> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
>> index e5e28d1..2ac3f6e 100644
>> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
>> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
>> @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
>> req->rl_nchunks = nchunks;
>>
>> BUG_ON(nchunks == 0);
>> + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
>> + && (nchunks > 3));
>>
>
> Where does the number 3 come from?
>

The assumption below is based on the premise that there is one chunk for
the head, one for the page list, and one for the tail == 3. I guess I
should have a comment -- sorry :-\
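
Roughly, the three possible chunks line up with the three pieces of the
xdr_buf (abbreviated sketch of struct xdr_buf from
include/linux/sunrpc/xdr.h; the chunk annotations are added here):

    struct xdr_buf {
        struct kvec  head[1];   /* chunk 1: RPC header + inline data */
        struct kvec  tail[1];   /* chunk 3: XDR padding/trailer */
        struct page **pages;    /* chunk 2: the bulk payload page list */
        /* ... */
    };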

>
>> /*
>> * finish off header. If write, marshal discrim and nchunks.
>> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
>> index 3bdbd9f..5f4c7b3 100644
>> --- a/net/sunrpc/xprtrdma/verbs.c
>> +++ b/net/sunrpc/xprtrdma/verbs.c
>> @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
>> ep->rep_attr.cap.max_send_wr = cdata->max_requests;
>> switch (ia->ri_memreg_strategy) {
>> case RPCRDMA_FRMR:
>> - /* Add room for frmr register and invalidate WRs */
>> - ep->rep_attr.cap.max_send_wr *= 3;
>> - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
>> - return -EINVAL;
>> + /* Add room for frmr register and invalidate WRs.
>> + * 1. FRMR reg WR for head
>> + * 2. FRMR invalidate WR for head
>> + * 3. FRMR reg WR for pagelist
>> + * 4. FRMR invalidate WR for pagelist
>> + * 5. FRMR reg WR for tail
>> + * 6. FRMR invalidate WR for tail
>> + * 7. The RDMA_SEND WR
>> + */
>> + ep->rep_attr.cap.max_send_wr *= 7;
>> + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
>> + cdata->max_requests = devattr.max_qp_wr / 7;
>> + if (!cdata->max_requests)
>> + return -EINVAL;
>> + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
>> + }
>> break;
>> case RPCRDMA_MEMWINDOWS_ASYNC:
>> case RPCRDMA_MEMWINDOWS:
>>
>>
>
>


2010-08-04 03:27:00

by Tom Tucker

Subject: Re: [PATCH 0/4] nfsrdma: Miscellaneous Bug Fixes

Hi Trond,

Sorry, my last note avoided answering the actual question.

Trond Myklebust wrote:
> On Tue, 2010-08-03 at 11:44 -0500, Tom Tucker wrote:
>
>> The following series fixes a number of bugs in the NFSRDMA transport.
>>
>> ---
>>
>> Steve Wise (1):
>> xprtrdma: Do not truncate iova_start values in frmr registrations.
>>
>>

client bug.

>> Tom Tucker (3):
>> rpcrdma: Fix SQ size calculation when memreg is FRMR
>>
client bug.

>> svcrdma: Cleanup DMA unmapping in error paths.
>> svcrdma: Change DMA mapping logic to avoid the page_address kernel API
>>
>>
server bugs.
>> net/sunrpc/xprtrdma/rpc_rdma.c | 2 +
>> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19 ++++---
>> net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82 ++++++++++++++++++++++--------
>> net/sunrpc/xprtrdma/svc_rdma_transport.c | 41 +++++++--------
>> net/sunrpc/xprtrdma/verbs.c | 22 ++++++--
>> 5 files changed, 111 insertions(+), 55 deletions(-)
>>
>
> Hi Tom,
>
> How do you want to split these patches? The first one definitely looks
> like a client bug, and the next two look like server bugs, but what about
> the last patch?
>
> Cheers
> Trond
>


2010-08-04 03:24:32

by Tom Tucker

Subject: Re: [PATCH 0/4] nfsrdma: Miscellaneous Bug Fixes

Trond Myklebust wrote:
> On Tue, 2010-08-03 at 11:44 -0500, Tom Tucker wrote:
>
>> The following series fixes a number of bugs in the NFSRDMA transport.
>>
>> ---
>>
>> Steve Wise (1):
>> xprtrdma: Do not truncate iova_start values in frmr registrations.
>>
>> Tom Tucker (3):
>> rpcrdma: Fix SQ size calculation when memreg is FRMR
>> svcrdma: Cleanup DMA unmapping in error paths.
>> svcrdma: Change DMA mapping logic to avoid the page_address kernel API
>>
>>
>> net/sunrpc/xprtrdma/rpc_rdma.c | 2 +
>> net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 19 ++++---
>> net/sunrpc/xprtrdma/svc_rdma_sendto.c | 82 ++++++++++++++++++++++--------
>> net/sunrpc/xprtrdma/svc_rdma_transport.c | 41 +++++++--------
>> net/sunrpc/xprtrdma/verbs.c | 22 ++++++--
>> 5 files changed, 111 insertions(+), 55 deletions(-)
>>
>
> Hi Tom,
>
> How do you want to split these patches? The first one definitely looks
> like a client bug, and the next two look like server bugs, but what about
> the last patch?
>
>
Yes, I wrestled with that, but decided to post them as a collection
since it's the 'same transport' and the series is easier to review that
way. But obviously, the client ones will go to your tree and the server
ones to Bruce's tree. I'll split and repost the final versions after the
review.

Will that work?

Tom

> Cheers
> Trond
>


2010-08-03 16:45:02

by Tom Tucker

Subject: [PATCH 1/4] xprtrdma: Do not truncate iova_start values in frmr registrations.

From: Steve Wise <[email protected]>

A bad cast causes iova_start, which in this case is a 64b DMA bus
address, to be truncated on 32b systems, breaking frmrs there. No cast
is needed.
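
To see the truncation (a minimal sketch, not taken from the tree; mr_dma
stands in for seg1->mr_dma on a 32b kernel where dma_addr_t is 64 bits,
e.g. a PAE x86 build):

    u64 iova_start;
    dma_addr_t mr_dma = 0x100000000ULL;  /* bus address above 4GB */

    iova_start = (unsigned long)mr_dma;  /* 32b unsigned long: iova_start == 0 */
    iova_start = mr_dma;                 /* no cast: iova_start == 0x100000000 */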

Signed-off-by: Steve Wise <[email protected]>
---

net/sunrpc/xprtrdma/verbs.c | 2 +-
1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 27015c6..3bdbd9f 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1490,7 +1490,7 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.opcode = IB_WR_FAST_REG_MR;
frmr_wr.send_flags = 0; /* unsignaled */
- frmr_wr.wr.fast_reg.iova_start = (unsigned long)seg1->mr_dma;
+ frmr_wr.wr.fast_reg.iova_start = seg1->mr_dma;
frmr_wr.wr.fast_reg.page_list = seg1->mr_chunk.rl_mw->r.frmr.fr_pgl;
frmr_wr.wr.fast_reg.page_list_len = i;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;


2010-08-03 16:45:12

by Tom Tucker

Subject: [PATCH 3/4] svcrdma: Cleanup DMA unmapping in error paths.

There are several error paths in the code that do not unmap DMA. This
patch adds calls to svc_rdma_unmap_dma() to release the DMA mappings in
these error paths.
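
The pattern being added is the same in each path (a simplified sketch;
ctxt, xprt, and the helpers are as in the hunks below):

    ret = svc_rdma_send(xprt, &send_wr);
    if (ret) {
        svc_rdma_unmap_dma(ctxt);       /* release the DMA mappings first */
        svc_rdma_put_context(ctxt, 1);  /* then return the context */
    }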

Signed-off-by: Tom Tucker <[email protected]>
---

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 1 +
net/sunrpc/xprtrdma/svc_rdma_sendto.c | 2 ++
net/sunrpc/xprtrdma/svc_rdma_transport.c | 29 ++++++++++++++---------------
3 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 926bdb4..df67211 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -495,6 +495,7 @@ next_sge:
printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
err);
set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 0);
goto out;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index d4f5e0e..249a835 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -367,6 +367,8 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
goto err;
return 0;
err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 23f90c3..d22a44d 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -511,9 +511,9 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
ctxt->sge[sge_no].addr = pa;
ctxt->sge[sge_no].length = PAGE_SIZE;
ctxt->sge[sge_no].lkey = xprt->sc_dma_lkey;
+ ctxt->count = sge_no + 1;
buflen += PAGE_SIZE;
}
- ctxt->count = sge_no;
recv_wr.next = NULL;
recv_wr.sg_list = &ctxt->sge[0];
recv_wr.num_sge = ctxt->count;
@@ -529,6 +529,7 @@ int svc_rdma_post_recv(struct svcxprt_rdma *xprt)
return ret;

err_put_ctxt:
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
return -ENOMEM;
}
@@ -1306,7 +1307,6 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
enum rpcrdma_errcode err)
{
struct ib_send_wr err_wr;
- struct ib_sge sge;
struct page *p;
struct svc_rdma_op_ctxt *ctxt;
u32 *va;
@@ -1319,26 +1319,27 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
/* XDR encode error */
length = svc_rdma_xdr_encode_error(xprt, rmsgp, err, va);

+ ctxt = svc_rdma_get_context(xprt);
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->count = 1;
+ ctxt->pages[0] = p;
+
/* Prepare SGE for local address */
- sge.addr = ib_dma_map_page(xprt->sc_cm_id->device,
- p, 0, PAGE_SIZE, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device, sge.addr)) {
+ ctxt->sge[0].addr = ib_dma_map_page(xprt->sc_cm_id->device,
+ p, 0, length, DMA_FROM_DEVICE);
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device, ctxt->sge[0].addr)) {
put_page(p);
return;
}
atomic_inc(&xprt->sc_dma_used);
- sge.lkey = xprt->sc_dma_lkey;
- sge.length = length;
-
- ctxt = svc_rdma_get_context(xprt);
- ctxt->count = 1;
- ctxt->pages[0] = p;
+ ctxt->sge[0].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[0].length = length;

/* Prepare SEND WR */
memset(&err_wr, 0, sizeof err_wr);
ctxt->wr_op = IB_WR_SEND;
err_wr.wr_id = (unsigned long)ctxt;
- err_wr.sg_list = &sge;
+ err_wr.sg_list = ctxt->sge;
err_wr.num_sge = 1;
err_wr.opcode = IB_WR_SEND;
err_wr.send_flags = IB_SEND_SIGNALED;
@@ -1348,9 +1349,7 @@ void svc_rdma_send_error(struct svcxprt_rdma *xprt, struct rpcrdma_msg *rmsgp,
if (ret) {
dprintk("svcrdma: Error %d posting send for protocol error\n",
ret);
- ib_dma_unmap_page(xprt->sc_cm_id->device,
- sge.addr, PAGE_SIZE,
- DMA_FROM_DEVICE);
+ svc_rdma_unmap_dma(ctxt);
svc_rdma_put_context(ctxt, 1);
}
}


2010-08-04 02:40:32

by Myklebust, Trond

Subject: Re: [PATCH 4/4] rpcrdma: Fix SQ size calculation when memreg is FRMR

Hi Tom

On Tue, 2010-08-03 at 11:45 -0500, Tom Tucker wrote:
> This patch updates the computation to include the worst case situation
> where three FRMR are required to map a single RPC REQ.
>
> Signed-off-by: Tom Tucker <[email protected]>
> ---
>
> net/sunrpc/xprtrdma/rpc_rdma.c | 2 ++
> net/sunrpc/xprtrdma/verbs.c | 20 ++++++++++++++++----
> 2 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
> index e5e28d1..2ac3f6e 100644
> --- a/net/sunrpc/xprtrdma/rpc_rdma.c
> +++ b/net/sunrpc/xprtrdma/rpc_rdma.c
> @@ -249,6 +249,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
> req->rl_nchunks = nchunks;
>
> BUG_ON(nchunks == 0);
> + BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
> + && (nchunks > 3));

Where does the number 3 come from?

> /*
> * finish off header. If write, marshal discrim and nchunks.
> diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
> index 3bdbd9f..5f4c7b3 100644
> --- a/net/sunrpc/xprtrdma/verbs.c
> +++ b/net/sunrpc/xprtrdma/verbs.c
> @@ -650,10 +650,22 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
> ep->rep_attr.cap.max_send_wr = cdata->max_requests;
> switch (ia->ri_memreg_strategy) {
> case RPCRDMA_FRMR:
> - /* Add room for frmr register and invalidate WRs */
> - ep->rep_attr.cap.max_send_wr *= 3;
> - if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
> - return -EINVAL;
> + /* Add room for frmr register and invalidate WRs.
> + * 1. FRMR reg WR for head
> + * 2. FRMR invalidate WR for head
> + * 3. FRMR reg WR for pagelist
> + * 4. FRMR invalidate WR for pagelist
> + * 5. FRMR reg WR for tail
> + * 6. FRMR invalidate WR for tail
> + * 7. The RDMA_SEND WR
> + */
> + ep->rep_attr.cap.max_send_wr *= 7;
> + if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
> + cdata->max_requests = devattr.max_qp_wr / 7;
> + if (!cdata->max_requests)
> + return -EINVAL;
> + ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
> + }
> break;
> case RPCRDMA_MEMWINDOWS_ASYNC:
> case RPCRDMA_MEMWINDOWS:
>