Adopt the use of xprt_pin_rqst to eliminate contention between
Call-side users of rb_lock and the use of rb_lock in
rpcrdma_reply_handler.
This replaces the mechanism introduced in 431af645cf66 ("xprtrdma:
Fix client lock-up after application signal fires").
Use recv_lock to quickly find the completing rqst, pin it, then
drop the lock. At that point invalidation and pull-up of the Reply
XDR can be done. Both are often expensive operations.
Finally, take recv_lock again to signal completion to the RPC
layer. It also protects adjustment of "cwnd".
This greatly reduces the amount of time a lock is held by the
reply handler. Comparing lock_stat results shows a marked decrease
in contention on rb_lock and recv_lock.
Signed-off-by: Chuck Lever <[email protected]>
---
Hi-
If Trond's lock contention series is going into v4.14, I'd like you
to consider this one (applied at the end of that series) as well.
Without it, NFS/RDMA performance regresses a bit after the
first xprt_pin_rqst patch is applied. Thanks!
net/sunrpc/xprt.c | 2 +
net/sunrpc/xprtrdma/rpc_rdma.c | 57 ++++++++++-----------------------------
net/sunrpc/xprtrdma/transport.c | 1 -
net/sunrpc/xprtrdma/verbs.c | 1 -
net/sunrpc/xprtrdma/xprt_rdma.h | 30 ---------------------
5 files changed, 16 insertions(+), 75 deletions(-)
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 2af189c..e741ec2 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -855,6 +855,7 @@ void xprt_pin_rqst(struct rpc_rqst *req)
{
set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
}
+EXPORT_SYMBOL_GPL(xprt_pin_rqst);
/**
* xprt_unpin_rqst - Unpin a request on the transport receive list
@@ -870,6 +871,7 @@ void xprt_unpin_rqst(struct rpc_rqst *req)
if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
}
+EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
__must_hold(&req->rq_xprt->recv_lock)
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index dfa748a..577a29e 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -734,9 +734,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
rpclen = 0;
}
- req->rl_xid = rqst->rq_xid;
- rpcrdma_insert_req(&r_xprt->rx_buf, req);
-
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -991,7 +988,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpcrdma_msg *headerp;
struct rpcrdma_req *req;
@@ -999,7 +995,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
__be32 *iptr;
int rdmalen, status, rmerr;
unsigned long cwnd;
- struct list_head mws;
dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
@@ -1017,22 +1012,14 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
- spin_lock(&buf->rb_lock);
- req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf,
- headerp->rm_xid);
- if (!req)
- goto out_nomatch;
- if (req->rl_reply)
- goto out_duplicate;
-
- list_replace_init(&req->rl_registered, &mws);
- rpcrdma_mark_remote_invalidation(&mws, rep);
-
- /* Avoid races with signals and duplicate replies
- * by marking this req as matched.
- */
+ spin_lock(&xprt->recv_lock);
+ rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
+ if (!rqst)
+ goto out_norqst;
+ xprt_pin_rqst(rqst);
+ spin_unlock(&xprt->recv_lock);
+ req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
- spin_unlock(&buf->rb_lock);
dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
__func__, rep, req, be32_to_cpu(headerp->rm_xid));
@@ -1044,17 +1031,12 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
* waking the next RPC waits until this RPC has relinquished
* all its Send Queue entries.
*/
- if (!list_empty(&mws))
- r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws);
+ if (!list_empty(&req->rl_registered)) {
+ rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
+ r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt,
+ &req->rl_registered);
+ }
- /* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
- */
- spin_lock(&xprt->recv_lock);
- rqst = xprt_lookup_rqst(xprt, headerp->rm_xid);
- if (!rqst)
- goto out_norqst;
xprt->reestablish_timeout = 0;
if (headerp->rm_vers != rpcrdma_version)
goto out_badversion;
@@ -1130,12 +1112,14 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
}
out:
+ spin_lock(&xprt->recv_lock);
cwnd = xprt->cwnd;
xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
if (xprt->cwnd > cwnd)
xprt_release_rqst_cong(rqst->rq_task);
xprt_complete_rqst(rqst->rq_task, status);
+ xprt_unpin_rqst(rqst);
spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
@@ -1202,19 +1186,6 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
dprintk("RPC: %s: short/invalid reply\n", __func__);
goto repost;
-out_nomatch:
- spin_unlock(&buf->rb_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n",
- __func__, be32_to_cpu(headerp->rm_xid),
- rep->rr_len);
- goto repost;
-
-out_duplicate:
- spin_unlock(&buf->rb_lock);
- dprintk("RPC: %s: "
- "duplicate reply %p to RPC request %p: xid 0x%08x\n",
- __func__, rep, req, be32_to_cpu(headerp->rm_xid));
-
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.
*/
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index d1c458e..1cb1a07 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -684,7 +684,6 @@
dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply);
- rpcrdma_remove_req(&r_xprt->rx_buf, req);
if (!list_empty(&req->rl_registered))
ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task));
rpcrdma_unmap_sges(ia, req);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index e4171f2..23ec6ed 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1004,7 +1004,6 @@ struct rpcrdma_rep *
spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mws);
INIT_LIST_HEAD(&buf->rb_all);
- INIT_LIST_HEAD(&buf->rb_pending);
INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index b282d3f..ad918c8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -341,7 +341,6 @@ enum {
struct rpcrdma_buffer;
struct rpcrdma_req {
struct list_head rl_list;
- __be32 rl_xid;
unsigned int rl_mapped_sges;
unsigned int rl_connect_cookie;
struct rpcrdma_buffer *rl_buffer;
@@ -403,7 +402,6 @@ struct rpcrdma_buffer {
int rb_send_count, rb_recv_count;
struct list_head rb_send_bufs;
struct list_head rb_recv_bufs;
- struct list_head rb_pending;
u32 rb_max_requests;
atomic_t rb_credits; /* most recent credit grant */
@@ -552,34 +550,6 @@ int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
int rpcrdma_buffer_create(struct rpcrdma_xprt *);
void rpcrdma_buffer_destroy(struct rpcrdma_buffer *);
-static inline void
-rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
- spin_lock(&buffers->rb_lock);
- if (list_empty(&req->rl_list))
- list_add_tail(&req->rl_list, &buffers->rb_pending);
- spin_unlock(&buffers->rb_lock);
-}
-
-static inline struct rpcrdma_req *
-rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid)
-{
- struct rpcrdma_req *pos;
-
- list_for_each_entry(pos, &buffers->rb_pending, rl_list)
- if (pos->rl_xid == xid)
- return pos;
- return NULL;
-}
-
-static inline void
-rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
-{
- spin_lock(&buffers->rb_lock);
- list_del(&req->rl_list);
- spin_unlock(&buffers->rb_lock);
-}
-
struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *);
void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *);
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
T24gV2VkLCAyMDE3LTA4LTIzIGF0IDE3OjA1IC0wNDAwLCBDaHVjayBMZXZlciB3cm90ZToNCj4g
QWRvcHQgdGhlIHVzZSBvZiB4cHJ0X3Bpbl9ycXN0IHRvIGVsaW1pbmF0ZSBjb250ZW50aW9uIGJl
dHdlZW4NCj4gQ2FsbC1zaWRlIHVzZXJzIG9mIHJiX2xvY2sgYW5kIHRoZSB1c2Ugb2YgcmJfbG9j
ayBpbg0KPiBycGNyZG1hX3JlcGx5X2hhbmRsZXIuDQo+IA0KPiBUaGlzIHJlcGxhY2VzIHRoZSBt
ZWNoYW5pc20gaW50cm9kdWNlZCBpbiA0MzFhZjY0NWNmNjYgKCJ4cHJ0cmRtYToNCj4gRml4IGNs
aWVudCBsb2NrLXVwIGFmdGVyIGFwcGxpY2F0aW9uIHNpZ25hbCBmaXJlcyIpLg0KPiANCj4gVXNl
IHJlY3ZfbG9jayB0byBxdWlja2x5IGZpbmQgdGhlIGNvbXBsZXRpbmcgcnFzdCwgcGluIGl0LCB0
aGVuDQo+IGRyb3AgdGhlIGxvY2suIEF0IHRoYXQgcG9pbnQgaW52YWxpZGF0aW9uIGFuZCBwdWxs
LXVwIG9mIHRoZSBSZXBseQ0KPiBYRFIgY2FuIGJlIGRvbmUuIEJvdGggYXJlIG9mdGVuIGV4cGVu
c2l2ZSBvcGVyYXRpb25zLg0KPiANCj4gRmluYWxseSwgdGFrZSByZWN2X2xvY2sgYWdhaW4gdG8g
c2lnbmFsIGNvbXBsZXRpb24gdG8gdGhlIFJQQw0KPiBsYXllci4gSXQgYWxzbyBwcm90ZWN0cyBh
ZGp1c3RtZW50IG9mICJjd25kIi4NCj4gDQo+IFRoaXMgZ3JlYXRseSByZWR1Y2VzIHRoZSBhbW91
bnQgb2YgdGltZSBhIGxvY2sgaXMgaGVsZCBieSB0aGUNCj4gcmVwbHkgaGFuZGxlci4gQ29tcGFy
aW5nIGxvY2tfc3RhdCByZXN1bHRzIHNob3dzIGEgbWFya2VkIGRlY3JlYXNlDQo+IGluIGNvbnRl
bnRpb24gb24gcmJfbG9jayBhbmQgcmVjdl9sb2NrLg0KPiANCj4gU2lnbmVkLW9mZi1ieTogQ2h1
Y2sgTGV2ZXIgPGNodWNrLmxldmVyQG9yYWNsZS5jb20+DQo+IC0tLQ0KPiBIaS0NCj4gDQo+IElm
IFRyb25kJ3MgbG9jayBjb250ZW50aW9uIHNlcmllcyBpcyBnb2luZyBpbnRvIHY0LjE0LCBJJ2Qg
bGlrZSB5b3UNCj4gdG8gY29uc2lkZXIgdGhpcyBvbmUgKGFwcGxpZWQgYXQgdGhlIGVuZCBvZiB0
aGF0IHNlcmllcykgYXMgd2VsbC4NCj4gV2l0aG91dCBpdCwgTkZTL1JETUEgcGVyZm9ybWFuY2Ug
cmVncmVzc2VzIGEgYml0IGFmdGVyIHRoZQ0KPiBmaXJzdCB4cHJ0X3Bpbl9ycXN0IHBhdGNoIGlz
IGFwcGxpZWQuIFRoYW5rcyENCj4gDQoNCklmIEFubmEgaXMgT0sgd2l0aCBpdCwgdGhlbiBJIGNh
biBhcHBseSB0aGlzIHBhdGNoIG9uY2Ugc2hlJ3Mgc2VudCBtZQ0KdGhlIHB1bGwgcmVxdWVzdCBm
b3IgdGhlIG90aGVyIFJETUEgY2xpZW50IHBhdGNoZXMuDQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0
DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXIsIFByaW1hcnlEYXRhDQp0cm9uZC5teWtsZWJ1
c3RAcHJpbWFyeWRhdGEuY29tDQo=
On 08/23/2017 05:40 PM, Trond Myklebust wrote:
> On Wed, 2017-08-23 at 17:05 -0400, Chuck Lever wrote:
>> Adopt the use of xprt_pin_rqst to eliminate contention between
>> Call-side users of rb_lock and the use of rb_lock in
>> rpcrdma_reply_handler.
>>
>> This replaces the mechanism introduced in 431af645cf66 ("xprtrdma:
>> Fix client lock-up after application signal fires").
>>
>> Use recv_lock to quickly find the completing rqst, pin it, then
>> drop the lock. At that point invalidation and pull-up of the Reply
>> XDR can be done. Both are often expensive operations.
>>
>> Finally, take recv_lock again to signal completion to the RPC
>> layer. It also protects adjustment of "cwnd".
>>
>> This greatly reduces the amount of time a lock is held by the
>> reply handler. Comparing lock_stat results shows a marked decrease
>> in contention on rb_lock and recv_lock.
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> Hi-
>>
>> If Trond's lock contention series is going into v4.14, I'd like you
>> to consider this one (applied at the end of that series) as well.
>> Without it, NFS/RDMA performance regresses a bit after the
>> first xprt_pin_rqst patch is applied. Thanks!
>>
>
> If Anna is OK with it, then I can apply this patch once she's sent me
> the pull request for the other RDMA client patches.
Works for me. Thanks!
>
T24gVGh1LCAyMDE3LTA4LTI0IGF0IDE0OjE4IC0wNDAwLCBBbm5hIFNjaHVtYWtlciB3cm90ZToN
Cj4gDQo+IE9uIDA4LzIzLzIwMTcgMDU6NDAgUE0sIFRyb25kIE15a2xlYnVzdCB3cm90ZToNCj4g
PiBPbiBXZWQsIDIwMTctMDgtMjMgYXQgMTc6MDUgLTA0MDAsIENodWNrIExldmVyIHdyb3RlOg0K
PiA+ID4gQWRvcHQgdGhlIHVzZSBvZiB4cHJ0X3Bpbl9ycXN0IHRvIGVsaW1pbmF0ZSBjb250ZW50
aW9uIGJldHdlZW4NCj4gPiA+IENhbGwtc2lkZSB1c2VycyBvZiByYl9sb2NrIGFuZCB0aGUgdXNl
IG9mIHJiX2xvY2sgaW4NCj4gPiA+IHJwY3JkbWFfcmVwbHlfaGFuZGxlci4NCj4gPiA+IA0KPiA+
ID4gVGhpcyByZXBsYWNlcyB0aGUgbWVjaGFuaXNtIGludHJvZHVjZWQgaW4gNDMxYWY2NDVjZjY2
DQo+ID4gPiAoInhwcnRyZG1hOg0KPiA+ID4gRml4IGNsaWVudCBsb2NrLXVwIGFmdGVyIGFwcGxp
Y2F0aW9uIHNpZ25hbCBmaXJlcyIpLg0KPiA+ID4gDQo+ID4gPiBVc2UgcmVjdl9sb2NrIHRvIHF1
aWNrbHkgZmluZCB0aGUgY29tcGxldGluZyBycXN0LCBwaW4gaXQsIHRoZW4NCj4gPiA+IGRyb3Ag
dGhlIGxvY2suIEF0IHRoYXQgcG9pbnQgaW52YWxpZGF0aW9uIGFuZCBwdWxsLXVwIG9mIHRoZQ0K
PiA+ID4gUmVwbHkNCj4gPiA+IFhEUiBjYW4gYmUgZG9uZS4gQm90aCBhcmUgb2Z0ZW4gZXhwZW5z
aXZlIG9wZXJhdGlvbnMuDQo+ID4gPiANCj4gPiA+IEZpbmFsbHksIHRha2UgcmVjdl9sb2NrIGFn
YWluIHRvIHNpZ25hbCBjb21wbGV0aW9uIHRvIHRoZSBSUEMNCj4gPiA+IGxheWVyLiBJdCBhbHNv
IHByb3RlY3RzIGFkanVzdG1lbnQgb2YgImN3bmQiLg0KPiA+ID4gDQo+ID4gPiBUaGlzIGdyZWF0
bHkgcmVkdWNlcyB0aGUgYW1vdW50IG9mIHRpbWUgYSBsb2NrIGlzIGhlbGQgYnkgdGhlDQo+ID4g
PiByZXBseSBoYW5kbGVyLiBDb21wYXJpbmcgbG9ja19zdGF0IHJlc3VsdHMgc2hvd3MgYSBtYXJr
ZWQNCj4gPiA+IGRlY3JlYXNlDQo+ID4gPiBpbiBjb250ZW50aW9uIG9uIHJiX2xvY2sgYW5kIHJl
Y3ZfbG9jay4NCj4gPiA+IA0KPiA+ID4gU2lnbmVkLW9mZi1ieTogQ2h1Y2sgTGV2ZXIgPGNodWNr
LmxldmVyQG9yYWNsZS5jb20+DQo+ID4gPiAtLS0NCj4gPiA+IEhpLQ0KPiA+ID4gDQo+ID4gPiBJ
ZiBUcm9uZCdzIGxvY2sgY29udGVudGlvbiBzZXJpZXMgaXMgZ29pbmcgaW50byB2NC4xNCwgSSdk
IGxpa2UNCj4gPiA+IHlvdQ0KPiA+ID4gdG8gY29uc2lkZXIgdGhpcyBvbmUgKGFwcGxpZWQgYXQg
dGhlIGVuZCBvZiB0aGF0IHNlcmllcykgYXMgd2VsbC4NCj4gPiA+IFdpdGhvdXQgaXQsIE5GUy9S
RE1BIHBlcmZvcm1hbmNlIHJlZ3Jlc3NlcyBhIGJpdCBhZnRlciB0aGUNCj4gPiA+IGZpcnN0IHhw
cnRfcGluX3Jxc3QgcGF0Y2ggaXMgYXBwbGllZC4gVGhhbmtzIQ0KPiA+ID4gDQo+ID4gDQo+ID4g
SWYgQW5uYSBpcyBPSyB3aXRoIGl0LCB0aGVuIEkgY2FuIGFwcGx5IHRoaXMgcGF0Y2ggb25jZSBz
aGUncyBzZW50DQo+ID4gbWUNCj4gPiB0aGUgcHVsbCByZXF1ZXN0IGZvciB0aGUgb3RoZXIgUkRN
QSBjbGllbnQgcGF0Y2hlcy4NCj4gDQo+IFdvcmtzIGZvciBtZS4gIFRoYW5rcyENCg0KUGxlYXNl
IGNhbiB5b3UgYm90aCBjaGVjayBodHRwOi8vZ2l0LmxpbnV4LW5mcy5vcmcvP3A9dHJvbmRteS9s
aW51eC1uZnMNCi5naXQ7YT1jb21taXQ7aD1iMWRhNWQ5MDg1N2VlNGI4ZjVhY2VlMTE0MzcwNTQx
MmIxNmZjZTU2IGFuZCBzZWUgaWYgYWxsDQppcyB3ZWxsPw0KDQpEbyBub3RlIHRoYXQgSSBkaWQg
cmVtb3ZlIHRoZSBjYWxsIHRvIHJwY3JkbWFfYnVmZmVyX3B1dCgpIHdoaWNoIHdhcw0KYmVpbmcg
Y2FsbGVkIHdpdGggYW4gdW5pbml0aWFsaXNlZCBhcmd1bWVudC4NCg0KQ2hlZXJzDQogIFRyb25k
DQoNCi0tIA0KVHJvbmQgTXlrbGVidXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXIsIFBy
aW1hcnlEYXRhDQp0cm9uZC5teWtsZWJ1c3RAcHJpbWFyeWRhdGEuY29tDQo=
On 09/05/2017 04:33 PM, Trond Myklebust wrote:
> On Thu, 2017-08-24 at 14:18 -0400, Anna Schumaker wrote:
>>
>> On 08/23/2017 05:40 PM, Trond Myklebust wrote:
>>> On Wed, 2017-08-23 at 17:05 -0400, Chuck Lever wrote:
>>>> Adopt the use of xprt_pin_rqst to eliminate contention between
>>>> Call-side users of rb_lock and the use of rb_lock in
>>>> rpcrdma_reply_handler.
>>>>
>>>> This replaces the mechanism introduced in 431af645cf66
>>>> ("xprtrdma:
>>>> Fix client lock-up after application signal fires").
>>>>
>>>> Use recv_lock to quickly find the completing rqst, pin it, then
>>>> drop the lock. At that point invalidation and pull-up of the
>>>> Reply
>>>> XDR can be done. Both are often expensive operations.
>>>>
>>>> Finally, take recv_lock again to signal completion to the RPC
>>>> layer. It also protects adjustment of "cwnd".
>>>>
>>>> This greatly reduces the amount of time a lock is held by the
>>>> reply handler. Comparing lock_stat results shows a marked
>>>> decrease
>>>> in contention on rb_lock and recv_lock.
>>>>
>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>> ---
>>>> Hi-
>>>>
>>>> If Trond's lock contention series is going into v4.14, I'd like
>>>> you
>>>> to consider this one (applied at the end of that series) as well.
>>>> Without it, NFS/RDMA performance regresses a bit after the
>>>> first xprt_pin_rqst patch is applied. Thanks!
>>>>
>>>
>>> If Anna is OK with it, then I can apply this patch once she's sent
>>> me
>>> the pull request for the other RDMA client patches.
>>
>> Works for me. Thanks!
>
> Please can you both check http://git.linux-nfs.org/?p=trondmy/linux-nfs
> .git;a=commit;h=b1da5d90857ee4b8f5acee1143705412b16fce56 and see if all
> is well?
Looks okay to me.
Anna
>
> Do note that I did remove the call to rpcrdma_buffer_put() which was
> being called with an uninitialised argument.
>
> Cheers
> Trond
>
> On Sep 5, 2017, at 4:33 PM, Trond Myklebust <[email protected]> wrote:
>
> On Thu, 2017-08-24 at 14:18 -0400, Anna Schumaker wrote:
>>
>> On 08/23/2017 05:40 PM, Trond Myklebust wrote:
>>> On Wed, 2017-08-23 at 17:05 -0400, Chuck Lever wrote:
>>>> Adopt the use of xprt_pin_rqst to eliminate contention between
>>>> Call-side users of rb_lock and the use of rb_lock in
>>>> rpcrdma_reply_handler.
>>>>
>>>> This replaces the mechanism introduced in 431af645cf66
>>>> ("xprtrdma:
>>>> Fix client lock-up after application signal fires").
>>>>
>>>> Use recv_lock to quickly find the completing rqst, pin it, then
>>>> drop the lock. At that point invalidation and pull-up of the
>>>> Reply
>>>> XDR can be done. Both are often expensive operations.
>>>>
>>>> Finally, take recv_lock again to signal completion to the RPC
>>>> layer. It also protects adjustment of "cwnd".
>>>>
>>>> This greatly reduces the amount of time a lock is held by the
>>>> reply handler. Comparing lock_stat results shows a marked
>>>> decrease
>>>> in contention on rb_lock and recv_lock.
>>>>
>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>> ---
>>>> Hi-
>>>>
>>>> If Trond's lock contention series is going into v4.14, I'd like
>>>> you
>>>> to consider this one (applied at the end of that series) as well.
>>>> Without it, NFS/RDMA performance regresses a bit after the
>>>> first xprt_pin_rqst patch is applied. Thanks!
>>>>
>>>
>>> If Anna is OK with it, then I can apply this patch once she's sent
>>> me
>>> the pull request for the other RDMA client patches.
>>
>> Works for me. Thanks!
>
> Please can you both check http://git.linux-nfs.org/?p=trondmy/linux-nfs
> .git;a=commit;h=b1da5d90857ee4b8f5acee1143705412b16fce56 and see if all
> is well?
>
> Do note that I did remove the call to rpcrdma_buffer_put() which was
> being called with an uninitialised argument.
Looking at that again. rpcrdma_buffer_put() does look unnecessary,
since out_norqst is being called now before "req" is initialized.
On further inspection, "return;" should be replaced with
"goto repost;". This is similar to "out_nomatch" and
"out_duplicate", which are both being replaced in this patch.
So, out_norqst should look something like this:
out_norqst:
spin_unlock(&xprt->recv_lock);
dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
__func__, be32_to_cpu(xid));
goto repost;
Otherwise, I don't see any obvious problems.
--
Chuck Lever
T24gVHVlLCAyMDE3LTA5LTA1IGF0IDE3OjA2IC0wNDAwLCBDaHVjayBMZXZlciB3cm90ZToNCj4g
PiBPbiBTZXAgNSwgMjAxNywgYXQgNDozMyBQTSwgVHJvbmQgTXlrbGVidXN0IDx0cm9uZG15QHBy
aW1hcnlkYXRhLmNvDQo+ID4gbT4gd3JvdGU6DQo+ID4gDQo+ID4gT24gVGh1LCAyMDE3LTA4LTI0
IGF0IDE0OjE4IC0wNDAwLCBBbm5hIFNjaHVtYWtlciB3cm90ZToNCj4gPiA+IA0KPiA+ID4gT24g
MDgvMjMvMjAxNyAwNTo0MCBQTSwgVHJvbmQgTXlrbGVidXN0IHdyb3RlOg0KPiA+ID4gPiBPbiBX
ZWQsIDIwMTctMDgtMjMgYXQgMTc6MDUgLTA0MDAsIENodWNrIExldmVyIHdyb3RlOg0KPiA+ID4g
PiA+IEFkb3B0IHRoZSB1c2Ugb2YgeHBydF9waW5fcnFzdCB0byBlbGltaW5hdGUgY29udGVudGlv
bg0KPiA+ID4gPiA+IGJldHdlZW4NCj4gPiA+ID4gPiBDYWxsLXNpZGUgdXNlcnMgb2YgcmJfbG9j
ayBhbmQgdGhlIHVzZSBvZiByYl9sb2NrIGluDQo+ID4gPiA+ID4gcnBjcmRtYV9yZXBseV9oYW5k
bGVyLg0KPiA+ID4gPiA+IA0KPiA+ID4gPiA+IFRoaXMgcmVwbGFjZXMgdGhlIG1lY2hhbmlzbSBp
bnRyb2R1Y2VkIGluIDQzMWFmNjQ1Y2Y2Ng0KPiA+ID4gPiA+ICgieHBydHJkbWE6DQo+ID4gPiA+
ID4gRml4IGNsaWVudCBsb2NrLXVwIGFmdGVyIGFwcGxpY2F0aW9uIHNpZ25hbCBmaXJlcyIpLg0K
PiA+ID4gPiA+IA0KPiA+ID4gPiA+IFVzZSByZWN2X2xvY2sgdG8gcXVpY2tseSBmaW5kIHRoZSBj
b21wbGV0aW5nIHJxc3QsIHBpbiBpdCwNCj4gPiA+ID4gPiB0aGVuDQo+ID4gPiA+ID4gZHJvcCB0
aGUgbG9jay4gQXQgdGhhdCBwb2ludCBpbnZhbGlkYXRpb24gYW5kIHB1bGwtdXAgb2YgdGhlDQo+
ID4gPiA+ID4gUmVwbHkNCj4gPiA+ID4gPiBYRFIgY2FuIGJlIGRvbmUuIEJvdGggYXJlIG9mdGVu
IGV4cGVuc2l2ZSBvcGVyYXRpb25zLg0KPiA+ID4gPiA+IA0KPiA+ID4gPiA+IEZpbmFsbHksIHRh
a2UgcmVjdl9sb2NrIGFnYWluIHRvIHNpZ25hbCBjb21wbGV0aW9uIHRvIHRoZSBSUEMNCj4gPiA+
ID4gPiBsYXllci4gSXQgYWxzbyBwcm90ZWN0cyBhZGp1c3RtZW50IG9mICJjd25kIi4NCj4gPiA+
ID4gPiANCj4gPiA+ID4gPiBUaGlzIGdyZWF0bHkgcmVkdWNlcyB0aGUgYW1vdW50IG9mIHRpbWUg
YSBsb2NrIGlzIGhlbGQgYnkgdGhlDQo+ID4gPiA+ID4gcmVwbHkgaGFuZGxlci4gQ29tcGFyaW5n
IGxvY2tfc3RhdCByZXN1bHRzIHNob3dzIGEgbWFya2VkDQo+ID4gPiA+ID4gZGVjcmVhc2UNCj4g
PiA+ID4gPiBpbiBjb250ZW50aW9uIG9uIHJiX2xvY2sgYW5kIHJlY3ZfbG9jay4NCj4gPiA+ID4g
PiANCj4gPiA+ID4gPiBTaWduZWQtb2ZmLWJ5OiBDaHVjayBMZXZlciA8Y2h1Y2subGV2ZXJAb3Jh
Y2xlLmNvbT4NCj4gPiA+ID4gPiAtLS0NCj4gPiA+ID4gPiBIaS0NCj4gPiA+ID4gPiANCj4gPiA+
ID4gPiBJZiBUcm9uZCdzIGxvY2sgY29udGVudGlvbiBzZXJpZXMgaXMgZ29pbmcgaW50byB2NC4x
NCwgSSdkDQo+ID4gPiA+ID4gbGlrZQ0KPiA+ID4gPiA+IHlvdQ0KPiA+ID4gPiA+IHRvIGNvbnNp
ZGVyIHRoaXMgb25lIChhcHBsaWVkIGF0IHRoZSBlbmQgb2YgdGhhdCBzZXJpZXMpIGFzDQo+ID4g
PiA+ID4gd2VsbC4NCj4gPiA+ID4gPiBXaXRob3V0IGl0LCBORlMvUkRNQSBwZXJmb3JtYW5jZSBy
ZWdyZXNzZXMgYSBiaXQgYWZ0ZXIgdGhlDQo+ID4gPiA+ID4gZmlyc3QgeHBydF9waW5fcnFzdCBw
YXRjaCBpcyBhcHBsaWVkLiBUaGFua3MhDQo+ID4gPiA+ID4gDQo+ID4gPiA+IA0KPiA+ID4gPiBJ
ZiBBbm5hIGlzIE9LIHdpdGggaXQsIHRoZW4gSSBjYW4gYXBwbHkgdGhpcyBwYXRjaCBvbmNlIHNo
ZSdzDQo+ID4gPiA+IHNlbnQNCj4gPiA+ID4gbWUNCj4gPiA+ID4gdGhlIHB1bGwgcmVxdWVzdCBm
b3IgdGhlIG90aGVyIFJETUEgY2xpZW50IHBhdGNoZXMuDQo+ID4gPiANCj4gPiA+IFdvcmtzIGZv
ciBtZS4gIFRoYW5rcyENCj4gPiANCj4gPiBQbGVhc2UgY2FuIHlvdSBib3RoIGNoZWNrIGh0dHA6
Ly9naXQubGludXgtbmZzLm9yZy8/cD10cm9uZG15L2xpbnV4DQo+ID4gLW5mcw0KPiA+IC5naXQ7
YT1jb21taXQ7aD1iMWRhNWQ5MDg1N2VlNGI4ZjVhY2VlMTE0MzcwNTQxMmIxNmZjZTU2IGFuZCBz
ZWUgaWYNCj4gPiBhbGwNCj4gPiBpcyB3ZWxsPw0KPiA+IA0KPiA+IERvIG5vdGUgdGhhdCBJIGRp
ZCByZW1vdmUgdGhlIGNhbGwgdG8gcnBjcmRtYV9idWZmZXJfcHV0KCkgd2hpY2gNCj4gPiB3YXMN
Cj4gPiBiZWluZyBjYWxsZWQgd2l0aCBhbiB1bmluaXRpYWxpc2VkIGFyZ3VtZW50Lg0KPiANCj4g
TG9va2luZyBhdCB0aGF0IGFnYWluLiBycGNyZG1hX2J1ZmZlcl9wdXQoKSBkb2VzIGxvb2sgdW5u
ZWNlc3NhcnksDQo+IHNpbmNlIG91dF9ub3Jxc3QgaXMgYmVpbmcgY2FsbGVkIG5vdyBiZWZvcmUg
InJlcSIgaXMgaW5pdGlhbGl6ZWQuDQo+IA0KPiBPbiBmdXJ0aGVyIGluc3BlY3Rpb24sICJyZXR1
cm47IiBzaG91bGQgYmUgcmVwbGFjZWQgd2l0aA0KPiAiZ290byByZXBvc3Q7Ii4gVGhpcyBpcyBz
aW1pbGFyIHRvICJvdXRfbm9tYXRjaCIgYW5kDQo+ICJvdXRfZHVwbGljYXRlIiwgd2hpY2ggYXJl
IGJvdGggYmVpbmcgcmVwbGFjZWQgaW4gdGhpcyBwYXRjaC4NCj4gDQo+IFNvLCBvdXRfbm9ycXN0
IHNob3VsZCBsb29rIHNvbWV0aGluZyBsaWtlIHRoaXM6DQo+IA0KPiANCj4gb3V0X25vcnFzdDoN
Cj4gICAgICAgICBzcGluX3VubG9jaygmeHBydC0+cmVjdl9sb2NrKTsNCj4gICAgICAgICBkcHJp
bnRrKCJSUEM6ICAgICAgICVzOiBubyBtYXRjaCBmb3IgaW5jb21pbmcgeGlkIDB4JTA4eFxuIiwN
Cj4gICAgICAgICAgICAgICAgIF9fZnVuY19fLCBiZTMyX3RvX2NwdSh4aWQpKTsNCj4gICAgICAg
ICBnb3RvIHJlcG9zdDsNCj4gDQo+IA0KPiBPdGhlcndpc2UsIEkgZG9uJ3Qgc2VlIGFueSBvYnZp
b3VzIHByb2JsZW1zLg0KPiANCg0KUGF0Y2ggdXBkYXRlZC4gUGxlYXNlIHNlZSBodHRwOi8vZ2l0
LmxpbnV4LW5mcy5vcmcvP3A9dHJvbmRteS9saW51eC1uZnMNCi5naXQ7YT1jb21taXQ7aD05NTkw
ZDA4M2MxYmIxNDE5Yjc5OTI2MDlkMWEwYTNlMzUxN2QzODkzDQoNCi0tIA0KVHJvbmQgTXlrbGVi
dXN0DQpMaW51eCBORlMgY2xpZW50IG1haW50YWluZXIsIFByaW1hcnlEYXRhDQp0cm9uZC5teWts
ZWJ1c3RAcHJpbWFyeWRhdGEuY29tDQo=
> On Sep 5, 2017, at 6:31 PM, Trond Myklebust <[email protected]> wrote:
>
> On Tue, 2017-09-05 at 17:06 -0400, Chuck Lever wrote:
>>> On Sep 5, 2017, at 4:33 PM, Trond Myklebust <[email protected]
>>> m> wrote:
>>>
>>> On Thu, 2017-08-24 at 14:18 -0400, Anna Schumaker wrote:
>>>>
>>>> On 08/23/2017 05:40 PM, Trond Myklebust wrote:
>>>>> On Wed, 2017-08-23 at 17:05 -0400, Chuck Lever wrote:
>>>>>> Adopt the use of xprt_pin_rqst to eliminate contention
>>>>>> between
>>>>>> Call-side users of rb_lock and the use of rb_lock in
>>>>>> rpcrdma_reply_handler.
>>>>>>
>>>>>> This replaces the mechanism introduced in 431af645cf66
>>>>>> ("xprtrdma:
>>>>>> Fix client lock-up after application signal fires").
>>>>>>
>>>>>> Use recv_lock to quickly find the completing rqst, pin it,
>>>>>> then
>>>>>> drop the lock. At that point invalidation and pull-up of the
>>>>>> Reply
>>>>>> XDR can be done. Both are often expensive operations.
>>>>>>
>>>>>> Finally, take recv_lock again to signal completion to the RPC
>>>>>> layer. It also protects adjustment of "cwnd".
>>>>>>
>>>>>> This greatly reduces the amount of time a lock is held by the
>>>>>> reply handler. Comparing lock_stat results shows a marked
>>>>>> decrease
>>>>>> in contention on rb_lock and recv_lock.
>>>>>>
>>>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>>>> ---
>>>>>> Hi-
>>>>>>
>>>>>> If Trond's lock contention series is going into v4.14, I'd
>>>>>> like
>>>>>> you
>>>>>> to consider this one (applied at the end of that series) as
>>>>>> well.
>>>>>> Without it, NFS/RDMA performance regresses a bit after the
>>>>>> first xprt_pin_rqst patch is applied. Thanks!
>>>>>>
>>>>>
>>>>> If Anna is OK with it, then I can apply this patch once she's
>>>>> sent
>>>>> me
>>>>> the pull request for the other RDMA client patches.
>>>>
>>>> Works for me. Thanks!
>>>
>>> Please can you both check http://git.linux-nfs.org/?p=trondmy/linux
>>> -nfs
>>> .git;a=commit;h=b1da5d90857ee4b8f5acee1143705412b16fce56 and see if
>>> all
>>> is well?
>>>
>>> Do note that I did remove the call to rpcrdma_buffer_put() which
>>> was
>>> being called with an uninitialised argument.
>>
>> Looking at that again. rpcrdma_buffer_put() does look unnecessary,
>> since out_norqst is being called now before "req" is initialized.
>>
>> On further inspection, "return;" should be replaced with
>> "goto repost;". This is similar to "out_nomatch" and
>> "out_duplicate", which are both being replaced in this patch.
>>
>> So, out_norqst should look something like this:
>>
>>
>> out_norqst:
>> spin_unlock(&xprt->recv_lock);
>> dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
>> __func__, be32_to_cpu(xid));
>> goto repost;
>>
>>
>> Otherwise, I don't see any obvious problems.
>>
>
> Patch updated. Please see http://git.linux-nfs.org/?p=trondmy/linux-nfs
> .git;a=commit;h=9590d083c1bb1419b7992609d1a0a3e3517d3893
out_norqst looks correct.
Note that for out_shortreply:
+out_shortreply:
+ dprintk("RPC: %s: short/invalid reply\n", __func__);
+ goto repost;
The "goto repost;" can be dropped, since this code drops
right into "repost:". That's harmless, though.
--
Chuck Lever