The following 3 patches apply on top of the writeback patches sent last
week. They start the process of reducing contention in the RPC client
receive code due to excessive holding of the transport lock.
The first patch is the most important in that it allows the client to
drop the transport lock while copying data from the socket into the
RPC call's reply buffers.
The second ensures that we don't hog the workqueue worker thread
forever, but instead requeue the receive job if it keeps looping.
Finally, a small cleanup to avoid having to make a copy of the
struct xdr_skb_reader on the stack.
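To illustrate the first patch's approach, the receive path ends up
following roughly this pattern (a hedged paraphrase of the xprtsock.c
changes in that patch, not the literal code; copy_reply_data() is a
made-up stand-in for the socket copy step, and error handling is
omitted):

	spin_lock_bh(&xprt->transport_lock);
	req = xprt_lookup_rqst(xprt, xid);
	if (!req)
		goto out_unlock;
	xprt_pin_rqst(req);			/* keep req alive without the lock */
	spin_unlock_bh(&xprt->transport_lock);

	copy_reply_data(&req->rq_private_buf);	/* stand-in for the socket copy */

	spin_lock_bh(&xprt->transport_lock);
	xprt_complete_rqst(req->rq_task, copied);
	xprt_unpin_rqst(req);
out_unlock:
	spin_unlock_bh(&xprt->transport_lock);

xprt_pin_rqst()/xprt_unpin_rqst() are the new helpers added to
net/sunrpc/xprt.c by that patch.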
Trond Myklebust (3):
SUNRPC: Don't hold the transport lock across socket copy operations
SUNRPC: Don't loop forever in xs_tcp_data_receive()
SUNRPC: Cleanup xs_tcp_read_common()
include/linux/sunrpc/sched.h | 2 ++
include/linux/sunrpc/xprt.h | 2 ++
net/sunrpc/xprt.c | 42 ++++++++++++++++++++++++++++
net/sunrpc/xprtsock.c | 65 +++++++++++++++++++++++++-------------------
4 files changed, 83 insertions(+), 28 deletions(-)
--
2.13.5
SUNRPC: Don't loop forever in xs_tcp_data_receive()

Ensure that we don't hog the workqueue thread by requeuing the job
every 64 loops.
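The idea, as a rough sketch (hedged illustration only, not the code in
the diff below; bounded_receive_worker(), fetch_more_data() and
more_data_pending() are made-up names, while the real work item calls
tcp_read_sock() and tests XPRT_SOCK_DATA_READY):

	static void bounded_receive_worker(struct work_struct *work)
	{
		int loop;

		for (loop = 0; loop < 64; loop++) {
			if (fetch_more_data() <= 0)
				return;		/* nothing left to read */
		}
		/* Still busy after 64 passes: give the worker thread back
		 * to the workqueue and reschedule ourselves instead.
		 */
		if (more_data_pending())
			queue_work(xprtiod_workqueue, work);
	}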
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 13 +++++++------
1 file changed, 7 insertions(+), 6 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 222993fbaf99..12f4c9b2857c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1533,6 +1533,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
.arg.data = xprt,
};
unsigned long total = 0;
+ int loop;
int read = 0;
mutex_lock(&transport->recv_mutex);
@@ -1541,20 +1542,20 @@ static void xs_tcp_data_receive(struct sock_xprt *transport)
goto out;
/* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (;;) {
+ for (loop = 0; loop < 64; loop++) {
lock_sock(sk);
read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
if (read <= 0) {
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
release_sock(sk);
- if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- break;
- } else {
- release_sock(sk);
- total += read;
+ break;
}
+ release_sock(sk);
+ total += read;
rd_desc.count = 65536;
}
+ if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ queue_work(xprtiod_workqueue, &transport->recv_worker);
out:
mutex_unlock(&transport->recv_mutex);
trace_xs_tcp_data_ready(xprt, read, total);
--
2.13.5
SUNRPC: Don't hold the transport lock across socket copy operations

Instead add a mechanism to ensure that the request doesn't disappear
from underneath us while copying from the socket. We do this by
preventing xprt_release() from freeing the XDR buffers until the
flag RPC_TASK_MSG_RECV has been cleared from the request.
Signed-off-by: Trond Myklebust <[email protected]>
---
include/linux/sunrpc/sched.h | 2 ++
include/linux/sunrpc/xprt.h | 2 ++
net/sunrpc/xprt.c | 42 ++++++++++++++++++++++++++++++++++++++++++
net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++-----
4 files changed, 68 insertions(+), 5 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 15bc1cd6ee5c..e972d9e05426 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -141,6 +141,8 @@ struct rpc_task_setup {
#define RPC_TASK_ACTIVE 2
#define RPC_TASK_MSG_XMIT 3
#define RPC_TASK_MSG_XMIT_WAIT 4
+#define RPC_TASK_MSG_RECV 5
+#define RPC_TASK_MSG_RECV_WAIT 6
#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index eab1c749e192..65b9e0224753 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -372,6 +372,8 @@ void xprt_write_space(struct rpc_xprt *xprt);
void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
void xprt_complete_rqst(struct rpc_task *task, int copied);
+void xprt_pin_rqst(struct rpc_rqst *req);
+void xprt_unpin_rqst(struct rpc_rqst *req);
void xprt_release_rqst_cong(struct rpc_task *task);
void xprt_disconnect_done(struct rpc_xprt *xprt);
void xprt_force_disconnect(struct rpc_xprt *xprt);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 788c1b6138c2..62c379865f7c 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -844,6 +844,47 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
}
EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+/**
+ * xprt_pin_rqst - Pin a request on the transport receive list
+ * @req: Request to pin
+ *
+ * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
+ * so should be holding the xprt transport lock.
+ */
+void xprt_pin_rqst(struct rpc_rqst *req)
+{
+ set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+}
+
+/**
+ * xprt_unpin_rqst - Unpin a request on the transport receive list
+ * @req: Request to pin
+ *
+ * Caller should be holding the xprt transport lock.
+ */
+void xprt_unpin_rqst(struct rpc_rqst *req)
+{
+ struct rpc_task *task = req->rq_task;
+
+ clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
+ if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
+ wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+}
+
+static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
+__must_hold(&req->rq_xprt->transport_lock)
+{
+ struct rpc_task *task = req->rq_task;
+ if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
+ spin_unlock_bh(&req->rq_xprt->transport_lock);
+ set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+ wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
+ TASK_UNINTERRUPTIBLE);
+ clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
+ spin_lock_bh(&req->rq_xprt->transport_lock);
+ }
+}
+
static void xprt_update_rtt(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
@@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task *task)
list_del(&req->rq_list);
xprt->last_used = jiffies;
xprt_schedule_autodisconnect(xprt);
+ xprt_wait_on_pinned_rqst(req);
spin_unlock_bh(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 4f154d388748..222993fbaf99 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -973,6 +973,8 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
+ xprt_pin_rqst(rovr);
+ spin_unlock_bh(&xprt->transport_lock);
task = rovr->rq_task;
copied = rovr->rq_private_buf.buflen;
@@ -981,11 +983,14 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
dprintk("RPC: sk_buff copy failed\n");
- goto out_unlock;
+ spin_lock_bh(&xprt->transport_lock);
+ goto out_unpin;
}
+ spin_lock_bh(&xprt->transport_lock);
xprt_complete_rqst(task, copied);
-
+out_unpin:
+ xprt_unpin_rqst(rovr);
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
}
@@ -1054,6 +1059,8 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
+ xprt_pin_rqst(rovr);
+ spin_unlock_bh(&xprt->transport_lock);
task = rovr->rq_task;
if ((copied = rovr->rq_private_buf.buflen) > repsize)
@@ -1062,14 +1069,17 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
- goto out_unlock;
+ spin_lock_bh(&xprt->transport_lock);
+ goto out_unpin;
}
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
+ spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
xprt_complete_rqst(task, copied);
-
+out_unpin:
+ xprt_unpin_rqst(rovr);
out_unlock:
spin_unlock_bh(&xprt->transport_lock);
}
@@ -1351,12 +1361,15 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
spin_unlock_bh(&xprt->transport_lock);
return -1;
}
+ xprt_pin_rqst(req);
+ spin_unlock_bh(&xprt->transport_lock);
xs_tcp_read_common(xprt, desc, req);
+ spin_lock_bh(&xprt->transport_lock);
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_rqst(req->rq_task, transport->tcp_copied);
-
+ xprt_unpin_rqst(req);
spin_unlock_bh(&xprt->transport_lock);
return 0;
}
@@ -1385,12 +1398,16 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
xprt_force_disconnect(xprt);
return -1;
}
+ xprt_pin_rqst(req);
+ spin_unlock_bh(&xprt->transport_lock);
dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
xs_tcp_read_common(xprt, desc, req);
+ spin_lock_bh(&xprt->transport_lock);
if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
xprt_complete_bc_request(req, transport->tcp_copied);
+ xprt_unpin_rqst(req);
spin_unlock_bh(&xprt->transport_lock);
return 0;
--
2.13.5
SUNRPC: Cleanup xs_tcp_read_common()

Simplify the code to avoid a full copy of the struct xdr_skb_reader.
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 25 ++++++++-----------------
1 file changed, 8 insertions(+), 17 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 12f4c9b2857c..27858d5819ad 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1287,25 +1287,12 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
}
len = desc->count;
- if (len > transport->tcp_reclen - transport->tcp_offset) {
- struct xdr_skb_reader my_desc;
-
- len = transport->tcp_reclen - transport->tcp_offset;
- memcpy(&my_desc, desc, sizeof(my_desc));
- my_desc.count = len;
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
- &my_desc, xdr_skb_read_bits);
- desc->count -= r;
- desc->offset += r;
- } else
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+ if (len > transport->tcp_reclen - transport->tcp_offset)
+ desc->count = transport->tcp_reclen - transport->tcp_offset;
+ r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
desc, xdr_skb_read_bits);
- if (r > 0) {
- transport->tcp_copied += r;
- transport->tcp_offset += r;
- }
- if (r != len) {
+ if (desc->count) {
/* Error when copying to the receive buffer,
* usually because we weren't able to allocate
* additional buffer pages. All we can do now
@@ -1325,6 +1312,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
return;
}
+ transport->tcp_copied += r;
+ transport->tcp_offset += r;
+ desc->count = len - r;
+
dprintk("RPC: XID %08x read %zd bytes\n",
ntohl(transport->tcp_xid), r);
dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
--
2.13.5
> On Aug 14, 2017, at 3:16 PM, Trond Myklebust <[email protected]> wrote:
>
> Instead add a mechanism to ensure that the request doesn't disappear
> from underneath us while copying from the socket. We do this by
> preventing xprt_release() from freeing the XDR buffers until the
> flag RPC_TASK_MSG_RECV has been cleared from the request.
>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
> include/linux/sunrpc/sched.h | 2 ++
> include/linux/sunrpc/xprt.h | 2 ++
> net/sunrpc/xprt.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++-----
> 4 files changed, 68 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index 15bc1cd6ee5c..e972d9e05426 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -141,6 +141,8 @@ struct rpc_task_setup {
> #define RPC_TASK_ACTIVE 2
> #define RPC_TASK_MSG_XMIT 3
> #define RPC_TASK_MSG_XMIT_WAIT 4
> +#define RPC_TASK_MSG_RECV 5
> +#define RPC_TASK_MSG_RECV_WAIT 6
>
> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
> #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index eab1c749e192..65b9e0224753 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -372,6 +372,8 @@ void xprt_write_space(struct rpc_xprt *xprt);
> void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
> struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
> void xprt_complete_rqst(struct rpc_task *task, int copied);
> +void xprt_pin_rqst(struct rpc_rqst *req);
> +void xprt_unpin_rqst(struct rpc_rqst *req);
> void xprt_release_rqst_cong(struct rpc_task *task);
> void xprt_disconnect_done(struct rpc_xprt *xprt);
> void xprt_force_disconnect(struct rpc_xprt *xprt);
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 788c1b6138c2..62c379865f7c 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -844,6 +844,47 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
> }
> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>
> +/**
> + * xprt_pin_rqst - Pin a request on the transport receive list
> + * @req: Request to pin
> + *
> + * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
> + * so should be holding the xprt transport lock.
> + */
> +void xprt_pin_rqst(struct rpc_rqst *req)
> +{
> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
> +}
> +
> +/**
> + * xprt_unpin_rqst - Unpin a request on the transport receive list
> + * @req: Request to pin
> + *
> + * Caller should be holding the xprt transport lock.
> + */
> +void xprt_unpin_rqst(struct rpc_rqst *req)
> +{
> + struct rpc_task *task = req->rq_task;
> +
> + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
> + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
> +}
> +
> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
> +__must_hold(&req->rq_xprt->transport_lock)
> +{
> + struct rpc_task *task = req->rq_task;
> + if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
> + spin_unlock_bh(&req->rq_xprt->transport_lock);
> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
> + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
> + TASK_UNINTERRUPTIBLE);
> + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
> + spin_lock_bh(&req->rq_xprt->transport_lock);
> + }
> +}
> +
> static void xprt_update_rtt(struct rpc_task *task)
> {
> struct rpc_rqst *req = task->tk_rqstp;
> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task *task)
> list_del(&req->rq_list);
> xprt->last_used = jiffies;
> xprt_schedule_autodisconnect(xprt);
> + xprt_wait_on_pinned_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while holding
a BH spin lock? This could be prone to deadlock.
> if (req->rq_buffer)
> xprt->ops->buf_free(task);
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 4f154d388748..222993fbaf99 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -973,6 +973,8 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
> rovr = xprt_lookup_rqst(xprt, *xp);
> if (!rovr)
> goto out_unlock;
> + xprt_pin_rqst(rovr);
> + spin_unlock_bh(&xprt->transport_lock);
> task = rovr->rq_task;
>
> copied = rovr->rq_private_buf.buflen;
> @@ -981,11 +983,14 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
>
> if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
> dprintk("RPC: sk_buff copy failed\n");
> - goto out_unlock;
> + spin_lock_bh(&xprt->transport_lock);
> + goto out_unpin;
> }
>
> + spin_lock_bh(&xprt->transport_lock);
> xprt_complete_rqst(task, copied);
> -
> +out_unpin:
> + xprt_unpin_rqst(rovr);
> out_unlock:
> spin_unlock_bh(&xprt->transport_lock);
> }
> @@ -1054,6 +1059,8 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
> rovr = xprt_lookup_rqst(xprt, *xp);
> if (!rovr)
> goto out_unlock;
> + xprt_pin_rqst(rovr);
> + spin_unlock_bh(&xprt->transport_lock);
> task = rovr->rq_task;
>
> if ((copied = rovr->rq_private_buf.buflen) > repsize)
> @@ -1062,14 +1069,17 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
> /* Suck it into the iovec, verify checksum if not done by hw. */
> if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
> __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
> - goto out_unlock;
> + spin_lock_bh(&xprt->transport_lock);
> + goto out_unpin;
> }
>
> __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
>
> + spin_lock_bh(&xprt->transport_lock);
> xprt_adjust_cwnd(xprt, task, copied);
> xprt_complete_rqst(task, copied);
> -
> +out_unpin:
> + xprt_unpin_rqst(rovr);
> out_unlock:
> spin_unlock_bh(&xprt->transport_lock);
> }
> @@ -1351,12 +1361,15 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
> spin_unlock_bh(&xprt->transport_lock);
> return -1;
> }
> + xprt_pin_rqst(req);
> + spin_unlock_bh(&xprt->transport_lock);
>
> xs_tcp_read_common(xprt, desc, req);
>
> + spin_lock_bh(&xprt->transport_lock);
> if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
> xprt_complete_rqst(req->rq_task, transport->tcp_copied);
> -
> + xprt_unpin_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
> return 0;
> }
> @@ -1385,12 +1398,16 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
> xprt_force_disconnect(xprt);
> return -1;
> }
> + xprt_pin_rqst(req);
> + spin_unlock_bh(&xprt->transport_lock);
>
> dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
> xs_tcp_read_common(xprt, desc, req);
>
> + spin_lock_bh(&xprt->transport_lock);
> if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
> xprt_complete_bc_request(req, transport->tcp_copied);
> + xprt_unpin_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
>
> return 0;
> --
> 2.13.5
>
--
Chuck Lever
On Mon, 2017-08-14 at 15:28 -0400, Chuck Lever wrote:
> Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while holding
> a BH spin lock? This could be prone to deadlock.

We drop the lock inside xprt_wait_on_pinned_rqst() if we need to wait.

The reason why we want to hold the lock there is to avoid a use-after-free
in xprt_unpin_rqst(). Without the lock, there is a risk that
xprt_release() could complete, and the task get freed before we have
completed wake_up_bit().

--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
> On Aug 14, 2017, at 4:07 PM, Trond Myklebust <[email protected]> wrote:
>
> On Mon, 2017-08-14 at 15:28 -0400, Chuck Lever wrote:
>>> On Aug 14, 2017, at 3:16 PM, Trond Myklebust <trond.myklebust@prima
>>> rydata.com> wrote:
>>>
>>> Instead add a mechanism to ensure that the request doesn't
>>> disappear
>>> from underneath us while copying from the socket. We do this by
>>> preventing xprt_release() from freeing the XDR buffers until the
>>> flag RPC_TASK_MSG_RECV has been cleared from the request.
>>>
>>> Signed-off-by: Trond Myklebust <[email protected]>
>>> ---
>>> include/linux/sunrpc/sched.h | 2 ++
>>> include/linux/sunrpc/xprt.h | 2 ++
>>> net/sunrpc/xprt.c | 42
>>> ++++++++++++++++++++++++++++++++++++++++++
>>> net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++-----
>>> 4 files changed, 68 insertions(+), 5 deletions(-)
>>>
>>> diff --git a/include/linux/sunrpc/sched.h
>>> b/include/linux/sunrpc/sched.h
>>> index 15bc1cd6ee5c..e972d9e05426 100644
>>> --- a/include/linux/sunrpc/sched.h
>>> +++ b/include/linux/sunrpc/sched.h
>>> @@ -141,6 +141,8 @@ struct rpc_task_setup {
>>> #define RPC_TASK_ACTIVE 2
>>> #define RPC_TASK_MSG_XMIT 3
>>> #define RPC_TASK_MSG_XMIT_WAIT 4
>>> +#define RPC_TASK_MSG_RECV 5
>>> +#define RPC_TASK_MSG_RECV_WAIT 6
>>>
>>> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)-
>>>> tk_runstate)
>>> #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)-
>>>> tk_runstate)
>>> diff --git a/include/linux/sunrpc/xprt.h
>>> b/include/linux/sunrpc/xprt.h
>>> index eab1c749e192..65b9e0224753 100644
>>> --- a/include/linux/sunrpc/xprt.h
>>> +++ b/include/linux/sunrpc/xprt.h
>>> @@ -372,6 +372,8 @@ void xprt_write_space(st
>>> ruct rpc_xprt *xprt);
>>> void xprt_adjust_cwnd(struct rpc_xprt *xprt,
>>> struct rpc_task *task, int result);
>>> struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt,
>>> __be32 xid);
>>> void xprt_complete_rqst(struct rpc_task
>>> *task, int copied);
>>> +void xprt_pin_rqst(struct rpc_rqst *req);
>>> +void xprt_unpin_rqst(struct rpc_rqst *req);
>>> void xprt_release_rqst_cong(struct rpc_task
>>> *task);
>>> void xprt_disconnect_done(struct rpc_xprt
>>> *xprt);
>>> void xprt_force_disconnect(struct rpc_xprt
>>> *xprt);
>>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>>> index 788c1b6138c2..62c379865f7c 100644
>>> --- a/net/sunrpc/xprt.c
>>> +++ b/net/sunrpc/xprt.c
>>> @@ -844,6 +844,47 @@ struct rpc_rqst *xprt_lookup_rqst(struct
>>> rpc_xprt *xprt, __be32 xid)
>>> }
>>> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>>>
>>> +/**
>>> + * xprt_pin_rqst - Pin a request on the transport receive list
>>> + * @req: Request to pin
>>> + *
>>> + * Caller must ensure this is atomic with the call to
>>> xprt_lookup_rqst()
>>> + * so should be holding the xprt transport lock.
>>> + */
>>> +void xprt_pin_rqst(struct rpc_rqst *req)
>>> +{
>>> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
>>> +}
>>> +
>>> +/**
>>> + * xprt_unpin_rqst - Unpin a request on the transport receive list
>>> + * @req: Request to pin
>>> + *
>>> + * Caller should be holding the xprt transport lock.
>>> + */
>>> +void xprt_unpin_rqst(struct rpc_rqst *req)
>>> +{
>>> + struct rpc_task *task = req->rq_task;
>>> +
>>> + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
>>> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
>>> + wake_up_bit(&task->tk_runstate,
>>> RPC_TASK_MSG_RECV);
>>> +}
>>> +
>>> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
>>> +__must_hold(&req->rq_xprt->transport_lock)
>>> +{
>>> + struct rpc_task *task = req->rq_task;
>>> + if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
>>> + spin_unlock_bh(&req->rq_xprt->transport_lock);
>>> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>> tk_runstate);
>>> + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
>>> + TASK_UNINTERRUPTIBLE);
>>> + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>> tk_runstate);
>>> + spin_lock_bh(&req->rq_xprt->transport_lock);
>>> + }
>>> +}
>>> +
>>> static void xprt_update_rtt(struct rpc_task *task)
>>> {
>>> struct rpc_rqst *req = task->tk_rqstp;
>>> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task *task)
>>> list_del(&req->rq_list);
>>> xprt->last_used = jiffies;
>>> xprt_schedule_autodisconnect(xprt);
>>> + xprt_wait_on_pinned_rqst(req);
>>> spin_unlock_bh(&xprt->transport_lock);
>>
>> Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while holding
>> a BH spin lock? This could be prone to deadlock.
>
> We drop the lock inside xprt_wait_on_pinned_rqst() if we need to wait.
>
> The reason why we want to hold the lock there is to avoid a use-after-
> free in xprt_unpin_rqst(). Without the lock, there is a risk that
> xprt_release() could complete, and the task get freed before we have
> completed wake_up_bit().
Agree with the last bit. I had that challenge with the order of
memory invalidation (which waits) and xprt_lookup_rqst/complete_rqst
in rpcrdma_reply_handler.
However my concern is that there are many other users of the
transport_lock. Does it become more tricky not to introduce a
problem by changing any one of the sites that take transport_lock?
Despite the deadlock concern, I don't think it makes sense to call
wait_on_bit while holding a spin lock simply because spin locks are
supposed to be for things that are quick, like updating a data
structure.
wait_on_bit can take microseconds: prepare_to_wait and finish_wait
can both take an irqsave spin_lock, for example. bit_wait is the
action that is done if the bit is not ready, and that calls
schedule(). On a busy system, that kind of context switch can take
dozens of microseconds.
IMO it would be a lot nicer if we had an FSM state available that
could allow another sleep while an RPC task is in xprt_release.
--
Chuck Lever
On Mon, 2017-08-14 at 16:23 -0400, Chuck Lever wrote:
> However my concern is that there are many other users of the
> transport_lock. Does it become more tricky not to introduce a
> problem by changing any one of the sites that take transport_lock?

What is your concern? The main reason for taking the transport lock in
the current code is twofold:

1) Perform the lookup of the request on the receive list
2) Ensure the request doesn't disappear while we're writing to it.

> Despite the deadlock concern, I don't think it makes sense to call
> wait_on_bit while holding a spin lock simply because spin locks are
> supposed to be for things that are quick, like updating a data
> structure.
>
> wait_on_bit can take microseconds: prepare_to_wait and finish_wait
> can both take an irqsave spin_lock, for example. bit_wait is the
> action that is done if the bit is not ready, and that calls
> schedule(). On a busy system, that kind of context switch can take
> dozens of microseconds.

Agreed, but we should only very rarely be hitting the wait case. In all
successful RPC calls, the receive will have happened long before we
call xprt_release(). In fact, the only 2 cases where this can happen
would be when the RPC call itself is interrupted. Either:

1) The task is synchronous and was interrupted by the user.
2) The task is terminating early due to a soft timeout.

In both those cases, we're in a non-performance path. Furthermore, they
would currently end up spinning against the transport lock.

> IMO it would be a lot nicer if we had an FSM state available that
> could allow another sleep while an RPC task is in xprt_release.

Why would that be preferable, if the argument holds that this only
happens in these 2 rare error cases?

--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
trond.myklebust@primarydata.com
> On Aug 14, 2017, at 4:38 PM, Trond Myklebust <[email protected]> wrote:
>
> On Mon, 2017-08-14 at 16:23 -0400, Chuck Lever wrote:
>>> On Aug 14, 2017, at 4:07 PM, Trond Myklebust <[email protected]
>>> om> wrote:
>>>
>>> On Mon, 2017-08-14 at 15:28 -0400, Chuck Lever wrote:
>>>>> On Aug 14, 2017, at 3:16 PM, Trond Myklebust <trond.myklebust@p
>>>>> rima
>>>>> rydata.com> wrote:
>>>>>
>>>>> Instead add a mechanism to ensure that the request doesn't
>>>>> disappear
>>>>> from underneath us while copying from the socket. We do this by
>>>>> preventing xprt_release() from freeing the XDR buffers until
>>>>> the
>>>>> flag RPC_TASK_MSG_RECV has been cleared from the request.
>>>>>
>>>>> Signed-off-by: Trond Myklebust <[email protected]
>>>>>>
>>>>> ---
>>>>> include/linux/sunrpc/sched.h | 2 ++
>>>>> include/linux/sunrpc/xprt.h | 2 ++
>>>>> net/sunrpc/xprt.c | 42
>>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>> net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++-----
>>>>> 4 files changed, 68 insertions(+), 5 deletions(-)
>>>>>
>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>> b/include/linux/sunrpc/sched.h
>>>>> index 15bc1cd6ee5c..e972d9e05426 100644
>>>>> --- a/include/linux/sunrpc/sched.h
>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>> @@ -141,6 +141,8 @@ struct rpc_task_setup {
>>>>> #define RPC_TASK_ACTIVE 2
>>>>> #define RPC_TASK_MSG_XMIT 3
>>>>> #define RPC_TASK_MSG_XMIT_WAIT 4
>>>>> +#define RPC_TASK_MSG_RECV 5
>>>>> +#define RPC_TASK_MSG_RECV_WAIT 6
>>>>>
>>>>> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING,
>>>>> &(t)-
>>>>>> tk_runstate)
>>>>>
>>>>> #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING,
>>>>> &(t)-
>>>>>> tk_runstate)
>>>>>
>>>>> diff --git a/include/linux/sunrpc/xprt.h
>>>>> b/include/linux/sunrpc/xprt.h
>>>>> index eab1c749e192..65b9e0224753 100644
>>>>> --- a/include/linux/sunrpc/xprt.h
>>>>> +++ b/include/linux/sunrpc/xprt.h
>>>>> @@ -372,6 +372,8 @@ void xprt_write_spac
>>>>> e(st
>>>>> ruct rpc_xprt *xprt);
>>>>> void xprt_adjust_cwnd(struct rpc_xprt
>>>>> *xprt,
>>>>> struct rpc_task *task, int result);
>>>>> struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt
>>>>> *xprt,
>>>>> __be32 xid);
>>>>> void xprt_complete_rqst(struct rpc_task
>>>>> *task, int copied);
>>>>> +void xprt_pin_rqst(struct rpc_rqst
>>>>> *req);
>>>>> +void xprt_unpin_rqst(struct rpc_rqst
>>>>> *req);
>>>>> void xprt_release_rqst_cong(struct
>>>>> rpc_task
>>>>> *task);
>>>>> void xprt_disconnect_done(struct
>>>>> rpc_xprt
>>>>> *xprt);
>>>>> void xprt_force_disconnect(struct
>>>>> rpc_xprt
>>>>> *xprt);
>>>>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>>>>> index 788c1b6138c2..62c379865f7c 100644
>>>>> --- a/net/sunrpc/xprt.c
>>>>> +++ b/net/sunrpc/xprt.c
>>>>> @@ -844,6 +844,47 @@ struct rpc_rqst *xprt_lookup_rqst(struct
>>>>> rpc_xprt *xprt, __be32 xid)
>>>>> }
>>>>> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>>>>>
>>>>> +/**
>>>>> + * xprt_pin_rqst - Pin a request on the transport receive list
>>>>> + * @req: Request to pin
>>>>> + *
>>>>> + * Caller must ensure this is atomic with the call to
>>>>> xprt_lookup_rqst()
>>>>> + * so should be holding the xprt transport lock.
>>>>> + */
>>>>> +void xprt_pin_rqst(struct rpc_rqst *req)
>>>>> +{
>>>>> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task-
>>>>>> tk_runstate);
>>>>> +}
>>>>> +
>>>>> +/**
>>>>> + * xprt_unpin_rqst - Unpin a request on the transport receive
>>>>> list
>>>>> + * @req: Request to pin
>>>>> + *
>>>>> + * Caller should be holding the xprt transport lock.
>>>>> + */
>>>>> +void xprt_unpin_rqst(struct rpc_rqst *req)
>>>>> +{
>>>>> + struct rpc_task *task = req->rq_task;
>>>>> +
>>>>> + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
>>>>> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>> tk_runstate))
>>>>> + wake_up_bit(&task->tk_runstate,
>>>>> RPC_TASK_MSG_RECV);
>>>>> +}
>>>>> +
>>>>> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
>>>>> +__must_hold(&req->rq_xprt->transport_lock)
>>>>> +{
>>>>> + struct rpc_task *task = req->rq_task;
>>>>> + if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
>>>>> + spin_unlock_bh(&req->rq_xprt->transport_lock);
>>>>> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>> tk_runstate);
>>>>>
>>>>> + wait_on_bit(&task->tk_runstate,
>>>>> RPC_TASK_MSG_RECV,
>>>>> + TASK_UNINTERRUPTIBLE);
>>>>> + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>> tk_runstate);
>>>>>
>>>>> + spin_lock_bh(&req->rq_xprt->transport_lock);
>>>>> + }
>>>>> +}
>>>>> +
>>>>> static void xprt_update_rtt(struct rpc_task *task)
>>>>> {
>>>>> struct rpc_rqst *req = task->tk_rqstp;
>>>>> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task *task)
>>>>> list_del(&req->rq_list);
>>>>> xprt->last_used = jiffies;
>>>>> xprt_schedule_autodisconnect(xprt);
>>>>> + xprt_wait_on_pinned_rqst(req);
>>>>> spin_unlock_bh(&xprt->transport_lock);
>>>>
>>>> Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while holding
>>>> a BH spin lock? This could be prone to deadlock.
>>>
>>> We drop the lock inside xprt_wait_on_pinned_rqst() if we need to
>>> wait.
>>>
>>> The reason why we want to hold the lock there is to avoid a use-
>>> after-
>>> free in xprt_unpin_rqst(). Without the lock, there is a risk that
>>> xprt_release() could complete, and the task get freed before we
>>> have
>>> completed wake_up_bit().
>>
>> Agree with the last bit. I had that challenge with the order of
>> memory invalidation (which waits) and xprt_lookup_rqst/complete_rqst
>> in rpcrdma_reply_handler.
>>
>> However my concern is that there are many other users of the
>> transport_lock. Does it become more tricky not to introduce a
>> problem by changing any one of the sites that take transport_lock?
>
> What is your concern? The main reason for taking the transport lock in
> the current code is twofold:
>
> 1) Perform the lookup of the request on the receive list
> 2) Ensure the request doesn't disappear while we're writing to it.
Yes, I think we see the same problem space.
I missed, though, that xprt_wait_on_pinned_rqst always releases
transport_lock before invoking wait_on_bit.
>> Despite the deadlock concern, I don't think it makes sense to call
>> wait_on_bit while holding a spin lock simply because spin locks are
>> supposed to be for things that are quick, like updating a data
>> structure.
>>
>> wait_on_bit can take microseconds: prepare_to_wait and finish_wait
>> can both take an irqsave spin_lock, for example. bit_wait is the
>> action that is done if the bit is not ready, and that calls
>> schedule(). On a busy system, that kind of context switch can take
>> dozens of microseconds.
>
> Agreed, but we should only very rarely be hitting the wait case. In all
> successful RPC calls, the receive will have happened long before we
> call xprt_release(). In fact, the only 2 cases where this can happen
> would be when the RPC call itself is interrupted. Either:
>
> 1) The task is synchronous and was interrupted by the user.
> 2) The task is terminating early due to a soft timeout.
>
> In both those cases, we're in a non-performance path. Furthermore, they
> would currently end up spinning against the transport lock.
OK, no argument that there is any performance concern.
I can hit this case 2 or 3 times out of 5 with generic/029 on
NFSv4/RDMA. These days, the worst that happens is the client drops
the RDMA connection because I've spent a very long time immersed
in these code paths, trying to make the two cases you list above
work 100% without deadlock or crash. See commits 04d25b7d5d1b
through 431af645cf66, and commit 68791649a725.
It makes me uncertain that waiting for anything in xprt_release
is safe, even if the transport_lock is not being held. xprtrdma
used to perform synchronous memory invalidation in ->buf_free.
It doesn't any more for this reason.
>> IMO it would be a lot nicer if we had an FSM state available that
>> could allow another sleep while an RPC task is in xprt_release.
>
> Why would that be preferable, if the argument holds that this only
> happens in these 2 rare error cases?
Because FSM steps are the way this code manages sleeps. That
makes it easier to understand and less brittle. I don't think
it would be more overhead than test_bit().
--
Chuck Lever
aW5zdCB0aGUgdHJhbnNwb3J0IGxvY2suDQo+IA0KPiBPSywgbm8gYXJndW1lbnQgdGhhdCB0aGVy
ZSBpcyBhbnkgcGVyZm9ybWFuY2UgY29uY2Vybi4NCj4gDQo+IEkgY2FuIGhpdCB0aGlzIGNhc2Ug
MiBvciAzIHRpbWVzIG91dCBvZiA1IHdpdGggZ2VuZXJpYy8wMjkgb24NCj4gTkZTdjQvUkRNQS4g
VGhlc2UgZGF5cywgdGhlIHdvcnNlIHRoYXQgaGFwcGVucyBpcyB0aGUgY2xpZW50IGRyb3BzDQo+
IHRoZSBSRE1BIGNvbm5lY3Rpb24gYmVjYXVzZSBJJ3ZlIHNwZW50IGEgdmVyeSBsb25nIHRpbWUg
aW1tZXJzZWQNCj4gaW4gdGhlc2UgY29kZSBwYXRocywgdHJ5aW5nIHRvIG1ha2UgdGhlIHR3byBj
YXNlcyB5b3UgbGlzdCBhYm92ZQ0KPiB3b3JrIDEwMCUgd2l0aG91dCBkZWFkbG9jayBvciBjcmFz
aC4gU2VlIGNvbW1pdHMgMDRkMjViN2Q1ZDFiDQo+IHRocm91Z2ggNDMxYWY2NDVjZjY2LCBhbmQg
Y29tbWl0IDY4NzkxNjQ5YTcyNS4NCj4gDQo+IEl0IG1ha2VzIG1lIHVuY2VydGFpbiB0aGF0IHdh
aXRpbmcgZm9yIGFueXRoaW5nIGluIHhwcnRfcmVsZWFzZQ0KPiBpcyBzYWZlLCBldmVuIGlmIHRo
ZSB0cmFuc3BvcnRfbG9jayBpcyBub3QgYmVpbmcgaGVsZC4geHBydHJkbWENCj4gdXNlZCB0byBw
ZXJmb3JtIHN5bmNocm9ub3VzIG1lbW9yeSBpbnZhbGlkYXRpb24gaW4gLT5idWZfZnJlZS4NCj4g
SXQgZG9lc24ndCBhbnkgbW9yZSBmb3IgdGhpcyByZWFzb24uDQoNCg0KRnVsbCBkaXNjbG9zdXJl
OiBJIGRpZG4ndCBhY3R1YWxseSBlbmFibGUgdGhpcyBjb2RlIGZvciBSRE1BLiBUaGUNCnJlYXNv
biBpcyBmaXJzdGx5IHRoYXQgSSB3b24ndCBwcmV0ZW5kIHRvIHVuZGVyc3RhbmQgdGhlIGxvY2tp
bmcgaW4NCnJwY3JkbWFfcmVwbHlfaGFuZGxlcigpLCBidXQgZnVydGhlcm1vcmUgdGhhdCBSRE1B
IGRvZXNuJ3QgcmVhbGx5IGhhdmUNCmEgcHJvYmxlbSB3aXRoIGJ1bGsgY29weSBpbiB0aGUgc2Ft
ZSB3YXkgdGhhdCB0aGUgc29ja2V0IGJhc2VkIGNvZGUNCmRvZXM6IHRoZSBkaXJlY3QgcGxhY2Vt
ZW50IGludG8gdGhlIHJlcGx5IGJ1ZmZlciBtZWFucyB0aGUgYnVsayBvZiB0aGUNCmNvZGUgaW4g
cnBjcmRtYV9yZXBseV9oYW5kbGVyKCkgaXMgYWxsIGFib3V0IGNoZWNraW5nIHRoZSBhbGlnbm1l
bnQNCih3aXRoLCBhZG1pdHRlZGx5IHNvbWUgc2NhcnkgYml0cyBpbiBycGNyZG1hX2lubGluZV9m
aXh1cCgpKS4NCg0KDQoNCj4gPiA+ID4gPiA+IElNTyBpdCB3b3VsZCBiZSBhIGxvdCBuaWNlciBp
ZiB3ZSBoYWQgYW4gRlNNIHN0YXRlDQo+ID4gPiA+ID4gPiBhdmFpbGFibGUgdGhhdA0KPiA+ID4g
Y291bGQgYWxsb3cgYW5vdGhlciBzbGVlcCB3aGlsZSBhbiBSUEMgdGFzayBkdXJpbmcgeHBydF9y
ZWxlYXNlLg0KPiA+IA0KPiA+IFdoeSB3b3VsZCB0aGF0IGJlIHByZWZlcmFibGUsIGlmIHRoZSBh
cmd1bWVudCBob2xkcyB0aGF0IHRoaXMgb25seQ0KPiA+IGhhcHBlbnMgaW4gdGhlc2UgMiByYXJl
IGVycm9yIGNhc2VzPw0KPiANCj4gQmVjYXVzZSBGU00gc3RlcHMgYXJlIHRoZSB3YXkgdGhpcyBj
b2RlIG1hbmFnZXMgc2xlZXBzLiBUaGF0DQo+IG1ha2VzIGl0IGVhc2llciB0byB1bmRlcnN0YW5k
IGFuZCBsZXNzIGJyaXR0bGUuIEkgZG9uJ3QgdGhpbmsNCj4gaXQgd291bGQgYmUgbW9yZSBvdmVy
aGVhZCB0aGFuIHRlc3RfYml0KCkuDQoNClRoZSBwcm9ibGVtIGlzIHRoYXQgaXQgd291bGQgYWRk
IG5ldyBzdGF0ZXMgdG8gYSBudW1iZXIgb2YgZnVuY3Rpb25zDQp0aGF0IGN1cnJlbnRseSBjYWxs
IHhwcnRfcmVsZWFzZSgpLiBSaWdodCBub3csIHRoYXQgd291bGQgbWVhbg0KDQpjYWxsX3Jlc2Vy
dmVyZXN1bHQoKQ0KcnBjX3ZlcmlmeV9oZWFkZXIoKQ0KcnBjX2V4aXRfdGFzaygpDQoNCkluIGFk
ZGl0aW9uLCB3ZSdkIG5lZWQgYSBzZXBhcmF0ZSBzb2x1dGlvbiBmb3IgcnBjX3B1dF90YXNrKCks
IGFuZA0KcHJvYmFibHkgYWxzbyBmb3IgcnBjX3JlbGVhc2VfdGFzaygpIChzaW5jZSB0aGVzZSBh
cmUgYm90aCBjYWxsZWQgZnJvbQ0Kb3V0c2lkZSB0aGUgbWFpbiBzdGF0ZSBtYWNoaW5lIGxvb3Ap
Lg0KDQpJJ20gZ2V0dGluZyBhIGxpdHRsZSB3b3JyaWVkIGFib3V0IHlvdXIgcmVwbHkgYWJvdmU6
IHRoZSBSUEMgZW5naW5lDQpvZmZlcnMgbm8gcmVhbC10aW1lIGd1YXJhbnRlZXMsIHNvIGlmIHRo
ZSBSRE1BIGNvZGUgaXMgcmVhbGx5IHRoaXMNCmxhdGVuY3ktc2Vuc2l0aXZlLCB0aGVuIHdoYXQg
d2lsbCBoYXBwZW4gaWYgd2UgZW5hYmxlIHByZS1lbXB0aW9uDQphbmQvb3Igd2UganVzdCBoYXZl
IGEgdmVyeSBjb25nZXN0ZWQgd29ya3F1ZXVlPw0KQWxzbywgaG93IHdvdWxkIGEgc3RhdGUgbWFj
aGluZSBzb2x1dGlvbiBoZWxwPyBJJ2QgZXhwZWN0IHRoYXQgdG8gbWFrZQ0KdGhlIGxhdGVuY3kg
d29yc2Ugbm90IGJldHRlciwgc2luY2UgdGhlIHdvcmsgaXRlbSB3b3VsZCBlbmQgdXAgZ2V0dGlu
Zw0KcmVxdWV1ZWQuIElmIHRoZSB3b3JrZXIgdGhyZWFkIHRoZW4gZ2V0cyByZXNjaGVkdWxlZCBh
cyB3ZWxsLi4uDQoNCk9uZSBzb2x1dGlvbiBtYXkgYmUgdG8gc2ltcGx5IHNheSB3ZSdyZSBuZXZl
ciBnb2luZyB0byB1c2UgdGhpcw0KbWVjaGFuaXNtIGZvciBSRE1BLiBJIHN1c3BlY3QgdGhhdCBp
ZiB3ZSBqdXN0IHJlcGxhY2UgdGhlIHVzZSBvZiB4cHJ0LQ0KPnRyYW5zcG9ydF9sb2NrIHdoZW4g
cHJvdGVjdGluZyB0aGUgcmVjZWl2ZSBxdWV1ZSB3aXRoIGEgbmV3IGxvY2sgdGhhdA0Kd291bGRu
J3QgbmVlZCB0byBiZSBiaC1zYWZlIHRoZW4gdGhhdCBtaWdodCBzdWZmaWNlIHRvIGRlYWwgd2l0
aCB0aGUNClJETUEgbGF0ZW5jeSBpc3N1ZXMuIEkgaGF2ZSBsZXNzIGNvbmZpZGVuY2UgdGhhdCB3
b3VsZCB3b3JrIGZvciB0aGUNCm90aGVycyBkdWUgdG8gdGhlIGJ1bGsgZGF0YSBjb3B5IGlzc3Vl
Lg0KDQotLSANClRyb25kIE15a2xlYnVzdA0KTGludXggTkZTIGNsaWVudCBtYWludGFpbmVyLCBQ
cmltYXJ5RGF0YQ0KdHJvbmQubXlrbGVidXN0QHByaW1hcnlkYXRhLmNvbQ0K
> On Aug 14, 2017, at 9:18 PM, Trond Myklebust <[email protected]> wrote:
>
> On Mon, 2017-08-14 at 18:25 -0400, Chuck Lever wrote:
>>> On Aug 14, 2017, at 4:38 PM, Trond Myklebust <[email protected]
>>> om> wrote:
>>>
>>> On Mon, 2017-08-14 at 16:23 -0400, Chuck Lever wrote:
>>>>> On Aug 14, 2017, at 4:07 PM, Trond Myklebust <trondmy@primaryda
>>>>> ta.c
>>>>> om> wrote:
>>>>>
>>>>> On Mon, 2017-08-14 at 15:28 -0400, Chuck Lever wrote:
>>>>>>> On Aug 14, 2017, at 3:16 PM, Trond Myklebust <trond.myklebu
>>>>>>> st@p
>>>>>>> rima
>>>>>>> rydata.com> wrote:
>>>>>>>
>>>>>>> Instead add a mechanism to ensure that the request doesn't
>>>>>>> disappear
>>>>>>> from underneath us while copying from the socket. We do
>>>>>>> this by
>>>>>>> preventing xprt_release() from freeing the XDR buffers
>>>>>>> until
>>>>>>> the
>>>>>>> flag RPC_TASK_MSG_RECV has been cleared from the request.
>>>>>>>
>>>>>>> Signed-off-by: Trond Myklebust <trond.myklebust@primarydata
>>>>>>> .com
>>>>>>>>
>>>>>>>
>>>>>>> ---
>>>>>>> include/linux/sunrpc/sched.h | 2 ++
>>>>>>> include/linux/sunrpc/xprt.h | 2 ++
>>>>>>> net/sunrpc/xprt.c | 42
>>>>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>>>> net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++---
>>>>>>> --
>>>>>>> 4 files changed, 68 insertions(+), 5 deletions(-)
>>>>>>>
>>>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>>>> b/include/linux/sunrpc/sched.h
>>>>>>> index 15bc1cd6ee5c..e972d9e05426 100644
>>>>>>> --- a/include/linux/sunrpc/sched.h
>>>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>>>> @@ -141,6 +141,8 @@ struct rpc_task_setup {
>>>>>>> #define RPC_TASK_ACTIVE 2
>>>>>>> #define RPC_TASK_MSG_XMIT 3
>>>>>>> #define RPC_TASK_MSG_XMIT_WAIT 4
>>>>>>> +#define RPC_TASK_MSG_RECV 5
>>>>>>> +#define RPC_TASK_MSG_RECV_WAIT 6
>>>>>>>
>>>>>>> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING,
>>>>>>> &(t)-
>>>>>>>> tk_runstate)
>>>>>>>
>>>>>>> #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING,
>>>>>>> &(t)-
>>>>>>>> tk_runstate)
>>>>>>>
>>>>>>> diff --git a/include/linux/sunrpc/xprt.h
>>>>>>> b/include/linux/sunrpc/xprt.h
>>>>>>> index eab1c749e192..65b9e0224753 100644
>>>>>>> --- a/include/linux/sunrpc/xprt.h
>>>>>>> +++ b/include/linux/sunrpc/xprt.h
>>>>>>> @@ -372,6 +372,8 @@ void xprt_write_
>>>>>>> spac
>>>>>>> e(st
>>>>>>> ruct rpc_xprt *xprt);
>>>>>>> void xprt_adjust_cwnd(struct
>>>>>>> rpc_xprt
>>>>>>> *xprt,
>>>>>>> struct rpc_task *task, int result);
>>>>>>> struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt
>>>>>>> *xprt,
>>>>>>> __be32 xid);
>>>>>>> void xprt_complete_rqst(struct
>>>>>>> rpc_task
>>>>>>> *task, int copied);
>>>>>>> +void xprt_pin_rqst(struct rpc_rqst
>>>>>>> *req);
>>>>>>> +void xprt_unpin_rqst(struct
>>>>>>> rpc_rqst
>>>>>>> *req);
>>>>>>> void xprt_release_rqst_cong(struct
>>>>>>> rpc_task
>>>>>>> *task);
>>>>>>> void xprt_disconnect_done(struct
>>>>>>> rpc_xprt
>>>>>>> *xprt);
>>>>>>> void xprt_force_disconnect(struct
>>>>>>> rpc_xprt
>>>>>>> *xprt);
>>>>>>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>>>>>>> index 788c1b6138c2..62c379865f7c 100644
>>>>>>> --- a/net/sunrpc/xprt.c
>>>>>>> +++ b/net/sunrpc/xprt.c
>>>>>>> @@ -844,6 +844,47 @@ struct rpc_rqst
>>>>>>> *xprt_lookup_rqst(struct
>>>>>>> rpc_xprt *xprt, __be32 xid)
>>>>>>> }
>>>>>>> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>>>>>>>
>>>>>>> +/**
>>>>>>> + * xprt_pin_rqst - Pin a request on the transport receive
>>>>>>> list
>>>>>>> + * @req: Request to pin
>>>>>>> + *
>>>>>>> + * Caller must ensure this is atomic with the call to
>>>>>>> xprt_lookup_rqst()
>>>>>>> + * so should be holding the xprt transport lock.
>>>>>>> + */
>>>>>>> +void xprt_pin_rqst(struct rpc_rqst *req)
>>>>>>> +{
>>>>>>> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task-
>>>>>>>> tk_runstate);
>>>>>>>
>>>>>>> +}
>>>>>>> +
>>>>>>> +/**
>>>>>>> + * xprt_unpin_rqst - Unpin a request on the transport
>>>>>>> receive
>>>>>>> list
>>>>>>> + * @req: Request to unpin
>>>>>>> + *
>>>>>>> + * Caller should be holding the xprt transport lock.
>>>>>>> + */
>>>>>>> +void xprt_unpin_rqst(struct rpc_rqst *req)
>>>>>>> +{
>>>>>>> + struct rpc_task *task = req->rq_task;
>>>>>>> +
>>>>>>> + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
>>>>>>> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>>>> tk_runstate))
>>>>>>>
>>>>>>> + wake_up_bit(&task->tk_runstate,
>>>>>>> RPC_TASK_MSG_RECV);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
>>>>>>> +__must_hold(&req->rq_xprt->transport_lock)
>>>>>>> +{
>>>>>>> + struct rpc_task *task = req->rq_task;
>>>>>>> + if (test_bit(RPC_TASK_MSG_RECV, &task-
>>>>>>>> tk_runstate)) {
>>>>>>> + spin_unlock_bh(&req->rq_xprt-
>>>>>>>> transport_lock);
>>>>>>> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>>>> tk_runstate);
>>>>>>>
>>>>>>> + wait_on_bit(&task->tk_runstate,
>>>>>>> RPC_TASK_MSG_RECV,
>>>>>>> + TASK_UNINTERRUPTIBLE);
>>>>>>> + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>>>> tk_runstate);
>>>>>>>
>>>>>>> + spin_lock_bh(&req->rq_xprt-
>>>>>>>> transport_lock);
>>>>>>> + }
>>>>>>> +}
Looking at xprt_wait_for_pinned_rqst, I'm wondering
though what happens if a soft timeout occurs but the
RPC Reply never arrives because the server died. Will
it wait forever in that case?
I can't tell if wait-for-pinned always waits for the
Reply, or if it can allow the RPC to exit cleanly if
the Reply hasn't arrived.
>>>>>>> +
>>>>>>> static void xprt_update_rtt(struct rpc_task *task)
>>>>>>> {
>>>>>>> struct rpc_rqst *req = task->tk_rqstp;
>>>>>>> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task
>>>>>>> *task)
>>>>>>> list_del(&req->rq_list);
>>>>>>> xprt->last_used = jiffies;
>>>>>>> xprt_schedule_autodisconnect(xprt);
>>>>>>> + xprt_wait_on_pinned_rqst(req);
>>>>>>> spin_unlock_bh(&xprt->transport_lock);
>>>>>>
>>>>>> Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while
>>>>>> holding
>>>>>> a BH spin lock? This could be prone to deadlock.
>>>>>
>>>>> We drop the lock inside xprt_wait_on_pinned_rqst() if we need
>>>>> to
>>>>> wait.
>>>>>
>>>>> The reason why we want to hold the lock there is to avoid a
>>>>> use-
>>>>> after-
>>>>> free in xprt_unpin_rqst(). Without the lock, there is a risk
>>>>> that
>>>>> xprt_release() could complete, and the task get freed before we
>>>>> have
>>>>> completed wake_up_bit().
>>>>
>>>> Agree with the last bit. I had that challenge with the order of
>>>> memory invalidation (which waits) and
>>>> xprt_lookup_rqst/complete_rqst
>>>> in rpcrdma_reply_handler.
>>>>
>>>> However my concern is that there are many other users of the
>>>> transport_lock. Does it become more tricky not to introduce a
>>>> problem by changing any one of the sites that take
>>>> transport_lock?
>>>
>>> What is your concern? The main reason for taking the transport lock
>>> in
>>> the current code is twofold:
>>>
>>> 1) Perform the lookup of the request on the receive list
>>> 2) Ensure the request doesn't disappear while we're writing to it.
>>
>> Yes, I think we see the same problem space.
>>
>> I missed, though, that xprt_wait_on_pinned_rqst always releases
>> transport_lock before invoking wait_on_bit.
>>
>>
>>>> Despite the deadlock concern, I don't think it makes sense to
>>>> call
>>>> wait_on_bit while holding a spin lock simply because spin locks
>>>> are
>>>> supposed to be for things that are quick, like updating a data
>>>> structure.
>>>>
>>>> wait_on_bit can take microseconds: prepare_to_wait and
>>>> finish_wait
>>>> can both take a irqsave spin_lock, for example. bit_wait is the
>>>> action that is done if the bit is not ready, and that calls
>>>> schedule(). On a busy system, that kind of context switch can
>>>> take
>>>> dozens of microseconds.
>>>
>>> Agreed, but we should only very rarely be hitting the wait case. In
>>> all
>>> successful RPC calls, the receive will have happened long before we
>>> call xprt_release(). In fact, the only 2 cases where this can
>>> happen
>>> would be when the RPC call itself is interrupted. Either:
>>>
>>> 1) The task is synchronous and was interrupted by the user.
>>> 2) The task is terminating early due to a soft timeout.
>>>
>>> In both those cases, we're in a non-performance path. Furthermore,
>>> they
>>> would currently end up spinning against the transport lock.
>>
>> OK, no argument that there is any performance concern.
>>
>> I can hit this case 2 or 3 times out of 5 with generic/029 on
>> NFSv4/RDMA. These days, the worst that happens is the client drops
>> the RDMA connection because I've spent a very long time immersed
>> in these code paths, trying to make the two cases you list above
>> work 100% without deadlock or crash. See commits 04d25b7d5d1b
>> through 431af645cf66, and commit 68791649a725.
>>
>> It makes me uncertain that waiting for anything in xprt_release
>> is safe, even if the transport_lock is not being held. xprtrdma
>> used to perform synchronous memory invalidation in ->buf_free.
>> It doesn't any more for this reason.
>
>
> Full disclosure: I didn't actually enable this code for RDMA.
Noted that; I assumed I would be responsible for it
if it could be applied to xprtrdma.
> The reason is firstly that I won't pretend to understand the locking in
> rpcrdma_reply_handler(), but furthermore that RDMA doesn't really have
> a problem with bulk copy in the same way that the socket based code
> does: the direct placement into the reply buffer means the bulk of the
> code in rpcrdma_reply_handler() is all about checking the alignment
> (with, admittedly some scary bits in rpcrdma_inline_fixup()).
rpcrdma_reply_handler actually can use xprt_pin_rqst.
It's not doing a data copy (except for the pull-up in
rpcrdma_inline_fixup), but it is performing a possibly
long-running synchronous memory invalidation. During
that invalidation, the transport_lock cannot be held.
With xprt_pin_rqst, rpcrdma_reply_handler would
probably work like this:
spin_lock(transport_lock)
xprt_lookup_rqst
xprt_pin_rqst
spin_unlock(transport_lock)
invalidate memory associated with this rqst, and wait
for it to complete
reconstruct the Reply in rqst->rq_rcv_buf, including
maybe a substantial pull-up if the Reply is large and
inline
spin_lock(transport_lock)
xprt_complete_rqst
xprt_unpin_rqst
spin_unlock(transport_lock)
This would have to co-ordinate with xprt_rdma_free. If
there are still memory regions registered when
xprt_rdma_free is called, that means the RPC was
terminated before the Reply arrived. They still need to
be invalidated, but these functions have to ensure that
invalidation does not occur twice.
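
To make that outline concrete, here is a minimal sketch of what such a handler
could look like. This is not the actual rpcrdma_reply_handler(): struct
example_rdma_rep and the example_* helpers are placeholders invented for
illustration (standing in for the XID decode, the synchronous MR invalidation,
and the reply fixup); only xprt_lookup_rqst(), xprt_pin_rqst(),
xprt_complete_rqst() and xprt_unpin_rqst() are the interfaces from the patch
under discussion, and the lock flavour simply follows xprt_release().

#include <linux/sunrpc/xprt.h>

struct example_rdma_rep;                                        /* hypothetical reply descriptor */
static __be32 example_decode_xid(struct example_rdma_rep *rep); /* assumed */
static void example_unmap_sync(struct rpc_rqst *rqst);          /* assumed */
static int example_fixup_reply(struct rpc_rqst *rqst,
                               struct example_rdma_rep *rep);   /* assumed, returns reply length */

static void example_rdma_reply_handler(struct rpc_xprt *xprt,
                                       struct example_rdma_rep *rep)
{
        struct rpc_rqst *rqst;
        int copied;

        spin_lock_bh(&xprt->transport_lock);
        rqst = xprt_lookup_rqst(xprt, example_decode_xid(rep));
        if (!rqst) {
                spin_unlock_bh(&xprt->transport_lock);
                return;
        }
        xprt_pin_rqst(rqst);            /* rqst can no longer be freed under us */
        spin_unlock_bh(&xprt->transport_lock);

        /* Possibly long-running synchronous invalidation; no lock held */
        example_unmap_sync(rqst);

        /* Rebuild the Reply in rqst->rq_rcv_buf, including any pull-up */
        copied = example_fixup_reply(rqst, rep);

        spin_lock_bh(&xprt->transport_lock);
        xprt_complete_rqst(rqst->rq_task, copied);
        xprt_unpin_rqst(rqst);          /* may wake xprt_wait_on_pinned_rqst() */
        spin_unlock_bh(&xprt->transport_lock);
}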
>>>>>>> IMO it would be a lot nicer if we had an FSM state
>>>>>>> available that
>>>> could allow another sleep while an RPC task during xprt_release.
>>>
>>> Why would that be preferable, if the argument holds that this only
>>> happens in these 2 rare error cases?
>>
>> Because FSM steps are the way this code manages sleeps. That
>> makes it easier to understand and less brittle. I don't think
>> it would be more overhead than test_bit().
>
> The problem is that it would add new states to a number of functions
> that currently call xprt_release(). Right now, that would mean
>
> call_reserveresult()
> rpc_verify_header()
> rpc_exit_task()
Right. That's why I wasn't bold enough to actually try adding
the FSM myself.
Your solution is helped somewhat by checking to see if a Call
was actually sent and a Reply is therefore expected. In some
of these cases, that means waiting can be avoided entirely
when the RPC exits before call_transmit.
> In addition, we'd need a separate solution for rpc_put_task(), and
> probably also for rpc_release_task() (since these are both called from
> outside the main state machine loop).
>
> I'm getting a little worried about your reply above: the RPC engine
> offers no real-time guarantees, so if the RDMA code is really this
> latency-sensitive, then what will happen if we enable pre-emption
> and/or we just have a very congested workqueue?
The sensitivity is only performance-related. On CPU-intensive
workloads, RPC execution time can increase considerably. The
RDMA transport doesn't rely on timely execution for proper
operation, but maybe I don't understand your worry.
> Also, how would a state machine solution help? I'd expect that to make
> the latency worse not better, since the work item would end up getting
> requeued. If the worker thread then gets rescheduled as well...
>
> One solution may be to simply say we're never going to use this
> mechanism for RDMA. I suspect that if we just replace the use of xprt-
>> transport_lock when protecting the receive queue with a new lock that
> wouldn't need to be bh-safe then that might suffice to deal with the
> RDMA latency issues. I have less confidence that would work for the
> others due to the bulk data copy issue.
I would like a separate non-BH lock, if that can be made to
work. That's in fact how xprtrdma currently works. xprtrdma
has its own receive list at the moment so that it can find
memory associated with a rqst _without_ needing to call
xprt_lookup_rqst. A non-BH lock is used to protect that
list.
--
Chuck Lever
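
As an aside on the separate non-BH lock idea above, here is a sketch of what
the lookup-and-pin critical section could look like if the receive list were
guarded by its own lock. Neither struct example_recv_queue, recv_lock nor
example_lookup_and_pin() exists in this patch set; they are shown only to
illustrate that lookup plus pin is the whole critical section, so a plain
(non bh-safe) spinlock would suffice as long as no bottom-half code ever
touched the list.

#include <linux/sunrpc/xprt.h>

struct example_recv_queue {
        spinlock_t              recv_lock;      /* hypothetical: never taken from bh context */
        struct list_head        list;           /* of struct rpc_rqst, linked via rq_list */
};

static struct rpc_rqst *example_lookup_and_pin(struct example_recv_queue *q,
                                               __be32 xid)
{
        struct rpc_rqst *req, *found = NULL;

        spin_lock(&q->recv_lock);
        list_for_each_entry(req, &q->list, rq_list) {
                if (req->rq_xid == xid) {
                        xprt_pin_rqst(req);     /* from the patch under discussion */
                        found = req;
                        break;
                }
        }
        spin_unlock(&q->recv_lock);
        return found;
}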
> On Aug 15, 2017, at 11:00 AM, Trond Myklebust <[email protected]> wrote:
>
> On Tue, 2017-08-15 at 10:23 -0400, Chuck Lever wrote:
>>> On Aug 14, 2017, at 9:18 PM, Trond Myklebust <[email protected]
>>> om> wrote:
>>>
>>> On Mon, 2017-08-14 at 18:25 -0400, Chuck Lever wrote:
>>>>> On Aug 14, 2017, at 4:38 PM, Trond Myklebust <trondmy@primaryda
>>>>> ta.c
>>>>> om> wrote:
>>>>>
>>>>> On Mon, 2017-08-14 at 16:23 -0400, Chuck Lever wrote:
>>>>>>> On Aug 14, 2017, at 4:07 PM, Trond Myklebust <trondmy@prima
>>>>>>> ryda
>>>>>>> ta.c
>>>>>>> om> wrote:
>>>>>>>
>>>>>>> On Mon, 2017-08-14 at 15:28 -0400, Chuck Lever wrote:
>>>>>>>>> On Aug 14, 2017, at 3:16 PM, Trond Myklebust
>>>>>>>>> <trond.myklebu
>>>>>>>>> st@p
>>>>>>>>> rima
>>>>>>>>> rydata.com> wrote:
>>>>>>>>>
>>>>>>>>> Instead add a mechanism to ensure that the request
>>>>>>>>> doesn't
>>>>>>>>> disappear
>>>>>>>>> from underneath us while copying from the socket. We do
>>>>>>>>> this by
>>>>>>>>> preventing xprt_release() from freeing the XDR buffers
>>>>>>>>> until
>>>>>>>>> the
>>>>>>>>> flag RPC_TASK_MSG_RECV has been cleared from the
>>>>>>>>> request.
>>>>>>>>>
>>>>>>>>> Signed-off-by: Trond Myklebust <trond.myklebust@primary
>>>>>>>>> data
>>>>>>>>> .com
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> ---
>>>>>>>>> include/linux/sunrpc/sched.h | 2 ++
>>>>>>>>> include/linux/sunrpc/xprt.h | 2 ++
>>>>>>>>> net/sunrpc/xprt.c | 42
>>>>>>>>> ++++++++++++++++++++++++++++++++++++++++++
>>>>>>>>> net/sunrpc/xprtsock.c | 27
>>>>>>>>> ++++++++++++++++++++++---
>>>>>>>>> --
>>>>>>>>> 4 files changed, 68 insertions(+), 5 deletions(-)
>>>>>>>>>
>>>>>>>>> diff --git a/include/linux/sunrpc/sched.h
>>>>>>>>> b/include/linux/sunrpc/sched.h
>>>>>>>>> index 15bc1cd6ee5c..e972d9e05426 100644
>>>>>>>>> --- a/include/linux/sunrpc/sched.h
>>>>>>>>> +++ b/include/linux/sunrpc/sched.h
>>>>>>>>> @@ -141,6 +141,8 @@ struct rpc_task_setup {
>>>>>>>>> #define RPC_TASK_ACTIVE 2
>>>>>>>>> #define RPC_TASK_MSG_XMIT 3
>>>>>>>>> #define RPC_TASK_MSG_XMIT_WAIT 4
>>>>>>>>> +#define RPC_TASK_MSG_RECV 5
>>>>>>>>> +#define RPC_TASK_MSG_RECV_WAIT 6
>>>>>>>>>
>>>>>>>>> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNN
>>>>>>>>> ING,
>>>>>>>>> &(t)-
>>>>>>>>>> tk_runstate)
>>>>>>>>>
>>>>>>>>> #define rpc_set_running(t) set_bit(RPC_TASK_RUNN
>>>>>>>>> ING,
>>>>>>>>> &(t)-
>>>>>>>>>> tk_runstate)
>>>>>>>>>
>>>>>>>>> diff --git a/include/linux/sunrpc/xprt.h
>>>>>>>>> b/include/linux/sunrpc/xprt.h
>>>>>>>>> index eab1c749e192..65b9e0224753 100644
>>>>>>>>> --- a/include/linux/sunrpc/xprt.h
>>>>>>>>> +++ b/include/linux/sunrpc/xprt.h
>>>>>>>>> @@ -372,6 +372,8 @@ void xprt_wr
>>>>>>>>> ite_
>>>>>>>>> spac
>>>>>>>>> e(st
>>>>>>>>> ruct rpc_xprt *xprt);
>>>>>>>>> void xprt_adjust_cwnd(struct
>>>>>>>>> rpc_xprt
>>>>>>>>> *xprt,
>>>>>>>>> struct rpc_task *task, int result);
>>>>>>>>> struct rpc_rqst * xprt_lookup_rqst(struct
>>>>>>>>> rpc_xprt
>>>>>>>>> *xprt,
>>>>>>>>> __be32 xid);
>>>>>>>>> void xprt_complete_rqst(struct
>>>>>>>>> rpc_task
>>>>>>>>> *task, int copied);
>>>>>>>>> +void xprt_pin_rqst(struct
>>>>>>>>> rpc_rqst
>>>>>>>>> *req);
>>>>>>>>> +void xprt_unpin_rqst(struct
>>>>>>>>> rpc_rqst
>>>>>>>>> *req);
>>>>>>>>> void xprt_release_rqst_cong(stru
>>>>>>>>> ct
>>>>>>>>> rpc_task
>>>>>>>>> *task);
>>>>>>>>> void xprt_disconnect_done(struct
>>>>>>>>> rpc_xprt
>>>>>>>>> *xprt);
>>>>>>>>> void xprt_force_disconnect(struc
>>>>>>>>> t
>>>>>>>>> rpc_xprt
>>>>>>>>> *xprt);
>>>>>>>>> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
>>>>>>>>> index 788c1b6138c2..62c379865f7c 100644
>>>>>>>>> --- a/net/sunrpc/xprt.c
>>>>>>>>> +++ b/net/sunrpc/xprt.c
>>>>>>>>> @@ -844,6 +844,47 @@ struct rpc_rqst
>>>>>>>>> *xprt_lookup_rqst(struct
>>>>>>>>> rpc_xprt *xprt, __be32 xid)
>>>>>>>>> }
>>>>>>>>> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>>>>>>>>>
>>>>>>>>> +/**
>>>>>>>>> + * xprt_pin_rqst - Pin a request on the transport
>>>>>>>>> receive
>>>>>>>>> list
>>>>>>>>> + * @req: Request to pin
>>>>>>>>> + *
>>>>>>>>> + * Caller must ensure this is atomic with the call to
>>>>>>>>> xprt_lookup_rqst()
>>>>>>>>> + * so should be holding the xprt transport lock.
>>>>>>>>> + */
>>>>>>>>> +void xprt_pin_rqst(struct rpc_rqst *req)
>>>>>>>>> +{
>>>>>>>>> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task-
>>>>>>>>>> tk_runstate);
>>>>>>>>>
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +/**
>>>>>>>>> + * xprt_unpin_rqst - Unpin a request on the transport
>>>>>>>>> receive
>>>>>>>>> list
>>>>>>>>> + * @req: Request to unpin
>>>>>>>>> + *
>>>>>>>>> + * Caller should be holding the xprt transport lock.
>>>>>>>>> + */
>>>>>>>>> +void xprt_unpin_rqst(struct rpc_rqst *req)
>>>>>>>>> +{
>>>>>>>>> + struct rpc_task *task = req->rq_task;
>>>>>>>>> +
>>>>>>>>> + clear_bit(RPC_TASK_MSG_RECV, &task-
>>>>>>>>>> tk_runstate);
>>>>>>>>> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>>>>>> tk_runstate))
>>>>>>>>>
>>>>>>>>> + wake_up_bit(&task->tk_runstate,
>>>>>>>>> RPC_TASK_MSG_RECV);
>>>>>>>>> +}
>>>>>>>>> +
>>>>>>>>> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst
>>>>>>>>> *req)
>>>>>>>>> +__must_hold(&req->rq_xprt->transport_lock)
>>>>>>>>> +{
>>>>>>>>> + struct rpc_task *task = req->rq_task;
>>>>>>>>> + if (test_bit(RPC_TASK_MSG_RECV, &task-
>>>>>>>>>> tk_runstate)) {
>>>>>>>>>
>>>>>>>>> + spin_unlock_bh(&req->rq_xprt-
>>>>>>>>>> transport_lock);
>>>>>>>>>
>>>>>>>>> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task-
>>>>>>>>>> tk_runstate);
>>>>>>>>>
>>>>>>>>> + wait_on_bit(&task->tk_runstate,
>>>>>>>>> RPC_TASK_MSG_RECV,
>>>>>>>>> + TASK_UNINTERRUPTIBLE);
>>>>>>>>> + clear_bit(RPC_TASK_MSG_RECV_WAIT,
>>>>>>>>> &task-
>>>>>>>>>> tk_runstate);
>>>>>>>>>
>>>>>>>>> + spin_lock_bh(&req->rq_xprt-
>>>>>>>>>> transport_lock);
>>>>>>>>>
>>>>>>>>> + }
>>>>>>>>> +}
>>
>> Looking at xprt_wait_for_pinned_rqst, I'm wondering
>> though what happens if a soft timeout occurs but the
>> RPC Reply never arrives because the server died. Will
>> it wait forever in that case?
>>
>> I can't tell if wait-for-pinned always waits for the
>> Reply, or if it can allow the RPC to exit cleanly if
>> the Reply hasn't arrived.
>
> Oh... I think I see what your concern is.
>
> So my assumption is that the bit will always be set and released in
> xs_tcp_read_reply() (and equivalent). It should never be set while
> waiting for data. It is only set when doing _non-blocking_ copying of
> data from the socket to the buffers.
>
> IOW: wait_for_pinned will wait only if new reply data arrives on the
> socket in the instance when the specific request to which it is being
> delivered is being torn down. If there is no data for the request in
> the socket, then there is no wait. It doesn't matter if there has been
> a partial delivery of data previously (or no delivery); unless
> xs_tcp_read_reply() is actually running and pinning the request, there
> is no wait.
Ah, of course.
Reviewed-by: Chuck Lever <[email protected]>
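
For readers following the explanation quoted above, here is an illustrative
sketch of a socket-style receive path, not the real xs_tcp_read_reply(): the
request is pinned only for the duration of the non-blocking copy from the
socket, which is why xprt_wait_on_pinned_rqst() can only block while such a
copy is actually in progress. example_copy_from_socket() and
example_reply_complete() are assumed helpers, and the byte count passed to
xprt_complete_rqst() is simplified.

#include <linux/sunrpc/xprt.h>

static void example_copy_from_socket(struct rpc_rqst *req, void *desc);  /* assumed */
static bool example_reply_complete(struct rpc_rqst *req, void *desc);    /* assumed */

static int example_read_reply(struct rpc_xprt *xprt, __be32 xid, void *desc)
{
        struct rpc_rqst *req;

        spin_lock_bh(&xprt->transport_lock);
        req = xprt_lookup_rqst(xprt, xid);
        if (!req) {
                spin_unlock_bh(&xprt->transport_lock);
                return -1;
        }
        xprt_pin_rqst(req);     /* keeps xprt_release() from freeing the XDR buffers */
        spin_unlock_bh(&xprt->transport_lock);

        /* Non-blocking copy into req->rq_private_buf; no lock held */
        example_copy_from_socket(req, desc);

        spin_lock_bh(&xprt->transport_lock);
        if (example_reply_complete(req, desc))
                xprt_complete_rqst(req->rq_task, req->rq_private_buf.len);
        xprt_unpin_rqst(req);   /* wakes any waiter in xprt_wait_on_pinned_rqst() */
        spin_unlock_bh(&xprt->transport_lock);
        return 0;
}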
>>>>>>>>> +
>>>>>>>>> static void xprt_update_rtt(struct rpc_task *task)
>>>>>>>>> {
>>>>>>>>> struct rpc_rqst *req = task->tk_rqstp;
>>>>>>>>> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task
>>>>>>>>> *task)
>>>>>>>>> list_del(&req->rq_list);
>>>>>>>>> xprt->last_used = jiffies;
>>>>>>>>> xprt_schedule_autodisconnect(xprt);
>>>>>>>>> + xprt_wait_on_pinned_rqst(req);
>>>>>>>>> spin_unlock_bh(&xprt->transport_lock);
>>>>>>>>
>>>>>>>> Is it OK to call wait_on_bit(TASK_UNINTERRUPTIBLE) while
>>>>>>>> holding
>>>>>>>> a BH spin lock? This could be prone to deadlock.
>>>>>>>
>>>>>>> We drop the lock inside xprt_wait_on_pinned_rqst() if we
>>>>>>> need
>>>>>>> to
>>>>>>> wait.
>>>>>>>
>>>>>>> The reason why we want to hold the lock there is to avoid a
>>>>>>> use-
>>>>>>> after-
>>>>>>> free in xprt_unpin_rqst(). Without the lock, there is a
>>>>>>> risk
>>>>>>> that
>>>>>>> xprt_release() could complete, and the task get freed
>>>>>>> before we
>>>>>>> have
>>>>>>> completed wake_up_bit().
>>>>>>
>>>>>> Agree with the last bit. I had that challenge with the order
>>>>>> of
>>>>>> memory invalidation (which waits) and
>>>>>> xprt_lookup_rqst/complete_rqst
>>>>>> in rpcrdma_reply_handler.
>>>>>>
>>>>>> However my concern is that there are many other users of the
>>>>>> transport_lock. Does it become more tricky not to introduce a
>>>>>> problem by changing any one of the sites that take
>>>>>> transport_lock?
>>>>>
>>>>> What is your concern? The main reason for taking the transport
>>>>> lock
>>>>> in
>>>>> the current code is twofold:
>>>>>
>>>>> 1) Perform the lookup of the request on the receive list
>>>>> 2) Ensure the request doesn't disappear while we're writing to
>>>>> it.
>>>>
>>>> Yes, I think we see the same problem space.
>>>>
>>>> I missed, though, that xprt_wait_on_pinned_rqst always releases
>>>> transport_lock before invoking wait_on_bit.
>>>>
>>>>
>>>>>> Despite the deadlock concern, I don't think it makes sense to
>>>>>> call
>>>>>> wait_on_bit while holding a spin lock simply because spin
>>>>>> locks
>>>>>> are
>>>>>> supposed to be for things that are quick, like updating a
>>>>>> data
>>>>>> structure.
>>>>>>
>>>>>> wait_on_bit can take microseconds: prepare_to_wait and
>>>>>> finish_wait
>>>>>> can both take a irqsave spin_lock, for example. bit_wait is
>>>>>> the
>>>>>> action that is done if the bit is not ready, and that calls
>>>>>> schedule(). On a busy system, that kind of context switch can
>>>>>> take
>>>>>> dozens of microseconds.
>>>>>
>>>>> Agreed, but we should only very rarely be hitting the wait
>>>>> case. In
>>>>> all
>>>>> successful RPC calls, the receive will have happened long
>>>>> before we
>>>>> call xprt_release(). In fact, the only 2 cases where this can
>>>>> happen
>>>>> would be when the RPC call itself is interrupted. Either:
>>>>>
>>>>> 1) The task is synchronous and was interrupted by the user.
>>>>> 2) The task is terminating early due to a soft timeout.
>>>>>
>>>>> In both those cases, we're in a non-performance path.
>>>>> Furthermore,
>>>>> they
>>>>> would currently end up spinning against the transport lock.
>>>>
>>>> OK, no argument that there is any performance concern.
>>>>
>>>> I can hit this case 2 or 3 times out of 5 with generic/029 on
>>>> NFSv4/RDMA. These days, the worst that happens is the client
>>>> drops
>>>> the RDMA connection because I've spent a very long time immersed
>>>> in these code paths, trying to make the two cases you list above
>>>> work 100% without deadlock or crash. See commits 04d25b7d5d1b
>>>> through 431af645cf66, and commit 68791649a725.
>>>>
>>>> It makes me uncertain that waiting for anything in xprt_release
>>>> is safe, even if the transport_lock is not being held. xprtrdma
>>>> used to perform synchronous memory invalidation in ->buf_free.
>>>> It doesn't any more for this reason.
>>>
>>>
>>> Full disclosure: I didn't actually enable this code for RDMA.
>>
>> Noted that; I assumed I would be responsible for it
>> if it could be applied to xprtrdma.
>>
>>
>>> The reason is firstly that I won't pretend to understand the
>>> locking in
>>> rpcrdma_reply_handler(), but furthermore that RDMA doesn't really
>>> have
>>> a problem with bulk copy in the same way that the socket based code
>>> does: the direct placement into the reply buffer means the bulk of
>>> the
>>> code in rpcrdma_reply_handler() is all about checking the alignment
>>> (with, admittedly some scary bits in rpcrdma_inline_fixup()).
>>
>> rpcrdma_reply_handler actually can use xprt_pin_rqst.
>>
>> It's not doing a data copy (except for the pull-up in
>> rpcrdma_inline_fixup), but it is performing a possibly
>> long-running synchronous memory invalidation. During
>> that invalidation, the transport_lock cannot be held.
>>
>> With xprt_pin_rqst, rpcrdma_reply_handler would
>> probably work like this:
>>
>>
>> spin_lock(transport_lock)
>> xprt_lookup_rqst
>> xprt_pin_rqst
>> spin_unlock(transport_lock)
>>
>> invalidate memory associated with this rqst, and wait
>> for it to complete
>>
>> reconstruct the Reply in rqst->rq_rcv_buf, including
>> maybe a substantial pull-up if the Reply is large and
>> inline
>>
>> spin_lock(transport_lock)
>> xprt_complete_rqst
>> xprt_unpin_rqst
>> spin_unlock(transport_lock)
>>
>>
>> This would have to co-ordinate with xprt_rdma_free. If
>> there are still memory regions registered when
>> xprt_rdma_free is called, that means the RPC was
>> terminated before the Reply arrived. They still need to
>> be invalidated, but these functions have to ensure that
>> invalidation does not occur twice.
>>
>>
>>>>>>>>> IMO it would be a lot nicer if we had an FSM state
>>>>>>>>> available that
>>>>>>
>>>>>> could allow another sleep while an RPC task during
>>>>>> xprt_release.
>>>>>
>>>>> Why would that be preferable, if the argument holds that this
>>>>> only
>>>>> happens in these 2 rare error cases?
>>>>
>>>> Because FSM steps are the way this code manages sleeps. That
>>>> makes it easier to understand and less brittle. I don't think
>>>> it would be more overhead than test_bit().
>>>
>>> The problem is that it would add new states to a number of
>>> functions that currently call xprt_release(). Right now, that would
>>> mean
>>>
>>> call_reserveresult()
>>> rpc_verify_header()
>>> rpc_exit_task()
>>
>> Right. That's why I wasn't bold enough to actually try adding
>> the FSM myself.
>>
>> Your solution is helped somewhat by checking to see if a Call
>> was actually sent and a Reply is therefore expected. In some
>> of these cases, that means waiting can be avoided entirely
>> when the RPC exits before call_transmit.
>>
>>
>>> In addition, we'd need a separate solution for rpc_put_task(), and
>>> probably also for rpc_release_task() (since these are both called
>>> from outside the main state machine loop).
>>>
>>> I'm getting a little worried about your reply above: the RPC engine
>>> offers no real-time guarantees, so if the RDMA code is really this
>>> latency-sensitive, then what will happen if we enable pre-emption
>>> and/or we just have a very congested workqueue?
>>
>> The sensitivity is only performance-related. On CPU-intensive
>> workloads, RPC execution time can increase considerably. The
>> RDMA transport doesn't rely on timely execution for proper
>> operation, but maybe I don't understand your worry.
>>
>>
>>> Also, how would a state machine solution help? I'd expect that to
>>> make the latency worse, not better, since the work item would end up
>>> getting requeued. If the worker thread then gets rescheduled as
>>> well...
>>>
>>> One solution may be to simply say we're never going to use this
>>> mechanism for RDMA. I suspect that if we just replace the use of
>>> xprt->transport_lock when protecting the receive queue with a new
>>> lock that wouldn't need to be bh-safe, then that might suffice to
>>> deal with the RDMA latency issues. I have less confidence that would
>>> work for the others due to the bulk data copy issue.
>>
>> I would like a separate non-BH lock, if that can be made to
>> work. That's in fact how xprtrdma currently works. xprtrdma
>> has its own receive list at the moment so that it can find
>> memory associated with a rqst _without_ needing to call
>> xprt_lookup_rqst. A non-BH lock is used to protect that
>> list.
>
> That can definitely be added. I see that as a separate issue.
>
> --
> Trond Myklebust
> Linux NFS client maintainer, PrimaryData
> [email protected]
--
Chuck Lever
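For reference, a minimal sketch of the separate non-bh-safe receive lock idea discussed above. The recv_lock field and the function name are assumptions made for illustration; neither exists in the patches in this thread:

/* Illustrative only: a socket receive path if the receive queue were
 * protected by a hypothetical, non-bh-safe xprt->recv_lock instead of
 * the bh-safe transport_lock. */
static void example_read_reply(struct rpc_xprt *xprt, __be32 xid, int copied)
{
	struct rpc_rqst *req;

	/* The receive job runs in the xprtiod workqueue (process
	 * context), so a plain spin_lock() suffices here. */
	spin_lock(&xprt->recv_lock);
	req = xprt_lookup_rqst(xprt, xid);
	if (!req) {
		spin_unlock(&xprt->recv_lock);
		return;
	}
	xprt_pin_rqst(req);
	spin_unlock(&xprt->recv_lock);

	/* ... copy the data into req->rq_private_buf with no lock held;
	 * the pin keeps xprt_release() from freeing the buffers ... */

	spin_lock(&xprt->recv_lock);
	xprt_complete_rqst(req->rq_task, copied);
	xprt_unpin_rqst(req);
	spin_unlock(&xprt->recv_lock);
}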
> On Aug 15, 2017, at 12:13 PM, Trond Myklebust <[email protected]> wrote:
>
> On Tue, 2017-08-15 at 11:05 -0400, Chuck Lever wrote:
>> Reviewed-by: Chuck Lever <[email protected]>
>
> Thanks! I agree with your proposal for how to apply this to the RDMA
> code. I'm hoping you will be able to look into that and check that the
> removal of transport lock protection won't end up causing corruption of
> the RDMA statistics counters, etc.
The client-side RDMA statistics counters are currently not protected,
in general (although yes, some of them happen to be manipulated
only behind locks). I'm more worried about how to guarantee that
MR invalidation occurs exactly once as an RPC completes.
My plan is to wait until xprt_pin_rqst lands in mainline, then add
xprtrdma usage of that API in the following kernel release. Let me
know if you'd like something to go in _with_ xprt_pin_rqst instead,
and we can try to co-ordinate.
--
Chuck Lever
Hi Trond,
On 08/14/2017 03:16 PM, Trond Myklebust wrote:
> Instead add a mechanism to ensure that the request doesn't disappear
> from underneath us while copying from the socket. We do this by
> preventing xprt_release() from freeing the XDR buffers until the
> flag RPC_TASK_MSG_RECV has been cleared from the request.
>
> Signed-off-by: Trond Myklebust <[email protected]>
I see the following kernel crash after applying this patch and running xfstests generic/089:
[ 25.408414] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
[ 25.409253] IP: xprt_pin_rqst+0x23/0x30 [sunrpc]
[ 25.409728] PGD 32ef2067
[ 25.409729] P4D 32ef2067
[ 25.410003] PUD 32ed0067
[ 25.410276] PMD 0
[ 25.410561]
[ 25.410935] Oops: 0002 [#1] PREEMPT SMP
[ 25.411326] Modules linked in: rpcsec_gss_krb5 nfsv4 nfs fscache rpcrdma cfg80211 rfkill crct10dif_pclmul crc32_pclmul ghash_
clmulni_intel pcbc joydev vmwgfx mousedev ppdev snd_hda_codec_generic aesni_intel drm_kms_helper aes_x86_64 crypto_simd snd_hda_
intel cryptd evdev glue_helper input_leds led_class snd_hda_codec psmouse mac_hid pcspkr snd_hwdep syscopyarea snd_hda_core sysf
illrect sysimgblt fb_sys_fops snd_pcm ttm snd_timer parport_pc drm pvpanic parport snd intel_agp soundcore intel_gtt i2c_piix4 b
utton sch_fq_codel nfsd auth_rpcgss oid_registry nfs_acl lockd grace sunrpc ip_tables x_tables ata_generic pata_acpi serio_raw a
tkbd libps2 ata_piix i8042 serio floppy libata scsi_mod uhci_hcd ehci_pci ehci_hcd usbcore usb_common virtio_balloon xfs libcrc3
2c crc32c_generic crc32c_intel virtio_net
[ 25.418332] virtio_pci virtio_blk virtio_ring virtio
[ 25.418853] CPU: 1 PID: 84 Comm: kworker/1:1H Not tainted 4.13.0-rc5-ANNA+ #4039
[ 25.419603] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
[ 25.420166] Workqueue: xprtiod xs_tcp_data_receive_workfn [sunrpc]
[ 25.420796] task: ffff88003c645880 task.stack: ffffc9000054c000
[ 25.421702] RIP: 0010:xprt_pin_rqst+0x23/0x30 [sunrpc]
[ 25.422507] RSP: 0018:ffffc9000054fd08 EFLAGS: 00010282
[ 25.423327] RAX: 0000000000000000 RBX: 00000000000000cc RCX: 0000000000001000
[ 25.424340] RDX: 0000000000000001 RSI: 0000100000000000 RDI: ffff880037bcfc00
[ 25.425338] RBP: ffffc9000054fd08 R08: ffff880038778568 R09: ffff88003fc18780
[ 25.426329] R10: ffff88003dbe7300 R11: 0000000000000000 R12: ffff880038778438
[ 25.427324] R13: ffff880038778000 R14: ffffffffa02ba1bb R15: ffff880037bcfc00
[ 25.428316] FS: 0000000000000000(0000) GS:ffff88003fd00000(0000) knlGS:0000000000000000
[ 25.429704] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 25.430585] CR2: 0000000000000030 CR3: 0000000032ea7000 CR4: 00000000001406e0
[ 25.431613] Call Trace:
[ 25.432188] xs_tcp_data_recv+0x598/0xa40 [sunrpc]
[ 25.432988] ? rpc_wake_up_first_on_wq+0xa0/0x1d0 [sunrpc]
[ 25.434403] ? call_decode+0x830/0x830 [sunrpc]
[ 25.435263] ? xprt_release_xprt+0x86/0x90 [sunrpc]
[ 25.436074] tcp_read_sock+0x98/0x1d0
[ 25.436760] ? xs_tcp_check_fraghdr.part.0+0x60/0x60 [sunrpc]
[ 25.437702] xs_tcp_data_receive_workfn+0xc2/0x170 [sunrpc]
[ 25.438554] process_one_work+0x1ed/0x430
[ 25.439242] worker_thread+0x56/0x400
[ 25.439942] kthread+0x134/0x150
[ 25.440554] ? process_one_work+0x430/0x430
[ 25.441235] ? kthread_create_on_node+0x80/0x80
[ 25.441946] ret_from_fork+0x25/0x30
[ 25.442555] Code: 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 55 48 89 e5 48 81 ec 20 10 00 00 48 83 0c 24 00 48 81 c4 20 10 00 00
48 8b 87 88 00 00 00 <f0> 80 48 30 08 5d c3 66 0f 1f 44 00 00 0f 1f 44 00 00 48 8b 87
[ 25.445096] RIP: xprt_pin_rqst+0x23/0x30 [sunrpc] RSP: ffffc9000054fd08
[ 25.446007] CR2: 0000000000000030
[ 25.446601] ---[ end trace 9d711ee39d595517 ]---
[ 25.447320] Kernel panic - not syncing: Fatal exception in interrupt
[ 25.448292] Kernel Offset: disabled
[ 25.448904] ---[ end Kernel panic - not syncing: Fatal exception in interrupt
> ---
> include/linux/sunrpc/sched.h | 2 ++
> include/linux/sunrpc/xprt.h | 2 ++
> net/sunrpc/xprt.c | 42 ++++++++++++++++++++++++++++++++++++++++++
> net/sunrpc/xprtsock.c | 27 ++++++++++++++++++++++-----
> 4 files changed, 68 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
> index 15bc1cd6ee5c..e972d9e05426 100644
> --- a/include/linux/sunrpc/sched.h
> +++ b/include/linux/sunrpc/sched.h
> @@ -141,6 +141,8 @@ struct rpc_task_setup {
> #define RPC_TASK_ACTIVE 2
> #define RPC_TASK_MSG_XMIT 3
> #define RPC_TASK_MSG_XMIT_WAIT 4
> +#define RPC_TASK_MSG_RECV 5
> +#define RPC_TASK_MSG_RECV_WAIT 6
>
> #define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
> #define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index eab1c749e192..65b9e0224753 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -372,6 +372,8 @@ void xprt_write_space(struct rpc_xprt *xprt);
> void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
> struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
> void xprt_complete_rqst(struct rpc_task *task, int copied);
> +void xprt_pin_rqst(struct rpc_rqst *req);
> +void xprt_unpin_rqst(struct rpc_rqst *req);
> void xprt_release_rqst_cong(struct rpc_task *task);
> void xprt_disconnect_done(struct rpc_xprt *xprt);
> void xprt_force_disconnect(struct rpc_xprt *xprt);
> diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
> index 788c1b6138c2..62c379865f7c 100644
> --- a/net/sunrpc/xprt.c
> +++ b/net/sunrpc/xprt.c
> @@ -844,6 +844,47 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
> }
> EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
>
> +/**
> + * xprt_pin_rqst - Pin a request on the transport receive list
> + * @req: Request to pin
> + *
> + * Caller must ensure this is atomic with the call to xprt_lookup_rqst()
> + * so should be holding the xprt transport lock.
> + */
> +void xprt_pin_rqst(struct rpc_rqst *req)
> +{
> + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
> +}
> +
> +/**
> + * xprt_unpin_rqst - Unpin a request on the transport receive list
> + * @req: Request to pin
> + *
> + * Caller should be holding the xprt transport lock.
> + */
> +void xprt_unpin_rqst(struct rpc_rqst *req)
> +{
> + struct rpc_task *task = req->rq_task;
> +
> + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
> + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
> + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
> +}
> +
> +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
> +__must_hold(&req->rq_xprt->transport_lock)
> +{
> + struct rpc_task *task = req->rq_task;
> + if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
> + spin_unlock_bh(&req->rq_xprt->transport_lock);
> + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
> + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
> + TASK_UNINTERRUPTIBLE);
> + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
> + spin_lock_bh(&req->rq_xprt->transport_lock);
> + }
> +}
> +
> static void xprt_update_rtt(struct rpc_task *task)
> {
> struct rpc_rqst *req = task->tk_rqstp;
> @@ -1301,6 +1342,7 @@ void xprt_release(struct rpc_task *task)
> list_del(&req->rq_list);
> xprt->last_used = jiffies;
> xprt_schedule_autodisconnect(xprt);
> + xprt_wait_on_pinned_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
> if (req->rq_buffer)
> xprt->ops->buf_free(task);
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 4f154d388748..222993fbaf99 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -973,6 +973,8 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
> rovr = xprt_lookup_rqst(xprt, *xp);
> if (!rovr)
> goto out_unlock;
> + xprt_pin_rqst(rovr);
> + spin_unlock_bh(&xprt->transport_lock);
> task = rovr->rq_task;
>
> copied = rovr->rq_private_buf.buflen;
> @@ -981,11 +983,14 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt,
>
> if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
> dprintk("RPC: sk_buff copy failed\n");
> - goto out_unlock;
> + spin_lock_bh(&xprt->transport_lock);
> + goto out_unpin;
> }
>
> + spin_lock_bh(&xprt->transport_lock);
> xprt_complete_rqst(task, copied);
> -
> +out_unpin:
> + xprt_unpin_rqst(rovr);
> out_unlock:
> spin_unlock_bh(&xprt->transport_lock);
> }
> @@ -1054,6 +1059,8 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
> rovr = xprt_lookup_rqst(xprt, *xp);
> if (!rovr)
> goto out_unlock;
> + xprt_pin_rqst(rovr);
> + spin_unlock_bh(&xprt->transport_lock);
> task = rovr->rq_task;
>
> if ((copied = rovr->rq_private_buf.buflen) > repsize)
> @@ -1062,14 +1069,17 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt,
> /* Suck it into the iovec, verify checksum if not done by hw. */
> if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
> __UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
> - goto out_unlock;
> + spin_lock_bh(&xprt->transport_lock);
> + goto out_unpin;
> }
>
> __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
>
> + spin_lock_bh(&xprt->transport_lock);
> xprt_adjust_cwnd(xprt, task, copied);
> xprt_complete_rqst(task, copied);
> -
> +out_unpin:
> + xprt_unpin_rqst(rovr);
> out_unlock:
> spin_unlock_bh(&xprt->transport_lock);
> }
> @@ -1351,12 +1361,15 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
> spin_unlock_bh(&xprt->transport_lock);
> return -1;
> }
> + xprt_pin_rqst(req);
> + spin_unlock_bh(&xprt->transport_lock);
>
> xs_tcp_read_common(xprt, desc, req);
>
> + spin_lock_bh(&xprt->transport_lock);
> if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
> xprt_complete_rqst(req->rq_task, transport->tcp_copied);
> -
> + xprt_unpin_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
> return 0;
> }
> @@ -1385,12 +1398,16 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
> xprt_force_disconnect(xprt);
> return -1;
> }
> + xprt_pin_rqst(req);
> + spin_unlock_bh(&xprt->transport_lock);
>
> dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
> xs_tcp_read_common(xprt, desc, req);
>
> + spin_lock_bh(&xprt->transport_lock);
> if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
> xprt_complete_bc_request(req, transport->tcp_copied);
> + xprt_unpin_rqst(req);
> spin_unlock_bh(&xprt->transport_lock);
>
> return 0;
>
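One subtle point in the xprt_wait_on_pinned_rqst() hunk quoted above: the transport_lock has to be dropped around wait_on_bit(), both because sleeping under a spinlock is not allowed and because the receive path must retake that same lock before it can call xprt_unpin_rqst() and wake the waiter. The same logic as the patch, condensed and annotated for reference (no new code):

static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
__must_hold(&req->rq_xprt->transport_lock)
{
	struct rpc_task *task = req->rq_task;

	if (test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
		/* The receive side needs transport_lock in order to
		 * finish the copy and clear RPC_TASK_MSG_RECV. */
		spin_unlock_bh(&req->rq_xprt->transport_lock);
		/* RECV_WAIT tells xprt_unpin_rqst() that someone is
		 * waiting, so it knows to call wake_up_bit(). */
		set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
		wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
				TASK_UNINTERRUPTIBLE);
		clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
		spin_lock_bh(&req->rq_xprt->transport_lock);
	}
}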
On Wed, 2017-08-16 at 11:45 -0400, Anna Schumaker wrote:
> Hi Trond,
>
> On 08/14/2017 03:16 PM, Trond Myklebust wrote:
> > Instead add a mechanism to ensure that the request doesn't disappear
> > from underneath us while copying from the socket. We do this by
> > preventing xprt_release() from freeing the XDR buffers until the
> > flag RPC_TASK_MSG_RECV has been cleared from the request.
> >
> > Signed-off-by: Trond Myklebust <[email protected]>
>
> I see the following kernel crash after applying this patch and
> running xfstests generic/089:
>
> [   25.408414] BUG: unable to handle kernel NULL pointer dereference at 0000000000000030
> [   25.409253] IP: xprt_pin_rqst+0x23/0x30 [sunrpc]
> [...]

Ah... That looks like it might be the callback channel, which doesn't
have an associated rq_task.

Hmm... That code looks as if we can just skip the pinning altogether,
since a callback request is going nowhere until it actually has
received data.

--
Trond Myklebust
Linux NFS client maintainer, PrimaryData
[email protected]
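The faulting address in Anna's oops (a NULL pointer dereference at offset 0x30) is consistent with xprt_pin_rqst() dereferencing a NULL req->rq_task, which fits the backchannel explanation above: callback requests have no associated rq_task. A rough sketch of what "skip the pinning altogether" could look like on top of the patch above (illustrative only, not the follow-up patch that was actually posted):

--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ static int xs_tcp_read_callback(struct rpc_xprt *xprt,
 		xprt_force_disconnect(xprt);
 		return -1;
 	}
-	xprt_pin_rqst(req);
 	spin_unlock_bh(&xprt->transport_lock);
 
 	dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
 	xs_tcp_read_common(xprt, desc, req);
 
 	spin_lock_bh(&xprt->transport_lock);
 	if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
 		xprt_complete_bc_request(req, transport->tcp_copied);
-	xprt_unpin_rqst(req);
 	spin_unlock_bh(&xprt->transport_lock);

The transport lock can still be dropped across xs_tcp_read_common() here because, as noted above, a callback request is going nowhere until it has actually received data.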