The RPC code is occasionally hanging when the receive code fails to
empty the socket buffer due to a partial read of the data. When we
convert that to an EAGAIN, it appears we occasionally leave data in the
socket. The fix is to just keep reading until the socket returns
EAGAIN/EWOULDBLOCK.
Reported-by: Catalin Marinas <[email protected]>
Reported-by: Cristian Marussi <[email protected]>
Reported-by: Chuck Lever <[email protected]>
Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/xprtsock.c | 39 +++++++++++++++++++--------------------
1 file changed, 19 insertions(+), 20 deletions(-)
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index ae77c71c1f64..1c0560dc5557 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -398,7 +398,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
seek -= buf->head[0].iov_len;
@@ -418,7 +418,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
seek = 0;
} else {
seek -= buf->page_len;
@@ -433,7 +433,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
goto out;
if (ret != want)
- goto eagain;
+ goto out;
} else
offset += buf->tail[0].iov_len;
ret = -EMSGSIZE;
@@ -441,9 +441,6 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
out:
*read = offset - seek_init;
return ret;
-eagain:
- ret = -EAGAIN;
- goto out;
sock_err:
offset += seek;
goto out;
@@ -486,19 +483,18 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
if (transport->recv.offset == transport->recv.len) {
if (xs_read_stream_request_done(transport))
msg->msg_flags |= MSG_EOR;
- return transport->recv.copied;
+ return read;
}
switch (ret) {
+ default:
+ break;
case -EMSGSIZE:
- return transport->recv.copied;
+ return read;
case 0:
return -ESHUTDOWN;
- default:
- if (ret < 0)
- return ret;
}
- return -EAGAIN;
+ return ret;
}
static size_t
@@ -537,7 +533,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
ret = xs_read_stream_request(transport, msg, flags, req);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_bc_request(req, ret);
+ xprt_complete_bc_request(req, transport->recv.copied);
return ret;
}
@@ -570,7 +566,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
spin_lock(&xprt->queue_lock);
if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
- xprt_complete_rqst(req->rq_task, ret);
+ xprt_complete_rqst(req->rq_task, transport->recv.copied);
xprt_unpin_rqst(req);
out:
spin_unlock(&xprt->queue_lock);
@@ -591,10 +587,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
if (ret <= 0)
goto out_err;
transport->recv.offset = ret;
- if (ret != want) {
- ret = -EAGAIN;
- goto out_err;
- }
+ if (transport->recv.offset != want)
+ return transport->recv.offset;
transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
RPC_FRAGMENT_SIZE_MASK;
transport->recv.offset -= sizeof(transport->recv.fraghdr);
@@ -602,6 +596,9 @@ xs_read_stream(struct sock_xprt *transport, int flags)
}
switch (be32_to_cpu(transport->recv.calldir)) {
+ default:
+ msg.msg_flags |= MSG_TRUNC;
+ break;
case RPC_CALL:
ret = xs_read_stream_call(transport, &msg, flags);
break;
@@ -616,6 +613,8 @@ xs_read_stream(struct sock_xprt *transport, int flags)
goto out_err;
read += ret;
if (transport->recv.offset < transport->recv.len) {
+ if (!(msg.msg_flags & MSG_TRUNC))
+ return read;
ret = xs_read_discard(transport->sock, &msg, flags,
transport->recv.len - transport->recv.offset);
if (ret <= 0)
@@ -623,7 +622,7 @@ xs_read_stream(struct sock_xprt *transport, int flags)
transport->recv.offset += ret;
read += ret;
if (transport->recv.offset != transport->recv.len)
- return -EAGAIN;
+ return read;
}
if (xs_read_stream_request_done(transport)) {
trace_xs_stream_read_request(transport);
@@ -653,7 +652,7 @@ static void xs_stream_data_receive(struct sock_xprt *transport)
clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
ret = xs_read_stream(transport, MSG_DONTWAIT);
- if (ret <= 0)
+ if (ret < 0)
break;
read += ret;
cond_resched();
--
2.19.2
On Tue, 2018-12-04 at 14:24 -0500, Trond Myklebust wrote:
> The RPC code is occasionally hanging when the receive code fails to
> empty the socket buffer due to a partial read of the data. When we
> convert that to an EAGAIN, it appears we occasionally leave data in
> the
> socket. The fix is to just keep reading until the socket returns
> EAGAIN/EWOULDBLOCK.
>
> Reported-by: Catalin Marinas <[email protected]>
> Reported-by: Cristian Marussi <[email protected]>
> Reported-by: Chuck Lever <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
> ---
Hi guys,
I think I managed to reproduce the hangs that you were reporting by
setting the r/wsize to 1024, and then running the xfstests generic/074
test. This patch (which should apply on top of the linux-next branch
that I pointed you to previously) appears to fix the hang for that test
case, so I'm hoping it will also fix the issues you reported.
This patch can also be retrieved as part of the 'testing' branch (
https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/testing
)
git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git testing
Cheers
Trond
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]
Hi
On 04/12/2018 19:31, Trond Myklebust wrote:
> On Tue, 2018-12-04 at 14:24 -0500, Trond Myklebust wrote:
>> The RPC code is occasionally hanging when the receive code fails to
>> empty the socket buffer due to a partial read of the data. When we
>> convert that to an EAGAIN, it appears we occasionally leave data in
>> the
>> socket. The fix is to just keep reading until the socket returns
>> EAGAIN/EWOULDBLOCK.
>>
>> Reported-by: Catalin Marinas <[email protected]>
>> Reported-by: Cristian Marussi <[email protected]>
>> Reported-by: Chuck Lever <[email protected]>
>> Signed-off-by: Trond Myklebust <[email protected]>
>> ---
>
> Hi guys,
>
> I think I managed to reproduce the hangs that you were reporting by
> setting the r/wsize to 1024, and then running the xfstests generic/074
> test. This patch (which should apply on top of the linux-next branch
> that I pointed you to previously) appears to fix the hang for that test
> case, so I'm hoping it will also fix the issues you reported.
Thanks !
I'll try it out.
Cheers
Cristian
>
> This patch can also be retrieved as part of the 'testing' branch (
> https://git.linux-nfs.org/?p=trondmy/linux-nfs.git;a=shortlog;h=refs/heads/testing
> )
>
> git pull git://git.linux-nfs.org/projects/trondmy/linux-nfs.git testing
>
> Cheers
> Trond
>
On Tue, Dec 04, 2018 at 02:24:39PM -0500, Trond Myklebust wrote:
> The RPC code is occasionally hanging when the receive code fails to
> empty the socket buffer due to a partial read of the data. When we
> convert that to an EAGAIN, it appears we occasionally leave data in the
> socket. The fix is to just keep reading until the socket returns
> EAGAIN/EWOULDBLOCK.
>
> Reported-by: Catalin Marinas <[email protected]>
> Reported-by: Cristian Marussi <[email protected]>
> Reported-by: Chuck Lever <[email protected]>
> Signed-off-by: Trond Myklebust <[email protected]>
It seems to do the trick for me. Thanks.
Tested-by: Catalin Marinas <[email protected]>
Hi
On 05/12/2018 09:14, Cristian Marussi wrote:
> Hi
>
> On 04/12/2018 19:31, Trond Myklebust wrote:
>> On Tue, 2018-12-04 at 14:24 -0500, Trond Myklebust wrote:
>>> The RPC code is occasionally hanging when the receive code fails to
>>> empty the socket buffer due to a partial read of the data. When we
>>> convert that to an EAGAIN, it appears we occasionally leave data in
>>> the
>>> socket. The fix is to just keep reading until the socket returns
>>> EAGAIN/EWOULDBLOCK.
>>>
>>> Reported-by: Catalin Marinas <[email protected]>
>>> Reported-by: Cristian Marussi <[email protected]>
>>> Reported-by: Chuck Lever <[email protected]>
>>> Signed-off-by: Trond Myklebust <[email protected]>
>>> ---
>>
[snip]
Applying the patch on top of your linux-nfs next:
8739cbe10efb (HEAD -> linux-next) SUNRPC: Fix RPC receive hangs
0a9a4304f361 (origin/linux-next) SUNRPC: Fix a potential race in xprt_connect()
71700bb96047 SUNRPC: Fix a memory leak in call_encode()
8dae5398ab1a SUNRPC: Fix leak of krb5p encode pages
and testing on arm64 64k pages without rsize workaround.
SOLVES for me. No hang or slowdown launching LKP/LTP.
dbench results seems fine again.(as with rsize workaround)
Operation Count AvgLat MaxLat
----------------------------------------
NTCreateX 106349 11.479 400.908
Close 78111 16.508 414.883
Rename 4500 19.722 246.805
Unlink 21475 3.625 196.797
Qpathinfo 96460 1.786 278.724
Qfileinfo 16829 10.044 233.708
Qfsinfo 17615 2.119 319.131
Sfileinfo 8700 16.819 145.051
Find 37251 3.389 264.889
WriteX 52509 0.048 6.663
ReadX 166675 0.655 189.954
LockX 348 11.840 227.481
UnlockX 348 2.563 72.949
Flush 7470 20.296 274.855
Throughput 5.50353 MB/sec 6 clients 6 procs max_latency=414.902 ms
Thanks
Cristian