By switching to an XFS-backed export, I am able to reproduce the
ibcomp worker crash on my client with xfstests generic/013.
For the failing LISTXATTRS operation, xdr_inline_pages() is called
with page_len=12 and buflen=128.
- When ->send_request() is called, rpcrdma_marshal_req() does not
set up a Reply chunk because buflen is smaller than the inline
threshold. Thus rpcrdma_convert_iovs() does not get invoked at
all and the transport's XDRBUF_SPARSE_PAGES logic is not invoked
on the receive buffer.
- During reply processing, rpcrdma_inline_fixup() tries to copy
received data into rq_rcv_buf->pages because page_len is positive.
But there are no receive pages because rpcrdma_marshal_req() never
allocated them.
The result is that the ibcomp worker faults and dies. Sometimes that
causes a visible crash, and sometimes it results in a transport hang
without other symptoms.
RPC/RDMA's XDRBUF_SPARSE_PAGES support is not entirely correct, and
should eventually be fixed or replaced. However, my preference is
that upper-layer operations should explicitly allocate their receive
buffers (using GFP_KERNEL) when possible, rather than relying on
XDRBUF_SPARSE_PAGES.
Reported-by: Olga kornievskaia <[email protected]>
Suggested-by: Olga kornievskaia <[email protected]>
Fixes: c10a75145feb ("NFSv4.2: add the extended attribute proc functions.")
Signed-off-by: Chuck Lever <[email protected]>
Reviewed-by: Olga kornievskaia <[email protected]>
Reviewed-by: Frank van der Linden <[email protected]>
Tested-by: Olga kornievskaia <[email protected]>
---
fs/nfs/nfs42proc.c | 17 ++++++++++-------
fs/nfs/nfs42xdr.c | 1 -
2 files changed, 10 insertions(+), 8 deletions(-)
Changes since v1:
- Added a Fixes: tag
- Added Reviewed-by:, etc tags
- Clarified the patch description
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 2b2211d1234e..24810305ec1c 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1241,7 +1241,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
.rpc_resp = &res,
};
u32 xdrlen;
- int ret, np;
+ int ret, np, i;
res.scratch = alloc_page(GFP_KERNEL);
@@ -1253,10 +1253,14 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
xdrlen = server->lxasize;
np = xdrlen / PAGE_SIZE + 1;
+ ret = -ENOMEM;
pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
- if (pages == NULL) {
- __free_page(res.scratch);
- return -ENOMEM;
+ if (pages == NULL)
+ goto out_free;
+ for (i = 0; i < np; i++) {
+ pages[i] = alloc_page(GFP_KERNEL);
+ if (!pages[i])
+ goto out_free;
}
arg.xattr_pages = pages;
@@ -1271,14 +1275,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
*eofp = res.eof;
}
+out_free:
while (--np >= 0) {
if (pages[np])
__free_page(pages[np]);
}
-
- __free_page(res.scratch);
kfree(pages);
-
+ __free_page(res.scratch);
return ret;
}
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6e060a88f98c..8432bd6b95f0 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1528,7 +1528,6 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
hdr.replen);
- req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
encode_nops(&hdr);
}
On Tue, Nov 24, 2020 at 05:21:47PM -0500, Chuck Lever wrote:
>
>
> By switching to an XFS-backed export, I am able to reproduce the
> ibcomp worker crash on my client with xfstests generic/013.
>
> For the failing LISTXATTRS operation, xdr_inline_pages() is called
> with page_len=12 and buflen=128.
>
> - When ->send_request() is called, rpcrdma_marshal_req() does not
> set up a Reply chunk because buflen is smaller than the inline
> threshold. Thus rpcrdma_convert_iovs() does not get invoked at
> all and the transport's XDRBUF_SPARSE_PAGES logic is not invoked
> on the receive buffer.
>
> - During reply processing, rpcrdma_inline_fixup() tries to copy
> received data into rq_rcv_buf->pages because page_len is positive.
> But there are no receive pages because rpcrdma_marshal_req() never
> allocated them.
>
> The result is that the ibcomp worker faults and dies. Sometimes that
> causes a visible crash, and sometimes it results in a transport hang
> without other symptoms.
>
> RPC/RDMA's XDRBUF_SPARSE_PAGES support is not entirely correct, and
> should eventually be fixed or replaced. However, my preference is
> that upper-layer operations should explicitly allocate their receive
> buffers (using GFP_KERNEL) when possible, rather than relying on
> XDRBUF_SPARSE_PAGES.
>
> Reported-by: Olga kornievskaia <[email protected]>
> Suggested-by: Olga kornievskaia <[email protected]>
> Fixes: c10a75145feb ("NFSv4.2: add the extended attribute proc functions.")
> Signed-off-by: Chuck Lever <[email protected]>
> Reviewed-by: Olga kornievskaia <[email protected]>
> Reviewed-by: Frank van der Linden <[email protected]>
> Tested-by: Olga kornievskaia <[email protected]>
> ---
> fs/nfs/nfs42proc.c | 17 ++++++++++-------
> fs/nfs/nfs42xdr.c | 1 -
> 2 files changed, 10 insertions(+), 8 deletions(-)
>
> Changes since v1:
> - Added a Fixes: tag
> - Added Reviewed-by:, etc tags
> - Clarified the patch description
>
>
> diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
> index 2b2211d1234e..24810305ec1c 100644
> --- a/fs/nfs/nfs42proc.c
> +++ b/fs/nfs/nfs42proc.c
> @@ -1241,7 +1241,7 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
> .rpc_resp = &res,
> };
> u32 xdrlen;
> - int ret, np;
> + int ret, np, i;
>
>
> res.scratch = alloc_page(GFP_KERNEL);
> @@ -1253,10 +1253,14 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
> xdrlen = server->lxasize;
> np = xdrlen / PAGE_SIZE + 1;
>
> + ret = -ENOMEM;
> pages = kcalloc(np, sizeof(struct page *), GFP_KERNEL);
> - if (pages == NULL) {
> - __free_page(res.scratch);
> - return -ENOMEM;
> + if (pages == NULL)
> + goto out_free;
> + for (i = 0; i < np; i++) {
> + pages[i] = alloc_page(GFP_KERNEL);
> + if (!pages[i])
> + goto out_free;
> }
>
> arg.xattr_pages = pages;
> @@ -1271,14 +1275,13 @@ static ssize_t _nfs42_proc_listxattrs(struct inode *inode, void *buf,
> *eofp = res.eof;
> }
>
> +out_free:
> while (--np >= 0) {
> if (pages[np])
> __free_page(pages[np]);
> }
> -
> - __free_page(res.scratch);
> kfree(pages);
> -
> + __free_page(res.scratch);
> return ret;
>
> }
I missed this in v1 - if you use goto out_free after failing to allocate
the pages array, this will still try to use it when trying to free the
individual pages. Looks like you'll need a second label to goto at the
end for the pages == NULL case.
- Frank