2022-03-22 01:48:35

by Trond Myklebust

Subject: [PATCH v2 3/9] SUNRPC: Fix unx_lookup_cred() allocation

From: Trond Myklebust <[email protected]>

Default to the same mempool allocation strategy as for rpc_malloc().
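
For reference, rpc_task_gfp_mask() is the helper added earlier in this
series. A sketch from memory of net/sunrpc/sched.c (not part of this
patch):

        gfp_t rpc_task_gfp_mask(void)
        {
                /* On a workqueue (rpciod/xprtiod), fail fast and quietly:
                 * no hard retries, no OOM killer, no failure warning.
                 * Callers are expected to handle NULL.
                 */
                if (current->flags & PF_WQ_WORKER)
                        return GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
                return GFP_KERNEL;
        }

The kmalloc() below therefore sleeps normally in process context, but
fails fast when called from rpciod.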

Signed-off-by: Trond Myklebust <[email protected]>
---
net/sunrpc/auth_unix.c | 18 ++++++++++--------
1 file changed, 10 insertions(+), 8 deletions(-)

diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index c629d366030e..1e091d3fa607 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -40,17 +40,19 @@ unx_destroy(struct rpc_auth *auth)
 /*
  * Lookup AUTH_UNIX creds for current process
  */
-static struct rpc_cred *
-unx_lookup_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
+static struct rpc_cred *unx_lookup_cred(struct rpc_auth *auth,
+                                        struct auth_cred *acred, int flags)
 {
-        gfp_t gfp = GFP_KERNEL;
         struct rpc_cred *ret;
 
-        if (flags & RPCAUTH_LOOKUP_ASYNC)
-                gfp = GFP_NOWAIT | __GFP_NOWARN;
-        ret = mempool_alloc(unix_pool, gfp);
-        if (!ret)
-                return ERR_PTR(-ENOMEM);
+        ret = kmalloc(sizeof(*ret), rpc_task_gfp_mask());
+        if (!ret) {
+                if (!(flags & RPCAUTH_LOOKUP_ASYNC))
+                        return ERR_PTR(-ENOMEM);
+                ret = mempool_alloc(unix_pool, GFP_NOWAIT);
+                if (!ret)
+                        return ERR_PTR(-ENOMEM);
+        }
         rpcauth_init_cred(ret, acred, auth, &unix_credops);
         ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
         return ret;
--
2.35.1


2022-03-22 01:48:44

by Trond Myklebust

Subject: [PATCH v2 4/9] SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent

From: Trond Myklebust <[email protected]>

Make sure that rpciod and xprtiod are always using the same slab
allocation modes.
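
Concretely, the mode these allocations converge on when running from
rpciod/xprtiod workqueue context is, roughly (see rpc_task_gfp_mask()):

        /*
         * GFP_KERNEL:    normal sleeping allocation with direct reclaim
         * __GFP_NORETRY: give up early rather than retrying hard or
         *                invoking the OOM killer
         * __GFP_NOWARN:  suppress the allocation failure warning, since
         *                the callers handle NULL themselves
         */
        gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;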

Signed-off-by: Trond Myklebust <[email protected]>
---
 net/sunrpc/backchannel_rqst.c |  8 ++++----
 net/sunrpc/rpcb_clnt.c        |  4 ++--
 net/sunrpc/socklib.c          |  3 ++-
 net/sunrpc/xprt.c             |  5 +----
 net/sunrpc/xprtsock.c         | 11 ++++++-----
 5 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index 22a2c235abf1..5a6b61dcdf2d 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -75,9 +75,9 @@ static int xprt_alloc_xdr_buf(struct xdr_buf *buf, gfp_t gfp_flags)
         return 0;
 }
 
-static
-struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt, gfp_t gfp_flags)
+static struct rpc_rqst *xprt_alloc_bc_req(struct rpc_xprt *xprt)
 {
+        gfp_t gfp_flags = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
         struct rpc_rqst *req;
 
         /* Pre-allocate one backchannel rpc_rqst */
@@ -154,7 +154,7 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs)
         INIT_LIST_HEAD(&tmp_list);
         for (i = 0; i < min_reqs; i++) {
                 /* Pre-allocate one backchannel rpc_rqst */
-                req = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+                req = xprt_alloc_bc_req(xprt);
                 if (req == NULL) {
                         printk(KERN_ERR "Failed to create bc rpc_rqst\n");
                         goto out_free;
@@ -343,7 +343,7 @@ struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid)
                         break;
                 } else if (req)
                         break;
-                new = xprt_alloc_bc_req(xprt, GFP_KERNEL);
+                new = xprt_alloc_bc_req(xprt);
         } while (new);
         return req;
 }
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index 0fdeb8666bfd..5a8e6d46809a 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -714,7 +714,7 @@ void rpcb_getport_async(struct rpc_task *task)
                 goto bailout_nofree;
         }
 
-        map = kzalloc(sizeof(struct rpcbind_args), GFP_KERNEL);
+        map = kzalloc(sizeof(struct rpcbind_args), rpc_task_gfp_mask());
         if (!map) {
                 status = -ENOMEM;
                 goto bailout_release_client;
@@ -730,7 +730,7 @@ void rpcb_getport_async(struct rpc_task *task)
         case RPCBVERS_4:
         case RPCBVERS_3:
                 map->r_netid = xprt->address_strings[RPC_DISPLAY_NETID];
-                map->r_addr = rpc_sockaddr2uaddr(sap, GFP_KERNEL);
+                map->r_addr = rpc_sockaddr2uaddr(sap, rpc_task_gfp_mask());
                 if (!map->r_addr) {
                         status = -ENOMEM;
                         goto bailout_free_args;
diff --git a/net/sunrpc/socklib.c b/net/sunrpc/socklib.c
index d52313af82bc..05b38bf68316 100644
--- a/net/sunrpc/socklib.c
+++ b/net/sunrpc/socklib.c
@@ -15,6 +15,7 @@
 #include <linux/pagemap.h>
 #include <linux/udp.h>
 #include <linux/sunrpc/msg_prot.h>
+#include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/export.h>
 
@@ -222,7 +223,7 @@ static int xprt_send_pagedata(struct socket *sock, struct msghdr *msg,
 {
         int err;
 
-        err = xdr_alloc_bvec(xdr, GFP_KERNEL);
+        err = xdr_alloc_bvec(xdr, rpc_task_gfp_mask());
         if (err < 0)
                 return err;
 
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index bbe913121f43..744c6c1d536f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1679,15 +1679,12 @@ static bool xprt_throttle_congested(struct rpc_xprt *xprt, struct rpc_task *task
 static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt)
 {
         struct rpc_rqst *req = ERR_PTR(-EAGAIN);
-        gfp_t gfp_mask = GFP_KERNEL;
 
         if (xprt->num_reqs >= xprt->max_reqs)
                 goto out;
         ++xprt->num_reqs;
         spin_unlock(&xprt->reserve_lock);
-        if (current->flags & PF_WQ_WORKER)
-                gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
-        req = kzalloc(sizeof(*req), gfp_mask);
+        req = kzalloc(sizeof(*req), rpc_task_gfp_mask());
         spin_lock(&xprt->reserve_lock);
         if (req != NULL)
                 goto out;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8909c768fe71..b52eaa8a0cda 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -428,9 +428,9 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
                 offset += want;
         }
 
-        want = xs_alloc_sparse_pages(buf,
-                        min_t(size_t, count - offset, buf->page_len),
-                        GFP_KERNEL);
+        want = xs_alloc_sparse_pages(
+                buf, min_t(size_t, count - offset, buf->page_len),
+                GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
         if (seek < want) {
                 ret = xs_read_bvec(sock, msg, flags, buf->bvec,
                                    xdr_buf_pagecount(buf),
@@ -826,7 +826,8 @@ static void
 xs_stream_prepare_request(struct rpc_rqst *req)
 {
         xdr_free_bvec(&req->rq_rcv_buf);
-        req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
+        req->rq_task->tk_status = xdr_alloc_bvec(
+                &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
 }
 
 /*
@@ -2487,7 +2488,7 @@ static int bc_malloc(struct rpc_task *task)
                 return -EINVAL;
         }
 
-        page = alloc_page(GFP_KERNEL);
+        page = alloc_page(GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
         if (!page)
                 return -ENOMEM;
 
--
2.35.1

2022-03-25 19:10:04

by NeilBrown

Subject: Re: [PATCH v2 4/9] SUNRPC: Make the rpciod and xprtiod slab allocation modes consistent

On Tue, 22 Mar 2022, [email protected] wrote:
> From: Trond Myklebust <[email protected]>
>
> Make sure that rpciod and xprtiod are always using the same slab
> allocation modes.
>
> Signed-off-by: Trond Myklebust <[email protected]>
....
>  xs_stream_prepare_request(struct rpc_rqst *req)
>  {
>          xdr_free_bvec(&req->rq_rcv_buf);
> -        req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_KERNEL);
> +        req->rq_task->tk_status = xdr_alloc_bvec(
> +                &req->rq_rcv_buf, GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
>  }

I did some testing of swap-over-NFS and quite quickly got a crash due
to this change.
The problem is that GFP_KERNEL allocations almost never fail.
Multi-page allocations might occasionally fail, and others might fail
for a process that has been killed by the OOM killer (or maybe just has
a fatal signal pending), but in general GFP_KERNEL is more likely to
wait (and wait and wait) than to fail.
So the failure paths haven't been tested.

xs_stream_prepare_request() is called from xprt_request_prepare(), which
is called from xprt_request_enqueue_receive(), which is called in
call_encode() *after* ->tk_status has been tested.
So when the above code sets ->tk_status to -ENOMEM - which is now more
likely - that fact is ignored and we get a crash:

[ 298.911356] Workqueue: xprtiod xs_stream_data_receive_workfn
[ 298.911696] RIP: 0010:_copy_to_iter+0x1cc/0x435
..
[ 298.918259] __skb_datagram_iter+0x64/0x225
[ 298.918507] skb_copy_datagram_iter+0xe9/0xf2
[ 298.918767] tcp_recvmsg_locked+0x653/0x77e
[ 298.919015] tcp_recvmsg+0x100/0x188
[ 298.919226] inet_recvmsg+0x5d/0x86
[ 298.919431] xs_read_stream_request.constprop.0+0x247/0x378
[ 298.919754] xs_read_stream.constprop.0+0x1c2/0x39b
[ 298.920038] xs_stream_data_receive_workfn+0x50/0x160
[ 298.920331] process_one_work+0x267/0x422
[ 298.920568] worker_thread+0x193/0x234

So we really need to audit all these places where we add __GFP_NORETRY
and ensure errors are actually handled.

For call_encode(), it might be easiest to move
        /* Add task to reply queue before transmission to avoid races */
        if (rpc_reply_expected(task))
                xprt_request_enqueue_receive(task);

up before the
        /* Did the encode result in an error condition? */
        if (task->tk_status != 0) {

and change it to

        /* Add task to reply queue before transmission to avoid races */
        if (task->tk_status == 0 && rpc_reply_expected(task))
                xprt_request_enqueue_receive(task);
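
With that change, the relevant part of call_encode() would read roughly
as follows (a sketch; the surrounding context is from memory of
net/sunrpc/clnt.c and may not match exactly):

        /* Add task to reply queue before transmission to avoid races */
        if (task->tk_status == 0 && rpc_reply_expected(task))
                xprt_request_enqueue_receive(task);

        /* Did the encode result in an error condition? */
        if (task->tk_status != 0) {
                /* Was the error nonfatal? */
                ...

so an -ENOMEM set by xprt_request_prepare() while enqueueing is caught
by the existing error handling instead of being ignored.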

I'll try a bit more testing and auditing.

Thanks,
NeilBrown