2018-12-17 16:23:58

by Vasily Averin

[permalink] [raw]
Subject: [PATCH 1/4] nfs: use-after-free in svc_process_common()

if node have NFSv41+ mounts inside several net namespaces
it can lead to use-after-free in svc_process_common()

svc_process_common()
/* Setup reply header */
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE

svc_process_common() can use already freed rqstp->rq_xprt,
it was assigned in bc_svc_process() where it was taken from serv->sv_bc_xprt.

serv is global structure but sv_bc_xprt is assigned per-netnamespace,
so if nfsv41+ shares are mounted in several containers together
bc_svc_process() can use wrong backchannel or even access freed memory.

To find correct svc_xprt of client-related backchannel
bc_svc_process() now calls new .bc_get_xprt callback
that executes svc_find_xprt() with proper xprt name.

Signed-off-by: Vasily Averin <[email protected]>
---
include/linux/sunrpc/xprt.h | 1 +
net/sunrpc/svc.c | 22 ++++++++++++++++------
net/sunrpc/xprtrdma/backchannel.c | 5 +++++
net/sunrpc/xprtrdma/transport.c | 1 +
net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
net/sunrpc/xprtsock.c | 7 +++++++
6 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index a4ab4f8d9140..031d2843a002 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -158,6 +158,7 @@ struct rpc_xprt_ops {
int (*bc_setup)(struct rpc_xprt *xprt,
unsigned int min_reqs);
int (*bc_up)(struct svc_serv *serv, struct net *net);
+ struct svc_xprt*(*bc_get_xprt)(struct svc_serv *serv, struct net *net);
size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
void (*bc_free_rqst)(struct rpc_rqst *rqst);
void (*bc_destroy)(struct rpc_xprt *xprt,
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index d13e05f1a990..a7264fd1b3db 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1450,16 +1450,22 @@ int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
struct svc_rqst *rqstp)
{
+ struct net *net = req->rq_xprt->xprt_net;
struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
struct rpc_task *task;
+ struct svc_xprt *s_xprt;
int proc_error;
int error;

dprintk("svc: %s(%p)\n", __func__, req);

+ s_xprt = req->rq_xprt->ops->bc_get_xprt(serv, net);
+ if (!s_xprt)
+ goto proc_error;
+
/* Build the svc_rqst used by the common processing routine */
- rqstp->rq_xprt = serv->sv_bc_xprt;
+ rqstp->rq_xprt = s_xprt;
rqstp->rq_xid = req->rq_xid;
rqstp->rq_prot = req->rq_xprt->prot;
rqstp->rq_server = serv;
@@ -1494,13 +1500,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,

/* Parse and execute the bc call */
proc_error = svc_process_common(rqstp, argv, resv);
+ svc_xprt_put(rqstp->rq_xprt);

atomic_inc(&req->rq_xprt->bc_free_slots);
- if (!proc_error) {
- /* Processing error: drop the request */
- xprt_free_bc_request(req);
- return 0;
- }
+ if (!proc_error)
+ goto proc_error;

/* Finally, send the reply synchronously */
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
@@ -1517,6 +1521,12 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
out:
dprintk("svc: %s(), error=%d\n", __func__, error);
return error;
+
+proc_error:
+ /* Processing error: drop the request */
+ xprt_free_bc_request(req);
+ error = -EINVAL;
+ goto out;
}
EXPORT_SYMBOL_GPL(bc_svc_process);
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index e5b367a3e517..3e06aeacda43 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -133,6 +133,11 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
return 0;
}

+struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net)
+{
+ return svc_find_xprt(serv, "rdma-bc", net, AF_UNSPEC, 0);
+}
+
/**
* xprt_rdma_bc_maxpayload - Return maximum backchannel message size
* @xprt: transport
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index ae2a83828953..41d67de93531 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -828,6 +828,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
.bc_setup = xprt_rdma_bc_setup,
.bc_up = xprt_rdma_bc_up,
+ .bc_get_xprt = xprt_rdma_bc_get_xprt,
.bc_maxpayload = xprt_rdma_bc_maxpayload,
.bc_free_rqst = xprt_rdma_bc_free_rqst,
.bc_destroy = xprt_rdma_bc_destroy,
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index a13ccb643ce0..2726d71052a8 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -662,6 +662,7 @@ void xprt_rdma_cleanup(void);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
int xprt_rdma_bc_up(struct svc_serv *, struct net *);
+struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net);
size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 8a5e823e0b33..16f9c7720465 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1411,6 +1411,12 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
return 0;
}

+static struct svc_xprt *xs_tcp_bc_get_xprt(struct svc_serv *serv,
+ struct net *net)
+{
+ return svc_find_xprt(serv, "tcp-bc", net, AF_UNSPEC, 0);
+}
+
static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
{
return PAGE_SIZE;
@@ -2668,6 +2674,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
#ifdef CONFIG_SUNRPC_BACKCHANNEL
.bc_setup = xprt_setup_bc,
.bc_up = xs_tcp_bc_up,
+ .bc_get_xprt = xs_tcp_bc_get_xprt,
.bc_maxpayload = xs_tcp_bc_maxpayload,
.bc_free_rqst = xprt_free_bc_rqst,
.bc_destroy = xprt_destroy_bc,
--
2.17.1



2018-12-17 17:49:27

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Mon, 2018-12-17 at 19:23 +0300, Vasily Averin wrote:
> if node have NFSv41+ mounts inside several net namespaces
> it can lead to use-after-free in svc_process_common()
>
> svc_process_common()
> /* Setup reply header */
> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE
>
> svc_process_common() can use already freed rqstp->rq_xprt,
> it was assigned in bc_svc_process() where it was taken from serv->sv_bc_xprt.
>
> serv is global structure but sv_bc_xprt is assigned per-netnamespace,
> so if nfsv41+ shares are mounted in several containers together
> bc_svc_process() can use wrong backchannel or even access freed memory.
>
> To find correct svc_xprt of client-related backchannel
> bc_svc_process() now calls new .bc_get_xprt callback
> that executes svc_find_xprt() with proper xprt name.
>
> Signed-off-by: Vasily Averin <[email protected]>
> ---
> include/linux/sunrpc/xprt.h | 1 +
> net/sunrpc/svc.c | 22 ++++++++++++++++------
> net/sunrpc/xprtrdma/backchannel.c | 5 +++++
> net/sunrpc/xprtrdma/transport.c | 1 +
> net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
> net/sunrpc/xprtsock.c | 7 +++++++
> 6 files changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index a4ab4f8d9140..031d2843a002 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -158,6 +158,7 @@ struct rpc_xprt_ops {
> int (*bc_setup)(struct rpc_xprt *xprt,
> unsigned int min_reqs);
> int (*bc_up)(struct svc_serv *serv, struct net *net);
> + struct svc_xprt*(*bc_get_xprt)(struct svc_serv *serv, struct net *net);
> size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
> void (*bc_free_rqst)(struct rpc_rqst *rqst);
> void (*bc_destroy)(struct rpc_xprt *xprt,
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index d13e05f1a990..a7264fd1b3db 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1450,16 +1450,22 @@ int
> bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
> struct svc_rqst *rqstp)
> {
> + struct net *net = req->rq_xprt->xprt_net;
> struct kvec *argv = &rqstp->rq_arg.head[0];
> struct kvec *resv = &rqstp->rq_res.head[0];
> struct rpc_task *task;
> + struct svc_xprt *s_xprt;
> int proc_error;
> int error;
>
> dprintk("svc: %s(%p)\n", __func__, req);
>
> + s_xprt = req->rq_xprt->ops->bc_get_xprt(serv, net);
> + if (!s_xprt)
> + goto proc_error;
> +
> /* Build the svc_rqst used by the common processing routine */
> - rqstp->rq_xprt = serv->sv_bc_xprt;
> + rqstp->rq_xprt = s_xprt;
> rqstp->rq_xid = req->rq_xid;
> rqstp->rq_prot = req->rq_xprt->prot;
> rqstp->rq_server = serv;
> @@ -1494,13 +1500,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
>
> /* Parse and execute the bc call */
> proc_error = svc_process_common(rqstp, argv, resv);
> + svc_xprt_put(rqstp->rq_xprt);
>
> atomic_inc(&req->rq_xprt->bc_free_slots);
> - if (!proc_error) {
> - /* Processing error: drop the request */
> - xprt_free_bc_request(req);
> - return 0;
> - }
> + if (!proc_error)
> + goto proc_error;
>
> /* Finally, send the reply synchronously */
> memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
> @@ -1517,6 +1521,12 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
> out:
> dprintk("svc: %s(), error=%d\n", __func__, error);
> return error;
> +
> +proc_error:
> + /* Processing error: drop the request */
> + xprt_free_bc_request(req);
> + error = -EINVAL;
> + goto out;
> }
> EXPORT_SYMBOL_GPL(bc_svc_process);
> #endif /* CONFIG_SUNRPC_BACKCHANNEL */
> diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
> index e5b367a3e517..3e06aeacda43 100644
> --- a/net/sunrpc/xprtrdma/backchannel.c
> +++ b/net/sunrpc/xprtrdma/backchannel.c
> @@ -133,6 +133,11 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
> return 0;
> }
>
> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net)
> +{
> + return svc_find_xprt(serv, "rdma-bc", net, AF_UNSPEC, 0);
> +}
> +
> /**
> * xprt_rdma_bc_maxpayload - Return maximum backchannel message size
> * @xprt: transport
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index ae2a83828953..41d67de93531 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -828,6 +828,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> .bc_setup = xprt_rdma_bc_setup,
> .bc_up = xprt_rdma_bc_up,
> + .bc_get_xprt = xprt_rdma_bc_get_xprt,
> .bc_maxpayload = xprt_rdma_bc_maxpayload,
> .bc_free_rqst = xprt_rdma_bc_free_rqst,
> .bc_destroy = xprt_rdma_bc_destroy,
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index a13ccb643ce0..2726d71052a8 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -662,6 +662,7 @@ void xprt_rdma_cleanup(void);
> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
> int xprt_rdma_bc_up(struct svc_serv *, struct net *);
> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net);
> size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
> int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
> void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..16f9c7720465 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1411,6 +1411,12 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
> return 0;
> }
>
> +static struct svc_xprt *xs_tcp_bc_get_xprt(struct svc_serv *serv,
> + struct net *net)
> +{
> + return svc_find_xprt(serv, "tcp-bc", net, AF_UNSPEC, 0);
> +}
> +
> static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
> {
> return PAGE_SIZE;
> @@ -2668,6 +2674,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
> #ifdef CONFIG_SUNRPC_BACKCHANNEL
> .bc_setup = xprt_setup_bc,
> .bc_up = xs_tcp_bc_up,
> + .bc_get_xprt = xs_tcp_bc_get_xprt,
> .bc_maxpayload = xs_tcp_bc_maxpayload,
> .bc_free_rqst = xprt_free_bc_rqst,
> .bc_destroy = xprt_destroy_bc,

Reviewed-by: Jeff Layton <[email protected]>


2018-12-17 21:50:28

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Mon, Dec 17, 2018 at 07:23:54PM +0300, Vasily Averin wrote:
> if node have NFSv41+ mounts inside several net namespaces
> it can lead to use-after-free in svc_process_common()
>
> svc_process_common()
> /* Setup reply header */
> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE
>
> svc_process_common() can use already freed rqstp->rq_xprt,
> it was assigned in bc_svc_process() where it was taken from serv->sv_bc_xprt.
>
> serv is global structure but sv_bc_xprt is assigned per-netnamespace,
> so if nfsv41+ shares are mounted in several containers together
> bc_svc_process() can use wrong backchannel or even access freed memory.
>
> To find correct svc_xprt of client-related backchannel
> bc_svc_process() now calls new .bc_get_xprt callback
> that executes svc_find_xprt() with proper xprt name.

This stuff is confusing and I need to stare at it some more before I
understand, but it's weird that we'd need to search for the right xprt.

We know which connection the backchannel request came over, and there
should only be one backchannel using that connection, why can't we find
it by just chasing pointers the right way?

OK, I do need to look at it more.

--b.

>
> Signed-off-by: Vasily Averin <[email protected]>
> ---
> include/linux/sunrpc/xprt.h | 1 +
> net/sunrpc/svc.c | 22 ++++++++++++++++------
> net/sunrpc/xprtrdma/backchannel.c | 5 +++++
> net/sunrpc/xprtrdma/transport.c | 1 +
> net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
> net/sunrpc/xprtsock.c | 7 +++++++
> 6 files changed, 31 insertions(+), 6 deletions(-)
>
> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
> index a4ab4f8d9140..031d2843a002 100644
> --- a/include/linux/sunrpc/xprt.h
> +++ b/include/linux/sunrpc/xprt.h
> @@ -158,6 +158,7 @@ struct rpc_xprt_ops {
> int (*bc_setup)(struct rpc_xprt *xprt,
> unsigned int min_reqs);
> int (*bc_up)(struct svc_serv *serv, struct net *net);
> + struct svc_xprt*(*bc_get_xprt)(struct svc_serv *serv, struct net *net);
> size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
> void (*bc_free_rqst)(struct rpc_rqst *rqst);
> void (*bc_destroy)(struct rpc_xprt *xprt,
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index d13e05f1a990..a7264fd1b3db 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1450,16 +1450,22 @@ int
> bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
> struct svc_rqst *rqstp)
> {
> + struct net *net = req->rq_xprt->xprt_net;
> struct kvec *argv = &rqstp->rq_arg.head[0];
> struct kvec *resv = &rqstp->rq_res.head[0];
> struct rpc_task *task;
> + struct svc_xprt *s_xprt;
> int proc_error;
> int error;
>
> dprintk("svc: %s(%p)\n", __func__, req);
>
> + s_xprt = req->rq_xprt->ops->bc_get_xprt(serv, net);
> + if (!s_xprt)
> + goto proc_error;
> +
> /* Build the svc_rqst used by the common processing routine */
> - rqstp->rq_xprt = serv->sv_bc_xprt;
> + rqstp->rq_xprt = s_xprt;
> rqstp->rq_xid = req->rq_xid;
> rqstp->rq_prot = req->rq_xprt->prot;
> rqstp->rq_server = serv;
> @@ -1494,13 +1500,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
>
> /* Parse and execute the bc call */
> proc_error = svc_process_common(rqstp, argv, resv);
> + svc_xprt_put(rqstp->rq_xprt);
>
> atomic_inc(&req->rq_xprt->bc_free_slots);
> - if (!proc_error) {
> - /* Processing error: drop the request */
> - xprt_free_bc_request(req);
> - return 0;
> - }
> + if (!proc_error)
> + goto proc_error;
>
> /* Finally, send the reply synchronously */
> memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
> @@ -1517,6 +1521,12 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
> out:
> dprintk("svc: %s(), error=%d\n", __func__, error);
> return error;
> +
> +proc_error:
> + /* Processing error: drop the request */
> + xprt_free_bc_request(req);
> + error = -EINVAL;
> + goto out;
> }
> EXPORT_SYMBOL_GPL(bc_svc_process);
> #endif /* CONFIG_SUNRPC_BACKCHANNEL */
> diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
> index e5b367a3e517..3e06aeacda43 100644
> --- a/net/sunrpc/xprtrdma/backchannel.c
> +++ b/net/sunrpc/xprtrdma/backchannel.c
> @@ -133,6 +133,11 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
> return 0;
> }
>
> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net)
> +{
> + return svc_find_xprt(serv, "rdma-bc", net, AF_UNSPEC, 0);
> +}
> +
> /**
> * xprt_rdma_bc_maxpayload - Return maximum backchannel message size
> * @xprt: transport
> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
> index ae2a83828953..41d67de93531 100644
> --- a/net/sunrpc/xprtrdma/transport.c
> +++ b/net/sunrpc/xprtrdma/transport.c
> @@ -828,6 +828,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> .bc_setup = xprt_rdma_bc_setup,
> .bc_up = xprt_rdma_bc_up,
> + .bc_get_xprt = xprt_rdma_bc_get_xprt,
> .bc_maxpayload = xprt_rdma_bc_maxpayload,
> .bc_free_rqst = xprt_rdma_bc_free_rqst,
> .bc_destroy = xprt_rdma_bc_destroy,
> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
> index a13ccb643ce0..2726d71052a8 100644
> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
> @@ -662,6 +662,7 @@ void xprt_rdma_cleanup(void);
> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
> int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
> int xprt_rdma_bc_up(struct svc_serv *, struct net *);
> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net);
> size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
> int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
> void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
> index 8a5e823e0b33..16f9c7720465 100644
> --- a/net/sunrpc/xprtsock.c
> +++ b/net/sunrpc/xprtsock.c
> @@ -1411,6 +1411,12 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
> return 0;
> }
>
> +static struct svc_xprt *xs_tcp_bc_get_xprt(struct svc_serv *serv,
> + struct net *net)
> +{
> + return svc_find_xprt(serv, "tcp-bc", net, AF_UNSPEC, 0);
> +}
> +
> static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
> {
> return PAGE_SIZE;
> @@ -2668,6 +2674,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
> #ifdef CONFIG_SUNRPC_BACKCHANNEL
> .bc_setup = xprt_setup_bc,
> .bc_up = xs_tcp_bc_up,
> + .bc_get_xprt = xs_tcp_bc_get_xprt,
> .bc_maxpayload = xs_tcp_bc_maxpayload,
> .bc_free_rqst = xprt_free_bc_rqst,
> .bc_destroy = xprt_destroy_bc,
> --
> 2.17.1

2018-12-18 06:46:07

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/18/18 12:50 AM, J. Bruce Fields wrote:
> On Mon, Dec 17, 2018 at 07:23:54PM +0300, Vasily Averin wrote:
>> if node have NFSv41+ mounts inside several net namespaces
>> it can lead to use-after-free in svc_process_common()
>>
>> svc_process_common()
>> /* Setup reply header */
>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<< HERE
>>
>> svc_process_common() can use already freed rqstp->rq_xprt,
>> it was assigned in bc_svc_process() where it was taken from serv->sv_bc_xprt.
>>
>> serv is global structure but sv_bc_xprt is assigned per-netnamespace,
>> so if nfsv41+ shares are mounted in several containers together
>> bc_svc_process() can use wrong backchannel or even access freed memory.
>>
>> To find correct svc_xprt of client-related backchannel
>> bc_svc_process() now calls new .bc_get_xprt callback
>> that executes svc_find_xprt() with proper xprt name.
>
> This stuff is confusing and I need to stare at it some more before I
> understand, but it's weird that we'd need to search for the right xprt.

All NFS clients in all net namespaces used the same minorversion
shares common nfs_callback_data taken from global nfs_callback_info array.

Moreover these clients can use either rdma or nfs transport,
however only one of them can be used in one net namespace.

Each net namespace must have own backchannel,
it cannot depend on other net namespaces,
because at least they can use different transports.

So one svc_serv should be able to handle several (per-netns) backchannels.

Frankly speaking If you prefer I can easily convert global nfs_callback_info to per net-namespace.
I've checked, it works too. However current solution looks better for me.

> We know which connection the backchannel request came over, and there
> should only be one backchannel using that connection, why can't we find
> it by just chasing pointers the right way?

it is allocated by using follwing calltrace:
nfs_callback_up
nfs_callback_up_net
xprt->ops->bc_up(serv, net) -> xs_tcp_bc_up
svc_create_xprt(serv, "tcp-bc")
__svc_xpo_create
svc_bc_tcp_create
svc_bc_create_socket

Here backchannel's svc_sock/svc/xprt is created.
It is per-netns and therefore it cannot be saved as pointer on global svc_serv.

It could be saved on some xprt related to forechannel,
I've expected it was done already -- but it was not done.
I've tried to find any way to do it -- but without success,
according structures seems are not accessible in svc_bc_tcp_create.

Finally I've found that backchannel's xprt is added into serv->sv_permsocks
and svc_find_xprt can find it by name.

It would be great if you can advise some more simple way.

>
> OK, I do need to look at it more.

It is quite important for containers so I think this patch (or any alternative solution)
should be pushed in stable@.


> --b.
>
>>
>> Signed-off-by: Vasily Averin <[email protected]>
>> ---
>> include/linux/sunrpc/xprt.h | 1 +
>> net/sunrpc/svc.c | 22 ++++++++++++++++------
>> net/sunrpc/xprtrdma/backchannel.c | 5 +++++
>> net/sunrpc/xprtrdma/transport.c | 1 +
>> net/sunrpc/xprtrdma/xprt_rdma.h | 1 +
>> net/sunrpc/xprtsock.c | 7 +++++++
>> 6 files changed, 31 insertions(+), 6 deletions(-)
>>
>> diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
>> index a4ab4f8d9140..031d2843a002 100644
>> --- a/include/linux/sunrpc/xprt.h
>> +++ b/include/linux/sunrpc/xprt.h
>> @@ -158,6 +158,7 @@ struct rpc_xprt_ops {
>> int (*bc_setup)(struct rpc_xprt *xprt,
>> unsigned int min_reqs);
>> int (*bc_up)(struct svc_serv *serv, struct net *net);
>> + struct svc_xprt*(*bc_get_xprt)(struct svc_serv *serv, struct net *net);
>> size_t (*bc_maxpayload)(struct rpc_xprt *xprt);
>> void (*bc_free_rqst)(struct rpc_rqst *rqst);
>> void (*bc_destroy)(struct rpc_xprt *xprt,
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index d13e05f1a990..a7264fd1b3db 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -1450,16 +1450,22 @@ int
>> bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
>> struct svc_rqst *rqstp)
>> {
>> + struct net *net = req->rq_xprt->xprt_net;
>> struct kvec *argv = &rqstp->rq_arg.head[0];
>> struct kvec *resv = &rqstp->rq_res.head[0];
>> struct rpc_task *task;
>> + struct svc_xprt *s_xprt;
>> int proc_error;
>> int error;
>>
>> dprintk("svc: %s(%p)\n", __func__, req);
>>
>> + s_xprt = req->rq_xprt->ops->bc_get_xprt(serv, net);
>> + if (!s_xprt)
>> + goto proc_error;
>> +
>> /* Build the svc_rqst used by the common processing routine */
>> - rqstp->rq_xprt = serv->sv_bc_xprt;
>> + rqstp->rq_xprt = s_xprt;
>> rqstp->rq_xid = req->rq_xid;
>> rqstp->rq_prot = req->rq_xprt->prot;
>> rqstp->rq_server = serv;
>> @@ -1494,13 +1500,11 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
>>
>> /* Parse and execute the bc call */
>> proc_error = svc_process_common(rqstp, argv, resv);
>> + svc_xprt_put(rqstp->rq_xprt);
>>
>> atomic_inc(&req->rq_xprt->bc_free_slots);
>> - if (!proc_error) {
>> - /* Processing error: drop the request */
>> - xprt_free_bc_request(req);
>> - return 0;
>> - }
>> + if (!proc_error)
>> + goto proc_error;
>>
>> /* Finally, send the reply synchronously */
>> memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
>> @@ -1517,6 +1521,12 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
>> out:
>> dprintk("svc: %s(), error=%d\n", __func__, error);
>> return error;
>> +
>> +proc_error:
>> + /* Processing error: drop the request */
>> + xprt_free_bc_request(req);
>> + error = -EINVAL;
>> + goto out;
>> }
>> EXPORT_SYMBOL_GPL(bc_svc_process);
>> #endif /* CONFIG_SUNRPC_BACKCHANNEL */
>> diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
>> index e5b367a3e517..3e06aeacda43 100644
>> --- a/net/sunrpc/xprtrdma/backchannel.c
>> +++ b/net/sunrpc/xprtrdma/backchannel.c
>> @@ -133,6 +133,11 @@ int xprt_rdma_bc_up(struct svc_serv *serv, struct net *net)
>> return 0;
>> }
>>
>> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net)
>> +{
>> + return svc_find_xprt(serv, "rdma-bc", net, AF_UNSPEC, 0);
>> +}
>> +
>> /**
>> * xprt_rdma_bc_maxpayload - Return maximum backchannel message size
>> * @xprt: transport
>> diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
>> index ae2a83828953..41d67de93531 100644
>> --- a/net/sunrpc/xprtrdma/transport.c
>> +++ b/net/sunrpc/xprtrdma/transport.c
>> @@ -828,6 +828,7 @@ static const struct rpc_xprt_ops xprt_rdma_procs = {
>> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
>> .bc_setup = xprt_rdma_bc_setup,
>> .bc_up = xprt_rdma_bc_up,
>> + .bc_get_xprt = xprt_rdma_bc_get_xprt,
>> .bc_maxpayload = xprt_rdma_bc_maxpayload,
>> .bc_free_rqst = xprt_rdma_bc_free_rqst,
>> .bc_destroy = xprt_rdma_bc_destroy,
>> diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
>> index a13ccb643ce0..2726d71052a8 100644
>> --- a/net/sunrpc/xprtrdma/xprt_rdma.h
>> +++ b/net/sunrpc/xprtrdma/xprt_rdma.h
>> @@ -662,6 +662,7 @@ void xprt_rdma_cleanup(void);
>> #if defined(CONFIG_SUNRPC_BACKCHANNEL)
>> int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int);
>> int xprt_rdma_bc_up(struct svc_serv *, struct net *);
>> +struct svc_xprt *xprt_rdma_bc_get_xprt(struct svc_serv *serv, struct net *net);
>> size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *);
>> int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int);
>> void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *);
>> diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
>> index 8a5e823e0b33..16f9c7720465 100644
>> --- a/net/sunrpc/xprtsock.c
>> +++ b/net/sunrpc/xprtsock.c
>> @@ -1411,6 +1411,12 @@ static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
>> return 0;
>> }
>>
>> +static struct svc_xprt *xs_tcp_bc_get_xprt(struct svc_serv *serv,
>> + struct net *net)
>> +{
>> + return svc_find_xprt(serv, "tcp-bc", net, AF_UNSPEC, 0);
>> +}
>> +
>> static size_t xs_tcp_bc_maxpayload(struct rpc_xprt *xprt)
>> {
>> return PAGE_SIZE;
>> @@ -2668,6 +2674,7 @@ static const struct rpc_xprt_ops xs_tcp_ops = {
>> #ifdef CONFIG_SUNRPC_BACKCHANNEL
>> .bc_setup = xprt_setup_bc,
>> .bc_up = xs_tcp_bc_up,
>> + .bc_get_xprt = xs_tcp_bc_get_xprt,
>> .bc_maxpayload = xs_tcp_bc_maxpayload,
>> .bc_free_rqst = xprt_free_bc_rqst,
>> .bc_destroy = xprt_destroy_bc,
>> --
>> 2.17.1
>

2018-12-18 12:52:16

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Tue, 2018-12-18 at 09:45 +0300, Vasily Averin wrote:
> On 12/18/18 12:50 AM, J. Bruce Fields wrote:
> > On Mon, Dec 17, 2018 at 07:23:54PM +0300, Vasily Averin wrote:
> > > if node have NFSv41+ mounts inside several net namespaces
> > > it can lead to use-after-free in svc_process_common()
> > >
> > > svc_process_common()
> > > /* Setup reply header */
> > > rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<<
> > > HERE
> > >
> > > svc_process_common() can use already freed rqstp->rq_xprt,
> > > it was assigned in bc_svc_process() where it was taken from serv-
> > > >sv_bc_xprt.
> > >
> > > serv is global structure but sv_bc_xprt is assigned per-
> > > netnamespace,
> > > so if nfsv41+ shares are mounted in several containers together
> > > bc_svc_process() can use wrong backchannel or even access freed
> > > memory.
> > >
> > > To find correct svc_xprt of client-related backchannel
> > > bc_svc_process() now calls new .bc_get_xprt callback
> > > that executes svc_find_xprt() with proper xprt name.
> >
> > This stuff is confusing and I need to stare at it some more before
> > I
> > understand, but it's weird that we'd need to search for the right
> > xprt.
>
> All NFS clients in all net namespaces used the same minorversion
> shares common nfs_callback_data taken from global nfs_callback_info
> array.
>
> Moreover these clients can use either rdma or nfs transport,
> however only one of them can be used in one net namespace.
>
> Each net namespace must have own backchannel,
> it cannot depend on other net namespaces,
> because at least they can use different transports.
>
> So one svc_serv should be able to handle several (per-netns)
> backchannels.
>
> Frankly speaking If you prefer I can easily convert global
> nfs_callback_info to per net-namespace.
> I've checked, it works too. However current solution looks better for
> me.
>
> > We know which connection the backchannel request came over, and
> > there
> > should only be one backchannel using that connection, why can't we
> > find
> > it by just chasing pointers the right way?
>
> it is allocated by using follwing calltrace:
> nfs_callback_up
> nfs_callback_up_net
> xprt->ops->bc_up(serv, net) -> xs_tcp_bc_up
> svc_create_xprt(serv, "tcp-bc")
> __svc_xpo_create
> svc_bc_tcp_create
> svc_bc_create_socket
>
> Here backchannel's svc_sock/svc/xprt is created.
> It is per-netns and therefore it cannot be saved as pointer on global
> svc_serv.
>
> It could be saved on some xprt related to forechannel,
> I've expected it was done already -- but it was not done.
> I've tried to find any way to do it -- but without success,
> according structures seems are not accessible in svc_bc_tcp_create.
>
> Finally I've found that backchannel's xprt is added into serv-
> >sv_permsocks
> and svc_find_xprt can find it by name.
>
> It would be great if you can advise some more simple way.
>
> > OK, I do need to look at it more.
>
> It is quite important for containers so I think this patch (or any
> alternative solution)
> should be pushed in stable@.
>

The whole "let's set up rqstp->rq_xprt for the back channel" is nothing
but a giant hack in order to work around the fact that
svc_process_common() uses it to find the xpt_ops, and perform a couple
of (meaningless for the back channel) tests of xpt_flags.

What say we just pass in the xpt_ops as a parameter to
svc_process_common(), and make those xpt_flags tests check for whether
or not rqstp->rq_xprt is actually non-NULL?

It probably also requires us to store a pointer to struct net in the
struct svc_rqst so that nfs4_callback_compound() and
svcauth_gss_accept() can find it, but that should be OK since the
transport already has that referenced.

Cheers,
Trond
--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-18 14:36:11

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/18/18 3:49 PM, Trond Myklebust wrote:
> On Tue, 2018-12-18 at 09:45 +0300, Vasily Averin wrote:
>> On 12/18/18 12:50 AM, J. Bruce Fields wrote:
>>> On Mon, Dec 17, 2018 at 07:23:54PM +0300, Vasily Averin wrote:
>>>> if node have NFSv41+ mounts inside several net namespaces
>>>> it can lead to use-after-free in svc_process_common()
>>>>
>>>> svc_process_common()
>>>> /* Setup reply header */
>>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp); <<<
>>>> HERE
>>>>
>>>> svc_process_common() can use already freed rqstp->rq_xprt,
>>>> it was assigned in bc_svc_process() where it was taken from serv-
>>>>> sv_bc_xprt.
>>>>
>>>> serv is global structure but sv_bc_xprt is assigned per-
>>>> netnamespace,
>>>> so if nfsv41+ shares are mounted in several containers together
>>>> bc_svc_process() can use wrong backchannel or even access freed
>>>> memory.
>>>>
>>>> To find correct svc_xprt of client-related backchannel
>>>> bc_svc_process() now calls new .bc_get_xprt callback
>>>> that executes svc_find_xprt() with proper xprt name.
>>>
>>> This stuff is confusing and I need to stare at it some more before
>>> I
>>> understand, but it's weird that we'd need to search for the right
>>> xprt.
>>
>> All NFS clients in all net namespaces used the same minorversion
>> shares common nfs_callback_data taken from global nfs_callback_info
>> array.
>>
>> Moreover these clients can use either rdma or nfs transport,
>> however only one of them can be used in one net namespace.
>>
>> Each net namespace must have own backchannel,
>> it cannot depend on other net namespaces,
>> because at least they can use different transports.
>>
>> So one svc_serv should be able to handle several (per-netns)
>> backchannels.
>>
>> Frankly speaking If you prefer I can easily convert global
>> nfs_callback_info to per net-namespace.
>> I've checked, it works too. However current solution looks better for
>> me.
>>
>>> We know which connection the backchannel request came over, and
>>> there
>>> should only be one backchannel using that connection, why can't we
>>> find
>>> it by just chasing pointers the right way?
>>
>> it is allocated by using follwing calltrace:
>> nfs_callback_up
>> nfs_callback_up_net
>> xprt->ops->bc_up(serv, net) -> xs_tcp_bc_up
>> svc_create_xprt(serv, "tcp-bc")
>> __svc_xpo_create
>> svc_bc_tcp_create
>> svc_bc_create_socket
>>
>> Here backchannel's svc_sock/svc/xprt is created.
>> It is per-netns and therefore it cannot be saved as pointer on global
>> svc_serv.
>>
>> It could be saved on some xprt related to forechannel,
>> I've expected it was done already -- but it was not done.
>> I've tried to find any way to do it -- but without success,
>> according structures seems are not accessible in svc_bc_tcp_create.
>>
>> Finally I've found that backchannel's xprt is added into serv-
>>> sv_permsocks
>> and svc_find_xprt can find it by name.
>>
>> It would be great if you can advise some more simple way.
>>
>>> OK, I do need to look at it more.
>>
>> It is quite important for containers so I think this patch (or any
>> alternative solution)
>> should be pushed in stable@.
>>
>
> The whole "let's set up rqstp->rq_xprt for the back channel" is nothing
> but a giant hack in order to work around the fact that
> svc_process_common() uses it to find the xpt_ops, and perform a couple
> of (meaningless for the back channel) tests of xpt_flags.
>
> What say we just pass in the xpt_ops as a parameter to
> svc_process_common(), and make those xpt_flags tests check for whether
> or not rqstp->rq_xprt is actually non-NULL?

To access proper xpt_flags inside svc_process_common()
we need to pass svc_xprt instead of xpt_ops.

Do you mean something like following?

--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1148,7 +1148,7 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
* Common routine for processing the RPC request.
*/
static int
-svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv, struct svc_xprt *s_xprt)
{
struct svc_program *progp;
const struct svc_version *versp = NULL; /* compiler food */
@@ -1172,7 +1172,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
clear_bit(RQ_DROPME, &rqstp->rq_flags);

/* Setup reply header */
- rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
+ s_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);

svc_putu32(resv, rqstp->rq_xid);

@@ -1245,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
* fit.
*/
if (versp->vs_need_cong_ctrl &&
- !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
+ !test_bit(XPT_CONG_CTRL, &s_xprt->xpt_flags))


@@ -1336,8 +1336,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
return 0;

close:
- if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
- svc_close_xprt(rqstp->rq_xprt);
+ if (test_bit(XPT_TEMP, &s_xprt->xpt_flags))
+ svc_close_xprt(s_xprt);
dprintk("svc: svc_process close\n");
return 0;


> It probably also requires us to store a pointer to struct net in the
> struct svc_rqst so that nfs4_callback_compound() and
> svcauth_gss_accept() can find it, but that should be OK since the
> transport already has that referenced.
>
> Cheers,
> Trond
>

2018-12-18 14:55:22

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Tue, 2018-12-18 at 17:35 +0300, Vasily Averin wrote:
> On 12/18/18 3:49 PM, Trond Myklebust wrote:
> > On Tue, 2018-12-18 at 09:45 +0300, Vasily Averin wrote:
> > > On 12/18/18 12:50 AM, J. Bruce Fields wrote:
> > > > On Mon, Dec 17, 2018 at 07:23:54PM +0300, Vasily Averin wrote:
> > > > > if node have NFSv41+ mounts inside several net namespaces
> > > > > it can lead to use-after-free in svc_process_common()
> > > > >
> > > > > svc_process_common()
> > > > > /* Setup reply header */
> > > > > rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
> > > > > <<<
> > > > > HERE
> > > > >
> > > > > svc_process_common() can use already freed rqstp->rq_xprt,
> > > > > it was assigned in bc_svc_process() where it was taken from
> > > > > serv-
> > > > > > sv_bc_xprt.
> > > > >
> > > > > serv is global structure but sv_bc_xprt is assigned per-
> > > > > netnamespace,
> > > > > so if nfsv41+ shares are mounted in several containers
> > > > > together
> > > > > bc_svc_process() can use wrong backchannel or even access
> > > > > freed
> > > > > memory.
> > > > >
> > > > > To find correct svc_xprt of client-related backchannel
> > > > > bc_svc_process() now calls new .bc_get_xprt callback
> > > > > that executes svc_find_xprt() with proper xprt name.
> > > >
> > > > This stuff is confusing and I need to stare at it some more
> > > > before
> > > > I
> > > > understand, but it's weird that we'd need to search for the
> > > > right
> > > > xprt.
> > >
> > > All NFS clients in all net namespaces used the same minorversion
> > > shares common nfs_callback_data taken from global
> > > nfs_callback_info
> > > array.
> > >
> > > Moreover these clients can use either rdma or nfs transport,
> > > however only one of them can be used in one net namespace.
> > >
> > > Each net namespace must have own backchannel,
> > > it cannot depend on other net namespaces,
> > > because at least they can use different transports.
> > >
> > > So one svc_serv should be able to handle several (per-netns)
> > > backchannels.
> > >
> > > Frankly speaking If you prefer I can easily convert global
> > > nfs_callback_info to per net-namespace.
> > > I've checked, it works too. However current solution looks better
> > > for
> > > me.
> > >
> > > > We know which connection the backchannel request came over, and
> > > > there
> > > > should only be one backchannel using that connection, why can't
> > > > we
> > > > find
> > > > it by just chasing pointers the right way?
> > >
> > > it is allocated by using follwing calltrace:
> > > nfs_callback_up
> > > nfs_callback_up_net
> > > xprt->ops->bc_up(serv, net) -> xs_tcp_bc_up
> > > svc_create_xprt(serv, "tcp-bc")
> > > __svc_xpo_create
> > > svc_bc_tcp_create
> > > svc_bc_create_socket
> > >
> > > Here backchannel's svc_sock/svc/xprt is created.
> > > It is per-netns and therefore it cannot be saved as pointer on
> > > global
> > > svc_serv.
> > >
> > > It could be saved on some xprt related to forechannel,
> > > I've expected it was done already -- but it was not done.
> > > I've tried to find any way to do it -- but without success,
> > > according structures seems are not accessible in
> > > svc_bc_tcp_create.
> > >
> > > Finally I've found that backchannel's xprt is added into serv-
> > > > sv_permsocks
> > > and svc_find_xprt can find it by name.
> > >
> > > It would be great if you can advise some more simple way.
> > >
> > > > OK, I do need to look at it more.
> > >
> > > It is quite important for containers so I think this patch (or
> > > any
> > > alternative solution)
> > > should be pushed in stable@.
> > >
> >
> > The whole "let's set up rqstp->rq_xprt for the back channel" is
> > nothing
> > but a giant hack in order to work around the fact that
> > svc_process_common() uses it to find the xpt_ops, and perform a
> > couple
> > of (meaningless for the back channel) tests of xpt_flags.
> >
> > What say we just pass in the xpt_ops as a parameter to
> > svc_process_common(), and make those xpt_flags tests check for
> > whether
> > or not rqstp->rq_xprt is actually non-NULL?
>
> To access proper xpt_flags inside svc_process_common()
> we need to pass svc_xprt instead of xpt_ops.

No. We don't care about xpt_flags for the back channel because there is
no "server transport". The actual transport is stored in the 'struct
rpc_rqst', and is the struct rpc_xprt corresponding to the client
socket or RDMA channel.

IOW: All we really need in svc_process_common() is to be able to run
rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
either as a pointer to the struct svc_xprt_ops itself.

The flags are irrelevant, because they refer to a transport object that
isn't real.

>
> Do you mean something like following?
>
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1148,7 +1148,7 @@ static __printf(2,3) void svc_printk(struct
> svc_rqst *rqstp, const char *fmt, ..
> * Common routine for processing the RPC request.
> */
> static int
> -svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct
> kvec *resv)
> +svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct
> kvec *resv, struct svc_xprt *s_xprt)
> {
> struct svc_program *progp;
> const struct svc_version *versp = NULL; /* compiler food */
> @@ -1172,7 +1172,7 @@ svc_process_common(struct svc_rqst *rqstp,
> struct kvec *argv, struct kvec *resv)
> clear_bit(RQ_DROPME, &rqstp->rq_flags);
>
> /* Setup reply header */
> - rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
> + s_xprt->xpt_ops->xpo_prep_reply_hdr(rqstp);
>
> svc_putu32(resv, rqstp->rq_xid);
>
> @@ -1245,7 +1245,7 @@ svc_process_common(struct svc_rqst *rqstp,
> struct kvec *argv, struct kvec *resv)
> * fit.
> */
> if (versp->vs_need_cong_ctrl &&
> - !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags))
> + !test_bit(XPT_CONG_CTRL, &s_xprt->xpt_flags))


if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && !test_bit(...)))

>
>
> @@ -1336,8 +1336,8 @@ svc_process_common(struct svc_rqst *rqstp,
> struct kvec *argv, struct kvec *resv)
> return 0;
>
> close:
> - if (test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
> - svc_close_xprt(rqstp->rq_xprt);
> + if (test_bit(XPT_TEMP, &s_xprt->xpt_flags))
> + svc_close_xprt(s_xprt);
> dprintk("svc: svc_process close\n");
> return 0;
>
>
> > It probably also requires us to store a pointer to struct net in
> > the
> > struct svc_rqst so that nfs4_callback_compound() and
> > svcauth_gss_accept() can find it, but that should be OK since the
> > transport already has that referenced.
> >
> > Cheers,
> > Trond
> >

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-18 20:03:01

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/18/18 5:55 PM, Trond Myklebust wrote:
>>> It probably also requires us to store a pointer to struct net in
>>> the
>>> struct svc_rqst so that nfs4_callback_compound() and
>>> svcauth_gss_accept() can find it, but that should be OK since the
>>> transport already has that referenced.

Ok, I can fix these functions and their sub-calls.
However rqst->rq_xprt is used in other functions that seems can be called inside svc_process_common()
- in trace_svc_process(rqstp, progp->pg_name);
- in svc_reserve_auth(rqstp, ...) -> svc_reserve()
- svc_authorise() -> svcauth_gss_release()

It seems I should fix these places too, it isn't?
could you please advise how to fix svc_reserve() ?

2018-12-18 20:43:16

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Tue, 2018-12-18 at 23:02 +0300, Vasily Averin wrote:
> On 12/18/18 5:55 PM, Trond Myklebust wrote:
> > > > It probably also requires us to store a pointer to struct net
> > > > in
> > > > the
> > > > struct svc_rqst so that nfs4_callback_compound() and
> > > > svcauth_gss_accept() can find it, but that should be OK since
> > > > the
> > > > transport already has that referenced.
>
> Ok, I can fix these functions and their sub-calls.
> However rqst->rq_xprt is used in other functions that seems can be
> called inside svc_process_common()
> - in trace_svc_process(rqstp, progp->pg_name);
> - in svc_reserve_auth(rqstp, ...) -> svc_reserve()
> - svc_authorise() -> svcauth_gss_release()
>
> It seems I should fix these places too, it isn't?
> could you please advise how to fix svc_reserve() ?

We don't want svc_reserve() to run at all for the back channel, so I
guess that a test for rqstp->rq_xprt != NULL is appropriate there too.

svcauth_gss_release() is just using rqstp->rq_xprt to find the net
namespace, so if you add a pointer rqstp->rq_net to fix
nfs4_callback_compound, then that will fix the gss case as well.

For trace_svc_process(), maybe pull rqst->rq_xprt->xpt_remotebuf out of
the tracepoint definition in include/trace/events/sunrpc.h and make it
a tracepoint argument that is allowed to be NULL?

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-18 21:31:39

by Vladis Dronov

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

Hello,

The CVE-2018-16884 id was assigned to this flaw and proposed to MITRE.
We would like to suggest to use this id in public communications
regarding this flaw.

Best regards,
Vladis Dronov | Red Hat, Inc. | Product Security Engineer

2018-12-19 11:25:19

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()



On 12/18/18 11:43 PM, Trond Myklebust wrote:
> On Tue, 2018-12-18 at 23:02 +0300, Vasily Averin wrote:
>> On 12/18/18 5:55 PM, Trond Myklebust wrote:
>>>>> It probably also requires us to store a pointer to struct net
>>>>> in
>>>>> the
>>>>> struct svc_rqst so that nfs4_callback_compound() and
>>>>> svcauth_gss_accept() can find it, but that should be OK since
>>>>> the
>>>>> transport already has that referenced.
>>
>> Ok, I can fix these functions and their sub-calls.
>> However rqst->rq_xprt is used in other functions that seems can be
>> called inside svc_process_common()
>> - in trace_svc_process(rqstp, progp->pg_name);
>> - in svc_reserve_auth(rqstp, ...) -> svc_reserve()
>> - svc_authorise() -> svcauth_gss_release()
>>
>> It seems I should fix these places too, it isn't?
>> could you please advise how to fix svc_reserve() ?
>
> We don't want svc_reserve() to run at all for the back channel, so I
> guess that a test for rqstp->rq_xprt != NULL is appropriate there too.
>
> svcauth_gss_release() is just using rqstp->rq_xprt to find the net
> namespace, so if you add a pointer rqstp->rq_net to fix
> nfs4_callback_compound, then that will fix the gss case as well.
>
> For trace_svc_process(), maybe pull rqst->rq_xprt->xpt_remotebuf out of
> the tracepoint definition in include/trace/events/sunrpc.h and make it
> a tracepoint argument that is allowed to be NULL?

This one seems works, could you please check it before formal submit ?
NFSv4 callback-1644 [002] .... 4731.064372: svc_process: addr=(null) xid=0x0b0924e3 service=NFSv4 callback vers=1 proc=1

Frankly speaking I'm afraid that I missed something,
rqstp->rq_xprt is widely used and nobody expect that it can be NULL.

And even I missed nothing -- it's quite tricky anyway.
Future cahnges can add new calls or execute old non-empty-xprt-aware
functions and trigger crash in some exotic configuration.

Thank you,
Vasily Averin


Attachments:
diff-empty-rq_xprt (6.32 kB)

2018-12-20 01:39:20

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

Dear Trond,
Red Hat security believes the problem is quite important security issue:
https://access.redhat.com/security/cve/cve-2018-16884

Fix should be backported to affected distributions.

Could you please approve my first patch and push it to stable@ ?
From my PoV it is correctly fixes the problem, it breaks nothing and easy for backports,
lightly modified it can be even live-patched.

Other patches including switch to using empty rqst->rq_xprt can wait.

Thank you,
Vasily Averin

On 12/19/18 2:25 PM, Vasily Averin wrote:
> On 12/18/18 11:43 PM, Trond Myklebust wrote:
>> On Tue, 2018-12-18 at 23:02 +0300, Vasily Averin wrote:
>>> On 12/18/18 5:55 PM, Trond Myklebust wrote:
>>>>>> It probably also requires us to store a pointer to struct net
>>>>>> in
>>>>>> the
>>>>>> struct svc_rqst so that nfs4_callback_compound() and
>>>>>> svcauth_gss_accept() can find it, but that should be OK since
>>>>>> the
>>>>>> transport already has that referenced.
>>>
>>> Ok, I can fix these functions and their sub-calls.
>>> However rqst->rq_xprt is used in other functions that seems can be
>>> called inside svc_process_common()
>>> - in trace_svc_process(rqstp, progp->pg_name);
>>> - in svc_reserve_auth(rqstp, ...) -> svc_reserve()
>>> - svc_authorise() -> svcauth_gss_release()
>>>
>>> It seems I should fix these places too, it isn't?
>>> could you please advise how to fix svc_reserve() ?
>>
>> We don't want svc_reserve() to run at all for the back channel, so I
>> guess that a test for rqstp->rq_xprt != NULL is appropriate there too.
>>
>> svcauth_gss_release() is just using rqstp->rq_xprt to find the net
>> namespace, so if you add a pointer rqstp->rq_net to fix
>> nfs4_callback_compound, then that will fix the gss case as well.
>>
>> For trace_svc_process(), maybe pull rqst->rq_xprt->xpt_remotebuf out of
>> the tracepoint definition in include/trace/events/sunrpc.h and make it
>> a tracepoint argument that is allowed to be NULL?
>
> This one seems works, could you please check it before formal submit ?
> NFSv4 callback-1644 [002] .... 4731.064372: svc_process: addr=(null) xid=0x0b0924e3 service=NFSv4 callback vers=1 proc=1
>
> Frankly speaking I'm afraid that I missed something,
> rqstp->rq_xprt is widely used and nobody expect that it can be NULL.
>
> And even I missed nothing -- it's quite tricky anyway.
> Future cahnges can add new calls or execute old non-empty-xprt-aware
> functions and trigger crash in some exotic configuration.
>
> Thank you,
> Vasily Averin
>

2018-12-20 01:58:56

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Thu, 2018-12-20 at 04:39 +0300, Vasily Averin wrote:
> Dear Trond,
> Red Hat security believes the problem is quite important security
> issue:
> https://access.redhat.com/security/cve/cve-2018-16884
>
> Fix should be backported to affected distributions.
>
> Could you please approve my first patch and push it to stable@ ?
> From my PoV it is correctly fixes the problem, it breaks nothing and
> easy for backports,
> lightly modified it can be even live-patched.
>
> Other patches including switch to using empty rqst->rq_xprt can wait.
>

That patch is not acceptable for upstream.



> Thank you,
> Vasily Averin
>
> On 12/19/18 2:25 PM, Vasily Averin wrote:
> > On 12/18/18 11:43 PM, Trond Myklebust wrote:
> > > On Tue, 2018-12-18 at 23:02 +0300, Vasily Averin wrote:
> > > > On 12/18/18 5:55 PM, Trond Myklebust wrote:
> > > > > > > It probably also requires us to store a pointer to struct
> > > > > > > net
> > > > > > > in
> > > > > > > the
> > > > > > > struct svc_rqst so that nfs4_callback_compound() and
> > > > > > > svcauth_gss_accept() can find it, but that should be OK
> > > > > > > since
> > > > > > > the
> > > > > > > transport already has that referenced.
> > > >
> > > > Ok, I can fix these functions and their sub-calls.
> > > > However rqst->rq_xprt is used in other functions that seems
> > > > can be
> > > > called inside svc_process_common()
> > > > - in trace_svc_process(rqstp, progp->pg_name);
> > > > - in svc_reserve_auth(rqstp, ...) -> svc_reserve()
> > > > - svc_authorise() -> svcauth_gss_release()
> > > >
> > > > It seems I should fix these places too, it isn't?
> > > > could you please advise how to fix svc_reserve() ?
> > >
> > > We don't want svc_reserve() to run at all for the back channel,
> > > so I
> > > guess that a test for rqstp->rq_xprt != NULL is appropriate there
> > > too.
> > >
> > > svcauth_gss_release() is just using rqstp->rq_xprt to find the
> > > net
> > > namespace, so if you add a pointer rqstp->rq_net to fix
> > > nfs4_callback_compound, then that will fix the gss case as well.
> > >
> > > For trace_svc_process(), maybe pull rqst->rq_xprt->xpt_remotebuf
> > > out of
> > > the tracepoint definition in include/trace/events/sunrpc.h and
> > > make it
> > > a tracepoint argument that is allowed to be NULL?
> >
> > This one seems works, could you please check it before formal
> > submit ?
> > NFSv4 callback-1644 [002] .... 4731.064372: svc_process:
> > addr=(null) xid=0x0b0924e3 service=NFSv4 callback vers=1 proc=1
> >
> > Frankly speaking I'm afraid that I missed something,
> > rqstp->rq_xprt is widely used and nobody expect that it can be
> > NULL.
> >
> > And even I missed nothing -- it's quite tricky anyway.
> > Future cahnges can add new calls or execute old non-empty-xprt-
> > aware
> > functions and trigger crash in some exotic configuration.
> >
> > Thank you,
> > Vasily Averin
> >
--
Trond Myklebust
CTO, Hammerspace Inc
4300 El Camino Real, Suite 105
Los Altos, CA 94022
http://www.hammer.space


2018-12-20 09:33:17

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/20/18 4:58 AM, Trond Myklebust wrote:
> On Thu, 2018-12-20 at 04:39 +0300, Vasily Averin wrote:
>> Dear Trond,
>> Red Hat security believes the problem is quite important security
>> issue:
>> https://access.redhat.com/security/cve/cve-2018-16884
>>
>> Fix should be backported to affected distributions.
>>
>> Could you please approve my first patch and push it to stable@ ?
>> From my PoV it is correctly fixes the problem, it breaks nothing and
>> easy for backports,
>> lightly modified it can be even live-patched.
>>
>> Other patches including switch to using empty rqst->rq_xprt can wait.
>>
>
> That patch is not acceptable for upstream.

In this case how about my initial plan B -- make svc_serv per net-namespace?
It executes additional per-netns nfsv4 callback threads
but does not require any changes in existing sunrpc code?


Attachments:
diff-ms-nfs-make-svc_serv-per-netns (4.25 kB)

2018-12-20 11:58:11

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Thu, 2018-12-20 at 12:30 +0300, Vasily Averin wrote:
> On 12/20/18 4:58 AM, Trond Myklebust wrote:
> > On Thu, 2018-12-20 at 04:39 +0300, Vasily Averin wrote:
> > > Dear Trond,
> > > Red Hat security believes the problem is quite important security
> > > issue:
> > > https://access.redhat.com/security/cve/cve-2018-16884
> > >
> > > Fix should be backported to affected distributions.
> > >
> > > Could you please approve my first patch and push it to stable@ ?
> > > From my PoV it is correctly fixes the problem, it breaks nothing
> > > and
> > > easy for backports,
> > > lightly modified it can be even live-patched.
> > >
> > > Other patches including switch to using empty rqst->rq_xprt can
> > > wait.
> > >
> >
> > That patch is not acceptable for upstream.
>
> In this case how about my initial plan B -- make svc_serv per net-
> namespace?
> It executes additional per-netns nfsv4 callback threads
> but does not require any changes in existing sunrpc code?

Can we please fix this issue properly without adding more hacks? The
hacks are what has caused the problem in the first place.

The server transport code is completely irrelevant to the client
backchannel and so anything in the backchannel code path that relies on
tests or checks of the "server transport state" is going to be broken.

--
Trond Myklebust
CTO, Hammerspace Inc
4300 El Camino Real, Suite 105
Los Altos, CA 94022
http://www.hammer.space


2018-12-21 01:00:08

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
> No. We don't care about xpt_flags for the back channel because there is
> no "server transport". The actual transport is stored in the 'struct
> rpc_rqst', and is the struct rpc_xprt corresponding to the client
> socket or RDMA channel.
>
> IOW: All we really need in svc_process_common() is to be able to run
> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
> either as a pointer to the struct svc_xprt_ops itself.

For what it's worth, I'd rather get rid of that op--it's an awfully
roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.

--b.

2018-12-21 11:30:51

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/21/18 4:00 AM, [email protected] wrote:
> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>> No. We don't care about xpt_flags for the back channel because there is
>> no "server transport". The actual transport is stored in the 'struct
>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>> socket or RDMA channel.
>>
>> IOW: All we really need in svc_process_common() is to be able to run
>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
>> either as a pointer to the struct svc_xprt_ops itself.
>
> For what it's worth, I'd rather get rid of that op--it's an awfully
> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.

I'll try to save pointer to xpt_ops on per-netns sunrpc_net,
and use it in svc_process_common() if rqstp->rq_xprt == NULL.

2018-12-21 17:39:57

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/21/18 2:30 PM, Vasily Averin wrote:
> On 12/21/18 4:00 AM, [email protected] wrote:
>> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>>> No. We don't care about xpt_flags for the back channel because there is
>>> no "server transport". The actual transport is stored in the 'struct
>>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>>> socket or RDMA channel.
>>>
>>> IOW: All we really need in svc_process_common() is to be able to run
>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
>>> either as a pointer to the struct svc_xprt_ops itself.
>>
>> For what it's worth, I'd rather get rid of that op--it's an awfully
>> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>
> I'll try to save pointer to xpt_ops on per-netns sunrpc_net,
> and use it in svc_process_common() if rqstp->rq_xprt == NULL.

Bruce, Trond,
I've send v3 patch version, and waiting for your feedback.

Thank you,
Vasily Averin

2018-12-22 17:47:11

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/21/18 4:00 AM, [email protected] wrote:
> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>> No. We don't care about xpt_flags for the back channel because there is
>> no "server transport". The actual transport is stored in the 'struct
>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>> socket or RDMA channel.
>>
>> IOW: All we really need in svc_process_common() is to be able to run
>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
>> either as a pointer to the struct svc_xprt_ops itself.
>
> For what it's worth, I'd rather get rid of that op--it's an awfully
> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.

Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY to call
svc_tcp_prep_reply_hdr() in svc_process_common() ?
And according call for rdma-bc does nothing useful at all?

I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and just
provide pointer to svc_tcp_prep_reply_hdr() in svc_process_common()
via per-netns sunrpc_net -- and seems it was enough, my testcase worked correctly.

Am I missed something probably?
Should we really remove svc_create_xprt( "tcp/rdma-bc"...) related stuff? ?

2018-12-23 20:52:54

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Sat, Dec 22, 2018 at 08:46:55PM +0300, Vasily Averin wrote:
> On 12/21/18 4:00 AM, [email protected] wrote:
> > On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
> >> No. We don't care about xpt_flags for the back channel because there is
> >> no "server transport". The actual transport is stored in the 'struct
> >> rpc_rqst', and is the struct rpc_xprt corresponding to the client
> >> socket or RDMA channel.
> >>
> >> IOW: All we really need in svc_process_common() is to be able to run
> >> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
> >> either as a pointer to the struct svc_xprt_ops itself.
> >
> > For what it's worth, I'd rather get rid of that op--it's an awfully
> > roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>
> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY to call
> svc_tcp_prep_reply_hdr() in svc_process_common() ?
> And according call for rdma-bc does nothing useful at all?

Right, in the rdma case it's:

void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
{
}

> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and
> just provide pointer to svc_tcp_prep_reply_hdr() in
> svc_process_common() via per-netns sunrpc_net -- and seems it was
> enough, my testcase worked correctly.
>
> Am I missed something probably? Should we really remove
> svc_create_xprt( "tcp/rdma-bc"...) related stuff? ?

Haven't looked carefully, but off the top of my head I can't see why
that wouldn't work.

I also tried some patches that replace that op by a flag bit (doesn't
address the original problem here, just seemed like a simplification):

git://linux-nfs.org/~bfields/linux-topics.git

but I don't if that's compatible with what you've done.

--b.

2018-12-23 21:03:40

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/23/18 11:52 PM, [email protected] wrote:
> On Sat, Dec 22, 2018 at 08:46:55PM +0300, Vasily Averin wrote:
>> On 12/21/18 4:00 AM, [email protected] wrote:
>>> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>>>> No. We don't care about xpt_flags for the back channel because there is
>>>> no "server transport". The actual transport is stored in the 'struct
>>>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>>>> socket or RDMA channel.
>>>>
>>>> IOW: All we really need in svc_process_common() is to be able to run
>>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be passed
>>>> either as a pointer to the struct svc_xprt_ops itself.
>>>
>>> For what it's worth, I'd rather get rid of that op--it's an awfully
>>> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>>
>> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY to call
>> svc_tcp_prep_reply_hdr() in svc_process_common() ?
>> And according call for rdma-bc does nothing useful at all?
>
> Right, in the rdma case it's:
>
> void svc_rdma_prep_reply_hdr(struct svc_rqst *rqstp)
> {
> }
>
>> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and
>> just provide pointer to svc_tcp_prep_reply_hdr() in
>> svc_process_common() via per-netns sunrpc_net -- and seems it was
>> enough, my testcase worked correctly.
>>
>> Am I missed something probably? Should we really remove
>> svc_create_xprt( "tcp/rdma-bc"...) related stuff? ?
>
> Haven't looked carefully, but off the top of my head I can't see why
> that wouldn't work.

I've prepared new patch version removed svc_create_xprt( "tcp/rdma-bc"...)
as far as I see it works correctly.
I'm going to submit it tomorrow morning.

> I also tried some patches that replace that op by a flag bit (doesn't
> address the original problem here, just seemed like a simplification):
>
> git://linux-nfs.org/~bfields/linux-topics.git
>
> but I don't if that's compatible with what you've done.
>
> --b.
>

2018-12-23 23:56:50

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
> On 12/21/18 4:00 AM, [email protected] wrote:
> > On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
> > > No. We don't care about xpt_flags for the back channel because
> > > there is
> > > no "server transport". The actual transport is stored in the
> > > 'struct
> > > rpc_rqst', and is the struct rpc_xprt corresponding to the client
> > > socket or RDMA channel.
> > >
> > > IOW: All we really need in svc_process_common() is to be able to
> > > run
> > > rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be
> > > passed
> > > either as a pointer to the struct svc_xprt_ops itself.
> >
> > For what it's worth, I'd rather get rid of that op--it's an awfully
> > roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>
> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY
> to call
> svc_tcp_prep_reply_hdr() in svc_process_common() ?
> And according call for rdma-bc does nothing useful at all?
>
> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and
> just
> provide pointer to svc_tcp_prep_reply_hdr() in svc_process_common()
> via per-netns sunrpc_net -- and seems it was enough, my testcase
> worked correctly.

I don't see how that function is related to net namespaces. As far as I
can tell, it only signals whether or not the type of transport uses the
TCP record marking scheme.

IOW: it depends on whether the client is using a stream based protocol
like TCP, or a datagram-like protocol like UDP, or RDMA. Whether that
use is occurring in a private net namespace or in the init process
namespace would be irrelevant.

> Am I missed something probably?
> Should we really remove svc_create_xprt( "tcp/rdma-bc"...) related
> stuff? ?

Agreed. The 'bc_up' callback in struct rpc_xprt_ops serves no
discernible purpose, and can be removed.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-24 05:51:24

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/24/18 2:56 AM, Trond Myklebust wrote:
> On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
>> On 12/21/18 4:00 AM, [email protected] wrote:
>>> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>>>> No. We don't care about xpt_flags for the back channel because
>>>> there is
>>>> no "server transport". The actual transport is stored in the
>>>> 'struct
>>>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>>>> socket or RDMA channel.
>>>>
>>>> IOW: All we really need in svc_process_common() is to be able to
>>>> run
>>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be
>>>> passed
>>>> either as a pointer to the struct svc_xprt_ops itself.
>>>
>>> For what it's worth, I'd rather get rid of that op--it's an awfully
>>> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>>
>> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY
>> to call
>> svc_tcp_prep_reply_hdr() in svc_process_common() ?
>> And according call for rdma-bc does nothing useful at all?
>>
>> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and
>> just
>> provide pointer to svc_tcp_prep_reply_hdr() in svc_process_common()
>> via per-netns sunrpc_net -- and seems it was enough, my testcase
>> worked correctly.
>
> I don't see how that function is related to net namespaces. As far as I
> can tell, it only signals whether or not the type of transport uses the
> TCP record marking scheme.

We need to know which kind of transport is used in specified net namespace,
for example init_ns can use RDMA transport and netns "second" can use
TCP transport at the same time.
If you do not like an idea to use function pointer as a mark -- ok
I can save only some boolean flag on sunrpc_net, check it in svc_process_common()
and if it is set -- call svc_tcp_prep_reply_hdr() directly.

Is it acceptable for you?

> IOW: it depends on whether the client is using a stream based protocol
> like TCP, or a datagram-like protocol like UDP, or RDMA. Whether that
> use is occurring in a private net namespace or in the init process
> namespace would be irrelevant.
>
>> Am I missed something probably?
>> Should we really remove svc_create_xprt( "tcp/rdma-bc"...) related
>> stuff? ?
>
> Agreed. The 'bc_up' callback in struct rpc_xprt_ops serves no
> discernible purpose, and can be removed.
>

2018-12-24 06:05:24

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/24/18 8:51 AM, Vasily Averin wrote:
> On 12/24/18 2:56 AM, Trond Myklebust wrote:
>> On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
>>> On 12/21/18 4:00 AM, [email protected] wrote:
>>>> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust wrote:
>>>>> No. We don't care about xpt_flags for the back channel because
>>>>> there is
>>>>> no "server transport". The actual transport is stored in the
>>>>> 'struct
>>>>> rpc_rqst', and is the struct rpc_xprt corresponding to the client
>>>>> socket or RDMA channel.
>>>>>
>>>>> IOW: All we really need in svc_process_common() is to be able to
>>>>> run
>>>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can be
>>>>> passed
>>>>> either as a pointer to the struct svc_xprt_ops itself.
>>>>
>>>> For what it's worth, I'd rather get rid of that op--it's an awfully
>>>> roundabout way just to do "svc_putnl(resv, 0);" in the tcp case.
>>>
>>> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used ONLY
>>> to call
>>> svc_tcp_prep_reply_hdr() in svc_process_common() ?
>>> And according call for rdma-bc does nothing useful at all?
>>>
>>> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up() and
>>> just
>>> provide pointer to svc_tcp_prep_reply_hdr() in svc_process_common()
>>> via per-netns sunrpc_net -- and seems it was enough, my testcase
>>> worked correctly.
>>
>> I don't see how that function is related to net namespaces. As far as I
>> can tell, it only signals whether or not the type of transport uses the
>> TCP record marking scheme.
>
> We need to know which kind of transport is used in specified net namespace,
> for example init_ns can use RDMA transport and netns "second" can use
> TCP transport at the same time.
> If you do not like an idea to use function pointer as a mark -- ok
> I can save only some boolean flag on sunrpc_net, check it in svc_process_common()
> and if it is set -- call svc_tcp_prep_reply_hdr() directly.

moreover, I can do not change sunrpc_net at all,
I can check in bc_svc_common() which transport uses incoming svc_req
and provide such flag as new parameter to svc_process_common().

>> IOW: it depends on whether the client is using a stream based protocol
>> like TCP, or a datagram-like protocol like UDP, or RDMA. Whether that
>> use is occurring in a private net namespace or in the init process
>> namespace would be irrelevant.
>>
>>> Am I missed something probably?
>>> Should we really remove svc_create_xprt( "tcp/rdma-bc"...) related
>>> stuff? ?
>>
>> Agreed. The 'bc_up' callback in struct rpc_xprt_ops serves no
>> discernible purpose, and can be removed.
>>

2018-12-24 08:21:09

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Mon, 2018-12-24 at 09:05 +0300, Vasily Averin wrote:
> On 12/24/18 8:51 AM, Vasily Averin wrote:
> > On 12/24/18 2:56 AM, Trond Myklebust wrote:
> > > On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
> > > > On 12/21/18 4:00 AM, [email protected] wrote:
> > > > > On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust
> > > > > wrote:
> > > > > > No. We don't care about xpt_flags for the back channel
> > > > > > because
> > > > > > there is
> > > > > > no "server transport". The actual transport is stored in
> > > > > > the
> > > > > > 'struct
> > > > > > rpc_rqst', and is the struct rpc_xprt corresponding to the
> > > > > > client
> > > > > > socket or RDMA channel.
> > > > > >
> > > > > > IOW: All we really need in svc_process_common() is to be
> > > > > > able to
> > > > > > run
> > > > > > rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can
> > > > > > be
> > > > > > passed
> > > > > > either as a pointer to the struct svc_xprt_ops itself.
> > > > >
> > > > > For what it's worth, I'd rather get rid of that op--it's an
> > > > > awfully
> > > > > roundabout way just to do "svc_putnl(resv, 0);" in the tcp
> > > > > case.
> > > >
> > > > Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used
> > > > ONLY
> > > > to call
> > > > svc_tcp_prep_reply_hdr() in svc_process_common() ?
> > > > And according call for rdma-bc does nothing useful at all?
> > > >
> > > > I've just tried to remove svc_create_xprt() from xs_tcp_bc_up()
> > > > and
> > > > just
> > > > provide pointer to svc_tcp_prep_reply_hdr()
> > > > in svc_process_common()
> > > > via per-netns sunrpc_net -- and seems it was enough, my
> > > > testcase
> > > > worked correctly.
> > >
> > > I don't see how that function is related to net namespaces. As
> > > far as I
> > > can tell, it only signals whether or not the type of transport
> > > uses the
> > > TCP record marking scheme.
> >
> > We need to know which kind of transport is used in specified net
> > namespace,
> > for example init_ns can use RDMA transport and netns "second" can
> > use
> > TCP transport at the same time.
> > If you do not like an idea to use function pointer as a mark -- ok
> > I can save only some boolean flag on sunrpc_net, check it in
> > svc_process_common()
> > and if it is set -- call svc_tcp_prep_reply_hdr() directly.

I'm not against the idea of using a function pointer, but I'm saying
that the transport is not unique per-netns. Instead, the transport is
usually per NFS mount, but you can always retrieve a pointer to it
directly in bc_svc_process() from req->rq_xprt.


> moreover, I can do not change sunrpc_net at all,
> I can check in bc_svc_common() which transport uses incoming svc_req
> and provide such flag as new parameter to svc_process_common().

The function or flag used by bc_svc_common() could be added to req-
>rq_xprt->ops as another 'bc_' field and then passed to
svc_process_common() as the parameter.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-24 08:59:48

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/24/18 11:21 AM, Trond Myklebust wrote:
> On Mon, 2018-12-24 at 09:05 +0300, Vasily Averin wrote:
>> On 12/24/18 8:51 AM, Vasily Averin wrote:
>>> On 12/24/18 2:56 AM, Trond Myklebust wrote:
>>>> On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
>>>>> On 12/21/18 4:00 AM, [email protected] wrote:
>>>>>> On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust
>>>>>> wrote:
>>>>>>> No. We don't care about xpt_flags for the back channel
>>>>>>> because
>>>>>>> there is
>>>>>>> no "server transport". The actual transport is stored in
>>>>>>> the
>>>>>>> 'struct
>>>>>>> rpc_rqst', and is the struct rpc_xprt corresponding to the
>>>>>>> client
>>>>>>> socket or RDMA channel.
>>>>>>>
>>>>>>> IOW: All we really need in svc_process_common() is to be
>>>>>>> able to
>>>>>>> run
>>>>>>> rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that can
>>>>>>> be
>>>>>>> passed
>>>>>>> either as a pointer to the struct svc_xprt_ops itself.
>>>>>>
>>>>>> For what it's worth, I'd rather get rid of that op--it's an
>>>>>> awfully
>>>>>> roundabout way just to do "svc_putnl(resv, 0);" in the tcp
>>>>>> case.
>>>>>
>>>>> Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was used
>>>>> ONLY
>>>>> to call
>>>>> svc_tcp_prep_reply_hdr() in svc_process_common() ?
>>>>> And according call for rdma-bc does nothing useful at all?
>>>>>
>>>>> I've just tried to remove svc_create_xprt() from xs_tcp_bc_up()
>>>>> and
>>>>> just
>>>>> provide pointer to svc_tcp_prep_reply_hdr()
>>>>> in svc_process_common()
>>>>> via per-netns sunrpc_net -- and seems it was enough, my
>>>>> testcase
>>>>> worked correctly.
>>>>
>>>> I don't see how that function is related to net namespaces. As
>>>> far as I
>>>> can tell, it only signals whether or not the type of transport
>>>> uses the
>>>> TCP record marking scheme.
>>>
>>> We need to know which kind of transport is used in specified net
>>> namespace,
>>> for example init_ns can use RDMA transport and netns "second" can
>>> use
>>> TCP transport at the same time.
>>> If you do not like an idea to use function pointer as a mark -- ok
>>> I can save only some boolean flag on sunrpc_net, check it in
>>> svc_process_common()
>>> and if it is set -- call svc_tcp_prep_reply_hdr() directly.
>
> I'm not against the idea of using a function pointer, but I'm saying
> that the transport is not unique per-netns. Instead, the transport is
> usually per NFS mount, but you can always retrieve a pointer to it
> directly in bc_svc_process() from req->rq_xprt.

You're right, I was wrong because I was focused on creation of fake transport svc_xprt.
Yes, we cannot use per-netns pointer here.

>> moreover, I can do not change sunrpc_net at all,
>> I can check in bc_svc_common() which transport uses incoming svc_req
>> and provide such flag as new parameter to svc_process_common().
>
> The function or flag used by bc_svc_common() could be added to req-
>> rq_xprt->ops as another 'bc_' field and then passed to
> svc_process_common() as the parameter.

Can I just check rqstp->rq_prot ? It is inherited from incoming svc_req,
and it seems it enough to check its propo, it isn't?

svc_process_common()
...
/* Setup reply header */
if (rqstp->rq_prot == IPPROTO_TCP)
svc_tcp_prep_reply_hdr(rqstp);

2018-12-24 09:53:17

by Trond Myklebust

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On Mon, 2018-12-24 at 11:59 +0300, Vasily Averin wrote:
> On 12/24/18 11:21 AM, Trond Myklebust wrote:
> > On Mon, 2018-12-24 at 09:05 +0300, Vasily Averin wrote:
> > > On 12/24/18 8:51 AM, Vasily Averin wrote:
> > > > On 12/24/18 2:56 AM, Trond Myklebust wrote:
> > > > > On Sat, 2018-12-22 at 20:46 +0300, Vasily Averin wrote:
> > > > > > On 12/21/18 4:00 AM, [email protected] wrote:
> > > > > > > On Tue, Dec 18, 2018 at 02:55:15PM +0000, Trond Myklebust
> > > > > > > wrote:
> > > > > > > > No. We don't care about xpt_flags for the back channel
> > > > > > > > because
> > > > > > > > there is
> > > > > > > > no "server transport". The actual transport is stored
> > > > > > > > in
> > > > > > > > the
> > > > > > > > 'struct
> > > > > > > > rpc_rqst', and is the struct rpc_xprt corresponding to
> > > > > > > > the
> > > > > > > > client
> > > > > > > > socket or RDMA channel.
> > > > > > > >
> > > > > > > > IOW: All we really need in svc_process_common() is to
> > > > > > > > be
> > > > > > > > able to
> > > > > > > > run
> > > > > > > > rqstp->rq_xprt->xpt_ops->xpo_prep_reply_hdr(), and that
> > > > > > > > can
> > > > > > > > be
> > > > > > > > passed
> > > > > > > > either as a pointer to the struct svc_xprt_ops itself.
> > > > > > >
> > > > > > > For what it's worth, I'd rather get rid of that op--it's
> > > > > > > an
> > > > > > > awfully
> > > > > > > roundabout way just to do "svc_putnl(resv, 0);" in the
> > > > > > > tcp
> > > > > > > case.
> > > > > >
> > > > > > Do you mean that svc_create_xprt(serv, "tcp-bc", ...) was
> > > > > > used
> > > > > > ONLY
> > > > > > to call
> > > > > > svc_tcp_prep_reply_hdr() in svc_process_common() ?
> > > > > > And according call for rdma-bc does nothing useful at all?
> > > > > >
> > > > > > I've just tried to remove svc_create_xprt() from
> > > > > > xs_tcp_bc_up()
> > > > > > and
> > > > > > just
> > > > > > provide pointer to svc_tcp_prep_reply_hdr()
> > > > > > in svc_process_common()
> > > > > > via per-netns sunrpc_net -- and seems it was enough, my
> > > > > > testcase
> > > > > > worked correctly.
> > > > >
> > > > > I don't see how that function is related to net namespaces.
> > > > > As
> > > > > far as I
> > > > > can tell, it only signals whether or not the type of
> > > > > transport
> > > > > uses the
> > > > > TCP record marking scheme.
> > > >
> > > > We need to know which kind of transport is used in specified
> > > > net
> > > > namespace,
> > > > for example init_ns can use RDMA transport and netns "second"
> > > > can
> > > > use
> > > > TCP transport at the same time.
> > > > If you do not like an idea to use function pointer as a mark --
> > > > ok
> > > > I can save only some boolean flag on sunrpc_net, check it in
> > > > svc_process_common()
> > > > and if it is set -- call svc_tcp_prep_reply_hdr() directly.
> >
> > I'm not against the idea of using a function pointer, but I'm
> > saying
> > that the transport is not unique per-netns. Instead, the transport
> > is
> > usually per NFS mount, but you can always retrieve a pointer to it
> > directly in bc_svc_process() from req->rq_xprt.
>
> You're right, I was wrong because I was focused on creation of fake
> transport svc_xprt.
> Yes, we cannot use per-netns pointer here.
>
> > > moreover, I can do not change sunrpc_net at all,
> > > I can check in bc_svc_common() which transport uses incoming
> > > svc_req
> > > and provide such flag as new parameter to svc_process_common().
> >
> > The function or flag used by bc_svc_common() could be added to req-
> > > rq_xprt->ops as another 'bc_' field and then passed to
> > svc_process_common() as the parameter.
>
> Can I just check rqstp->rq_prot ? It is inherited from incoming
> svc_req,
> and it seems it enough to check its propo, it isn't?
>
> svc_process_common()
> ...
> /* Setup reply header */
> if (rqstp->rq_prot == IPPROTO_TCP)
> svc_tcp_prep_reply_hdr(rqstp);

Yes. In these days with retpoline slowing down all indirect function
calls, then the above is probably the better solution.

--
Trond Myklebust
Linux NFS client maintainer, Hammerspace
[email protected]


2018-12-24 11:49:08

by Vasily Averin

[permalink] [raw]
Subject: Re: [PATCH 1/4] nfs: use-after-free in svc_process_common()

On 12/24/18 12:53 PM, Trond Myklebust wrote:
> On Mon, 2018-12-24 at 11:59 +0300, Vasily Averin wrote:
>> Can I just check rqstp->rq_prot ? It is inherited from incoming
>> svc_req,
>> and it seems it enough to check its propo, it isn't?
>>
>> svc_process_common()
>> ...
>> /* Setup reply header */
>> if (rqstp->rq_prot == IPPROTO_TCP)
>> svc_tcp_prep_reply_hdr(rqstp);
>
> Yes. In these days with retpoline slowing down all indirect function
> calls, then the above is probably the better solution.

I've submitted v4 patch version with these changes.