For ops with "trivial" replies, nfsd4_encode_operation will shortcut
most of the encoding work and skip to just marshalling up the status.
One of the things it skips is calling op_release. This could cause a
memory leak in the layoutget codepath if there is an error at an
inopportune time.
Have the compound processing engine always call op_release, even when
op_func sets an error in op->status. With this change, we also need
nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
on error to avoid a double free.
Reported-by: Zhi Li <[email protected]>
Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
Signed-off-by: Jeff Layton <[email protected]>
---
fs/nfsd/blocklayout.c | 1 +
fs/nfsd/nfs4xdr.c | 13 +++++++------
2 files changed, 8 insertions(+), 6 deletions(-)
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index 04697f8dc37d..01d7fd108cf3 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
out_free_dev:
kfree(dev);
+ gdp->gd_device = NULL;
return ret;
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index e12e5a4ad502..6b675fbdabd0 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
p = xdr_reserve_space(xdr, 8);
if (!p) {
WARN_ON_ONCE(1);
- return;
+ goto release;
}
*p++ = cpu_to_be32(op->opnum);
post_err_offset = xdr->buf->len;
@@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
op->status = encoder(resp, op->status, &op->u);
if (op->status)
trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
- if (opdesc && opdesc->op_release)
- opdesc->op_release(&op->u);
xdr_commit_encode(xdr);
/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
}
status:
*p = op->status;
+release:
+ if (opdesc && opdesc->op_release)
+ opdesc->op_release(&op->u);
}
-/*
- * Encode the reply stored in the stateowner reply cache
- *
+/*
+ * Encode the reply stored in the stateowner reply cache
+ *
* XDR note: do not encode rp->rp_buflen: the buffer contains the
* previously sent already encoded operation.
*/
--
2.39.2
> On Mar 27, 2023, at 6:21 AM, Jeff Layton <[email protected]> wrote:
>
> For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> most of the encoding work and skip to just marshalling up the status.
> One of the things it skips is calling op_release. This could cause a
> memory leak in the layoutget codepath if there is an error at an
> inopportune time.
>
> Have the compound processing engine always call op_release, even when
> op_func sets an error in op->status. With this change, we also need
> nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> on error to avoid a double free.
>
> Reported-by: Zhi Li <[email protected]>
> Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> Signed-off-by: Jeff Layton <[email protected]>
Thanks, Jeff.
May I add the following tag?
Fixes: 34b1744c91cc ("nfsd4: define ->op_release for compound ops")
> ---
> fs/nfsd/blocklayout.c | 1 +
> fs/nfsd/nfs4xdr.c | 13 +++++++------
> 2 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> index 04697f8dc37d..01d7fd108cf3 100644
> --- a/fs/nfsd/blocklayout.c
> +++ b/fs/nfsd/blocklayout.c
> @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
>
> out_free_dev:
> kfree(dev);
> + gdp->gd_device = NULL;
> return ret;
> }
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index e12e5a4ad502..6b675fbdabd0 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> p = xdr_reserve_space(xdr, 8);
> if (!p) {
> WARN_ON_ONCE(1);
> - return;
> + goto release;
> }
> *p++ = cpu_to_be32(op->opnum);
> post_err_offset = xdr->buf->len;
> @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> op->status = encoder(resp, op->status, &op->u);
> if (op->status)
> trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> - if (opdesc && opdesc->op_release)
> - opdesc->op_release(&op->u);
> xdr_commit_encode(xdr);
>
> /* nfsd4_check_resp_size guarantees enough room for error status */
> @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> }
> status:
> *p = op->status;
> +release:
> + if (opdesc && opdesc->op_release)
> + opdesc->op_release(&op->u);
> }
>
> -/*
> - * Encode the reply stored in the stateowner reply cache
> - *
> +/*
> + * Encode the reply stored in the stateowner reply cache
> + *
> * XDR note: do not encode rp->rp_buflen: the buffer contains the
> * previously sent already encoded operation.
> */
> --
> 2.39.2
>
--
Chuck Lever
On Mon, 2023-03-27 at 13:14 +0000, Chuck Lever III wrote:
>
> > On Mar 27, 2023, at 6:21 AM, Jeff Layton <[email protected]> wrote:
> >
> > For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> > most of the encoding work and skip to just marshalling up the status.
> > One of the things it skips is calling op_release. This could cause a
> > memory leak in the layoutget codepath if there is an error at an
> > inopportune time.
> >
> > Have the compound processing engine always call op_release, even when
> > op_func sets an error in op->status. With this change, we also need
> > nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> > on error to avoid a double free.
> >
> > Reported-by: Zhi Li <[email protected]>
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> > Signed-off-by: Jeff Layton <[email protected]>
>
> Thanks, Jeff.
>
> May I add: Fixes: 34b1744c91cc ("nfsd4: define ->op_release for
> compound ops") ?
>
>
Sure. It does look like the leaks stretch back at least that far.
> > ---
> > fs/nfsd/blocklayout.c | 1 +
> > fs/nfsd/nfs4xdr.c | 13 +++++++------
> > 2 files changed, 8 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index 04697f8dc37d..01d7fd108cf3 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
> >
> > out_free_dev:
> > kfree(dev);
> > + gdp->gd_device = NULL;
> > return ret;
> > }
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index e12e5a4ad502..6b675fbdabd0 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > p = xdr_reserve_space(xdr, 8);
> > if (!p) {
> > WARN_ON_ONCE(1);
> > - return;
> > + goto release;
> > }
> > *p++ = cpu_to_be32(op->opnum);
> > post_err_offset = xdr->buf->len;
> > @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > op->status = encoder(resp, op->status, &op->u);
> > if (op->status)
> > trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> > - if (opdesc && opdesc->op_release)
> > - opdesc->op_release(&op->u);
> > xdr_commit_encode(xdr);
> >
> > /* nfsd4_check_resp_size guarantees enough room for error status */
> > @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > }
> > status:
> > *p = op->status;
> > +release:
> > + if (opdesc && opdesc->op_release)
> > + opdesc->op_release(&op->u);
> > }
> >
> > -/*
> > - * Encode the reply stored in the stateowner reply cache
> > - *
> > +/*
> > + * Encode the reply stored in the stateowner reply cache
> > + *
> > * XDR note: do not encode rp->rp_buflen: the buffer contains the
> > * previously sent already encoded operation.
> > */
> > --
> > 2.39.2
> >
>
> --
> Chuck Lever
>
>
--
Jeff Layton <[email protected]>
On Mon, 2023-03-27 at 13:14 +0000, Chuck Lever III wrote:
>
> > On Mar 27, 2023, at 6:21 AM, Jeff Layton <[email protected]> wrote:
> >
> > For ops with "trivial" replies, nfsd4_encode_operation will shortcut
> > most of the encoding work and skip to just marshalling up the status.
> > One of the things it skips is calling op_release. This could cause a
> > memory leak in the layoutget codepath if there is an error at an
> > inopportune time.
> >
> > Have the compound processing engine always call op_release, even when
> > op_func sets an error in op->status. With this change, we also need
> > nfsd4_block_get_device_info_scsi to set the gd_device pointer to NULL
> > on error to avoid a double free.
> >
> > Reported-by: Zhi Li <[email protected]>
> > Link: https://bugzilla.redhat.com/show_bug.cgi?id=2181403
> > Signed-off-by: Jeff Layton <[email protected]>
>
> Thanks, Jeff.
>
> May I add: Fixes: 34b1744c91cc ("nfsd4: define ->op_release for
> compound ops") ?
>
I've seen some problems with this patch in testing, and I have a fix
forthcoming (once I finish testing it).
The root cause is the OPDESC() function, which can walk off the end of
the nfsd4_ops array when passed a large value (like OP_ILLEGAL). I think
we'll want to fix that to do something more sane before merging this
patch.
>
> > ---
> > fs/nfsd/blocklayout.c | 1 +
> > fs/nfsd/nfs4xdr.c | 13 +++++++------
> > 2 files changed, 8 insertions(+), 6 deletions(-)
> >
> > diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
> > index 04697f8dc37d..01d7fd108cf3 100644
> > --- a/fs/nfsd/blocklayout.c
> > +++ b/fs/nfsd/blocklayout.c
> > @@ -297,6 +297,7 @@ nfsd4_block_get_device_info_scsi(struct super_block *sb,
> >
> > out_free_dev:
> > kfree(dev);
> > + gdp->gd_device = NULL;
> > return ret;
> > }
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index e12e5a4ad502..6b675fbdabd0 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -5402,7 +5402,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > p = xdr_reserve_space(xdr, 8);
> > if (!p) {
> > WARN_ON_ONCE(1);
> > - return;
> > + goto release;
> > }
> > *p++ = cpu_to_be32(op->opnum);
> > post_err_offset = xdr->buf->len;
> > @@ -5418,8 +5418,6 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > op->status = encoder(resp, op->status, &op->u);
> > if (op->status)
> > trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
> > - if (opdesc && opdesc->op_release)
> > - opdesc->op_release(&op->u);
> > xdr_commit_encode(xdr);
> >
> > /* nfsd4_check_resp_size guarantees enough room for error status */
> > @@ -5460,11 +5458,14 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> > }
> > status:
> > *p = op->status;
> > +release:
> > + if (opdesc && opdesc->op_release)
> > + opdesc->op_release(&op->u);
> > }
> >
> > -/*
> > - * Encode the reply stored in the stateowner reply cache
> > - *
> > +/*
> > + * Encode the reply stored in the stateowner reply cache
> > + *
> > * XDR note: do not encode rp->rp_buflen: the buffer contains the
> > * previously sent already encoded operation.
> > */
> > --
> > 2.39.2
> >
>
> --
> Chuck Lever
>
>
--
Jeff Layton <[email protected]>