2020-09-08 16:47:40

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v5 2/5] NFSD: Add READ_PLUS data support

From: Anna Schumaker <[email protected]>

This patch adds READ_PLUS support for returning a single
NFS4_CONTENT_DATA segment to the client. This is basically the same as
the READ operation, only with the extra information about data segments.

Signed-off-by: Anna Schumaker <[email protected]>

---
v5: Fix up nfsd4_read_plus_rsize() calculation
---
fs/nfsd/nfs4proc.c | 20 +++++++++++
fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 101 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index eaf50eafa935..0a3df5f10501 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}

+static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = svc_max_payload(rqstp);
+ u32 rlen = min(op->u.read.rd_length, maxcount);
+ /*
+ * Worst case is a single large data segment, so make
+ * sure we have enough room to encode that
+ */
+ u32 seg_len = 1 + 2 + 1;
+
+ return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
+}
+
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = 0, rlen = 0;
@@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_COPY",
.op_rsize_bop = nfsd4_copy_rsize,
},
+ [OP_READ_PLUS] = {
+ .op_func = nfsd4_read,
+ .op_release = nfsd4_read_release,
+ .op_name = "OP_READ_PLUS",
+ .op_rsize_bop = nfsd4_read_plus_rsize,
+ .op_get_currentstateid = nfsd4_get_readstateid,
+ },
[OP_SEEK] = {
.op_func = nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0be194de4888..26d12e3edf33 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
[OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
@@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
p = xdr_encode_hyper(p, os->count);
*p++ = cpu_to_be32(0);
+ return nfserr;
+}
+
+static __be32
+nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
+ struct nfsd4_read *read,
+ unsigned long maxcount, u32 *eof)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct file *file = read->rd_nf->nf_file;
+ int starting_len = xdr->buf->len;
+ __be32 nfserr;
+ __be32 *p, tmp;
+ __be64 tmp64;
+
+ /* Content type, offset, byte count */
+ p = xdr_reserve_space(xdr, 4 + 8 + 4);
+ if (!p)
+ return nfserr_resource;
+
+ read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
+ if (read->rd_vlen < 0)
+ return nfserr_resource;
+
+ nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
+ resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
+ if (nfserr)
+ return nfserr;
+
+ tmp = htonl(NFS4_CONTENT_DATA);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+ tmp64 = cpu_to_be64(read->rd_offset);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
+ tmp = htonl(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+{
+ unsigned long maxcount;
+ struct xdr_stream *xdr = &resp->xdr;
+ struct file *file;
+ int starting_len = xdr->buf->len;
+ int segments = 0;
+ __be32 *p, tmp;
+ u32 eof;
+
+ if (nfserr)
+ return nfserr;
+ file = read->rd_nf->nf_file;
+
+ /* eof flag, segment count */
+ p = xdr_reserve_space(xdr, 4 + 4);
+ if (!p)
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+ maxcount = svc_max_payload(resp->rqstp);
+ maxcount = min_t(unsigned long, maxcount,
+ (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount, read->rd_length);
+
+ eof = read->rd_offset >= i_size_read(file_inode(file));
+ if (!eof) {
+ nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
+ segments++;
+ }
+
+ if (nfserr)
+ xdr_truncate_encode(xdr, starting_len);
+ else {
+ tmp = htonl(eof);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+ tmp = htonl(segments);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+ }

return nfserr;
}
@@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
[OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
--
2.28.0


2020-09-08 19:45:05

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v5 2/5] NFSD: Add READ_PLUS data support

On Tue, Sep 08, 2020 at 12:25:56PM -0400, [email protected] wrote:
> From: Anna Schumaker <[email protected]>
>
> This patch adds READ_PLUS support for returning a single
> NFS4_CONTENT_DATA segment to the client. This is basically the same as
> the READ operation, only with the extra information about data segments.
>
> Signed-off-by: Anna Schumaker <[email protected]>
>
> ---
> v5: Fix up nfsd4_read_plus_rsize() calculation
> ---
> fs/nfsd/nfs4proc.c | 20 +++++++++++
> fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
> 2 files changed, 101 insertions(+), 2 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index eaf50eafa935..0a3df5f10501 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> }
>
> +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + u32 maxcount = svc_max_payload(rqstp);
> + u32 rlen = min(op->u.read.rd_length, maxcount);
> + /*
> + * Worst case is a single large data segment, so make
> + * sure we have enough room to encode that
> + */

After the last patch we allow an unlimited number of segments. So a
zillion 1-byte segments are also possible, and that is a worse case.

Possible ways to avoid that kind of thing:

- when encoding, stop and return a short read if the xdr-encoded
result would exceed the limit calculated here.
- round SEEK_HOLE results up to the nearest (say) 512 bytes, and
SEEK_DATA results down to the nearest 512 bytes. May also need
some logic to avoid encoding 0-length segments.
- talk to linux-fsdevel, see if we can get a minimum hole
alignment guarantee from filesystems.

I'm thinking #1 is simplest.

--b.

> + u32 seg_len = 1 + 2 + 1;
> +
> + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
> +}
> +
> static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> {
> u32 maxcount = 0, rlen = 0;
> @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> .op_name = "OP_COPY",
> .op_rsize_bop = nfsd4_copy_rsize,
> },
> + [OP_READ_PLUS] = {
> + .op_func = nfsd4_read,
> + .op_release = nfsd4_read_release,
> + .op_name = "OP_READ_PLUS",
> + .op_rsize_bop = nfsd4_read_plus_rsize,
> + .op_get_currentstateid = nfsd4_get_readstateid,
> + },
> [OP_SEEK] = {
> .op_func = nfsd4_seek,
> .op_name = "OP_SEEK",
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 0be194de4888..26d12e3edf33 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
> [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
> - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
> + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
> [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
> [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
> @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> return nfserr_resource;
> p = xdr_encode_hyper(p, os->count);
> *p++ = cpu_to_be32(0);
> + return nfserr;
> +}
> +
> +static __be32
> +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> + struct nfsd4_read *read,
> + unsigned long maxcount, u32 *eof)
> +{
> + struct xdr_stream *xdr = &resp->xdr;
> + struct file *file = read->rd_nf->nf_file;
> + int starting_len = xdr->buf->len;
> + __be32 nfserr;
> + __be32 *p, tmp;
> + __be64 tmp64;
> +
> + /* Content type, offset, byte count */
> + p = xdr_reserve_space(xdr, 4 + 8 + 4);
> + if (!p)
> + return nfserr_resource;
> +
> + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> + if (read->rd_vlen < 0)
> + return nfserr_resource;
> +
> + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> + if (nfserr)
> + return nfserr;
> +
> + tmp = htonl(NFS4_CONTENT_DATA);
> + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> + tmp64 = cpu_to_be64(read->rd_offset);
> + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> + tmp = htonl(maxcount);
> + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> + return nfs_ok;
> +}
> +
> +static __be32
> +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> + struct nfsd4_read *read)
> +{
> + unsigned long maxcount;
> + struct xdr_stream *xdr = &resp->xdr;
> + struct file *file;
> + int starting_len = xdr->buf->len;
> + int segments = 0;
> + __be32 *p, tmp;
> + u32 eof;
> +
> + if (nfserr)
> + return nfserr;
> + file = read->rd_nf->nf_file;
> +
> + /* eof flag, segment count */
> + p = xdr_reserve_space(xdr, 4 + 4);
> + if (!p)
> + return nfserr_resource;
> + xdr_commit_encode(xdr);
> +
> + maxcount = svc_max_payload(resp->rqstp);
> + maxcount = min_t(unsigned long, maxcount,
> + (xdr->buf->buflen - xdr->buf->len));
> + maxcount = min_t(unsigned long, maxcount, read->rd_length);
> +
> + eof = read->rd_offset >= i_size_read(file_inode(file));
> + if (!eof) {
> + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> + segments++;
> + }
> +
> + if (nfserr)
> + xdr_truncate_encode(xdr, starting_len);
> + else {
> + tmp = htonl(eof);
> + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> + tmp = htonl(segments);
> + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> + }
>
> return nfserr;
> }
> @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
> - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
> + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
> [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
> [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
> --
> 2.28.0
>

2020-09-09 16:56:08

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v5 2/5] NFSD: Add READ_PLUS data support

On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <[email protected]> wrote:
>
> On Tue, Sep 08, 2020 at 12:25:56PM -0400, [email protected] wrote:
> > From: Anna Schumaker <[email protected]>
> >
> > This patch adds READ_PLUS support for returning a single
> > NFS4_CONTENT_DATA segment to the client. This is basically the same as
> > the READ operation, only with the extra information about data segments.
> >
> > Signed-off-by: Anna Schumaker <[email protected]>
> >
> > ---
> > v5: Fix up nfsd4_read_plus_rsize() calculation
> > ---
> > fs/nfsd/nfs4proc.c | 20 +++++++++++
> > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
> > 2 files changed, 101 insertions(+), 2 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index eaf50eafa935..0a3df5f10501 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > }
> >
> > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > +{
> > + u32 maxcount = svc_max_payload(rqstp);
> > + u32 rlen = min(op->u.read.rd_length, maxcount);
> > + /*
> > + * Worst case is a single large data segment, so make
> > + * sure we have enough room to encode that
> > + */
>
> After the last patch we allow an unlimited number of segments. So a
> zillion 1-byte segments are also possible, and that is a worse case.
>
> Possible ways to avoid that kind of thing:
>
> - when encoding, stop and return a short read if the xdr-encoded
> result would exceed the limit calculated here.

Doesn't this happen automatically through calls to xdr_reserve_space()?

> - round SEEK_HOLE results up to the nearest (say) 512 bytes, and
> SEEK_DATA results down to the nearest 512 bytes. May also need
> some logic to avoid encoding 0-length segments.
> - talk to linux-fsdevel, see if we can get a minimum hole
> alignment guarantee from filesystems.
>
> I'm thinking #1 is simplest.
>
> --b.
>
> > + u32 seg_len = 1 + 2 + 1;
> > +
> > + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > +}
> > +
> > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > {
> > u32 maxcount = 0, rlen = 0;
> > @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> > .op_name = "OP_COPY",
> > .op_rsize_bop = nfsd4_copy_rsize,
> > },
> > + [OP_READ_PLUS] = {
> > + .op_func = nfsd4_read,
> > + .op_release = nfsd4_read_release,
> > + .op_name = "OP_READ_PLUS",
> > + .op_rsize_bop = nfsd4_read_plus_rsize,
> > + .op_get_currentstateid = nfsd4_get_readstateid,
> > + },
> > [OP_SEEK] = {
> > .op_func = nfsd4_seek,
> > .op_name = "OP_SEEK",
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 0be194de4888..26d12e3edf33 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
> > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
> > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
> > - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
> > + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
> > [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
> > [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
> > [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
> > @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> > return nfserr_resource;
> > p = xdr_encode_hyper(p, os->count);
> > *p++ = cpu_to_be32(0);
> > + return nfserr;
> > +}
> > +
> > +static __be32
> > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > + struct nfsd4_read *read,
> > + unsigned long maxcount, u32 *eof)
> > +{
> > + struct xdr_stream *xdr = &resp->xdr;
> > + struct file *file = read->rd_nf->nf_file;
> > + int starting_len = xdr->buf->len;
> > + __be32 nfserr;
> > + __be32 *p, tmp;
> > + __be64 tmp64;
> > +
> > + /* Content type, offset, byte count */
> > + p = xdr_reserve_space(xdr, 4 + 8 + 4);
> > + if (!p)
> > + return nfserr_resource;
> > +
> > + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> > + if (read->rd_vlen < 0)
> > + return nfserr_resource;
> > +
> > + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> > + if (nfserr)
> > + return nfserr;
> > +
> > + tmp = htonl(NFS4_CONTENT_DATA);
> > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > + tmp64 = cpu_to_be64(read->rd_offset);
> > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> > + tmp = htonl(maxcount);
> > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> > + return nfs_ok;
> > +}
> > +
> > +static __be32
> > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > + struct nfsd4_read *read)
> > +{
> > + unsigned long maxcount;
> > + struct xdr_stream *xdr = &resp->xdr;
> > + struct file *file;
> > + int starting_len = xdr->buf->len;
> > + int segments = 0;
> > + __be32 *p, tmp;
> > + u32 eof;
> > +
> > + if (nfserr)
> > + return nfserr;
> > + file = read->rd_nf->nf_file;
> > +
> > + /* eof flag, segment count */
> > + p = xdr_reserve_space(xdr, 4 + 4);
> > + if (!p)
> > + return nfserr_resource;
> > + xdr_commit_encode(xdr);
> > +
> > + maxcount = svc_max_payload(resp->rqstp);
> > + maxcount = min_t(unsigned long, maxcount,
> > + (xdr->buf->buflen - xdr->buf->len));
> > + maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > +
> > + eof = read->rd_offset >= i_size_read(file_inode(file));
> > + if (!eof) {
> > + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > + segments++;
> > + }
> > +
> > + if (nfserr)
> > + xdr_truncate_encode(xdr, starting_len);
> > + else {
> > + tmp = htonl(eof);
> > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > + tmp = htonl(segments);
> > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > + }
> >
> > return nfserr;
> > }
> > @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
> > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
> > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
> > - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
> > + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
> > [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
> > [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
> > [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
> > --
> > 2.28.0
> >
>

2020-09-09 20:26:04

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v5 2/5] NFSD: Add READ_PLUS data support

On Wed, Sep 09, 2020 at 12:53:18PM -0400, Anna Schumaker wrote:
> On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <[email protected]> wrote:
> >
> > On Tue, Sep 08, 2020 at 12:25:56PM -0400, [email protected] wrote:
> > > From: Anna Schumaker <[email protected]>
> > >
> > > This patch adds READ_PLUS support for returning a single
> > > NFS4_CONTENT_DATA segment to the client. This is basically the same as
> > > the READ operation, only with the extra information about data segments.
> > >
> > > Signed-off-by: Anna Schumaker <[email protected]>
> > >
> > > ---
> > > v5: Fix up nfsd4_read_plus_rsize() calculation
> > > ---
> > > fs/nfsd/nfs4proc.c | 20 +++++++++++
> > > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
> > > 2 files changed, 101 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > index eaf50eafa935..0a3df5f10501 100644
> > > --- a/fs/nfsd/nfs4proc.c
> > > +++ b/fs/nfsd/nfs4proc.c
> > > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > > }
> > >
> > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > > +{
> > > + u32 maxcount = svc_max_payload(rqstp);
> > > + u32 rlen = min(op->u.read.rd_length, maxcount);
> > > + /*
> > > + * Worst case is a single large data segment, so make
> > > + * sure we have enough room to encode that
> > > + */
> >
> > After the last patch we allow an unlimited number of segments. So a
> > zillion 1-byte segments are also possible, and that is a worse case.
> >
> > Possible ways to avoid that kind of thing:
> >
> > - when encoding, stop and return a short read if the xdr-encoded
> > result would exceed the limit calculated here.
>
> Doesn't this happen automatically through calls to xdr_reserve_space()?

No, xdr_reserve_space() will keep us from running out of buffer
completely, but it won't check that ops come in under the estimates made
in read_plus_rsize().

--b.

>
> > - round SEEK_HOLE results up to the nearest (say) 512 bytes, and
> > SEEK_DATA results down to the nearest 512 bytes. May also need
> > some logic to avoid encoding 0-length segments.
> > - talk to linux-fsdevel, see if we can get a minimum hole
> > alignment guarantee from filesystems.
> >
> > I'm thinking #1 is simplest.
> >
> > --b.
> >
> > > + u32 seg_len = 1 + 2 + 1;
> > > +
> > > + return (op_encode_hdr_size + 2 + seg_len + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > > +}
> > > +
> > > static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > > {
> > > u32 maxcount = 0, rlen = 0;
> > > @@ -3163,6 +3176,13 @@ static const struct nfsd4_operation nfsd4_ops[] = {
> > > .op_name = "OP_COPY",
> > > .op_rsize_bop = nfsd4_copy_rsize,
> > > },
> > > + [OP_READ_PLUS] = {
> > > + .op_func = nfsd4_read,
> > > + .op_release = nfsd4_read_release,
> > > + .op_name = "OP_READ_PLUS",
> > > + .op_rsize_bop = nfsd4_read_plus_rsize,
> > > + .op_get_currentstateid = nfsd4_get_readstateid,
> > > + },
> > > [OP_SEEK] = {
> > > .op_func = nfsd4_seek,
> > > .op_name = "OP_SEEK",
> > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > > index 0be194de4888..26d12e3edf33 100644
> > > --- a/fs/nfsd/nfs4xdr.c
> > > +++ b/fs/nfsd/nfs4xdr.c
> > > @@ -2173,7 +2173,7 @@ static const nfsd4_dec nfsd4_dec_ops[] = {
> > > [OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
> > > [OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_offload_status,
> > > [OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_offload_status,
> > > - [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
> > > + [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
> > > [OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
> > > [OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
> > > [OP_CLONE] = (nfsd4_dec)nfsd4_decode_clone,
> > > @@ -4597,6 +4597,85 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > return nfserr_resource;
> > > p = xdr_encode_hyper(p, os->count);
> > > *p++ = cpu_to_be32(0);
> > > + return nfserr;
> > > +}
> > > +
> > > +static __be32
> > > +nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > > + struct nfsd4_read *read,
> > > + unsigned long maxcount, u32 *eof)
> > > +{
> > > + struct xdr_stream *xdr = &resp->xdr;
> > > + struct file *file = read->rd_nf->nf_file;
> > > + int starting_len = xdr->buf->len;
> > > + __be32 nfserr;
> > > + __be32 *p, tmp;
> > > + __be64 tmp64;
> > > +
> > > + /* Content type, offset, byte count */
> > > + p = xdr_reserve_space(xdr, 4 + 8 + 4);
> > > + if (!p)
> > > + return nfserr_resource;
> > > +
> > > + read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, maxcount);
> > > + if (read->rd_vlen < 0)
> > > + return nfserr_resource;
> > > +
> > > + nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > > + resp->rqstp->rq_vec, read->rd_vlen, &maxcount, eof);
> > > + if (nfserr)
> > > + return nfserr;
> > > +
> > > + tmp = htonl(NFS4_CONTENT_DATA);
> > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > > + tmp64 = cpu_to_be64(read->rd_offset);
> > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> > > + tmp = htonl(maxcount);
> > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> > > + return nfs_ok;
> > > +}
> > > +
> > > +static __be32
> > > +nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > > + struct nfsd4_read *read)
> > > +{
> > > + unsigned long maxcount;
> > > + struct xdr_stream *xdr = &resp->xdr;
> > > + struct file *file;
> > > + int starting_len = xdr->buf->len;
> > > + int segments = 0;
> > > + __be32 *p, tmp;
> > > + u32 eof;
> > > +
> > > + if (nfserr)
> > > + return nfserr;
> > > + file = read->rd_nf->nf_file;
> > > +
> > > + /* eof flag, segment count */
> > > + p = xdr_reserve_space(xdr, 4 + 4);
> > > + if (!p)
> > > + return nfserr_resource;
> > > + xdr_commit_encode(xdr);
> > > +
> > > + maxcount = svc_max_payload(resp->rqstp);
> > > + maxcount = min_t(unsigned long, maxcount,
> > > + (xdr->buf->buflen - xdr->buf->len));
> > > + maxcount = min_t(unsigned long, maxcount, read->rd_length);
> > > +
> > > + eof = read->rd_offset >= i_size_read(file_inode(file));
> > > + if (!eof) {
> > > + nfserr = nfsd4_encode_read_plus_data(resp, read, maxcount, &eof);
> > > + segments++;
> > > + }
> > > +
> > > + if (nfserr)
> > > + xdr_truncate_encode(xdr, starting_len);
> > > + else {
> > > + tmp = htonl(eof);
> > > + write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > > + tmp = htonl(segments);
> > > + write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > > + }
> > >
> > > return nfserr;
> > > }
> > > @@ -4974,7 +5053,7 @@ static const nfsd4_enc nfsd4_enc_ops[] = {
> > > [OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
> > > [OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
> > > [OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_offload_status,
> > > - [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
> > > + [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
> > > [OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
> > > [OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
> > > [OP_CLONE] = (nfsd4_enc)nfsd4_encode_noop,
> > > --
> > > 2.28.0
> > >
> >

2020-09-09 20:51:16

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v5 2/5] NFSD: Add READ_PLUS data support

On Wed, Sep 09, 2020 at 04:25:34PM -0400, bfields wrote:
> On Wed, Sep 09, 2020 at 12:53:18PM -0400, Anna Schumaker wrote:
> > On Tue, Sep 8, 2020 at 3:42 PM J. Bruce Fields <[email protected]> wrote:
> > >
> > > On Tue, Sep 08, 2020 at 12:25:56PM -0400, [email protected] wrote:
> > > > From: Anna Schumaker <[email protected]>
> > > >
> > > > This patch adds READ_PLUS support for returning a single
> > > > NFS4_CONTENT_DATA segment to the client. This is basically the same as
> > > > the READ operation, only with the extra information about data segments.
> > > >
> > > > Signed-off-by: Anna Schumaker <[email protected]>
> > > >
> > > > ---
> > > > v5: Fix up nfsd4_read_plus_rsize() calculation
> > > > ---
> > > > fs/nfsd/nfs4proc.c | 20 +++++++++++
> > > > fs/nfsd/nfs4xdr.c | 83 ++++++++++++++++++++++++++++++++++++++++++++--
> > > > 2 files changed, 101 insertions(+), 2 deletions(-)
> > > >
> > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > > > index eaf50eafa935..0a3df5f10501 100644
> > > > --- a/fs/nfsd/nfs4proc.c
> > > > +++ b/fs/nfsd/nfs4proc.c
> > > > @@ -2591,6 +2591,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > > > return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
> > > > }
> > > >
> > > > +static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> > > > +{
> > > > + u32 maxcount = svc_max_payload(rqstp);
> > > > + u32 rlen = min(op->u.read.rd_length, maxcount);
> > > > + /*
> > > > + * Worst case is a single large data segment, so make
> > > > + * sure we have enough room to encode that
> > > > + */
> > >
> > > After the last patch we allow an unlimited number of segments. So a
> > > zillion 1-byte segments are also possible, and that is a worse case.
> > >
> > > Possible ways to avoid that kind of thing:
> > >
> > > - when encoding, stop and return a short read if the xdr-encoded
> > > result would exceed the limit calculated here.
> >
> > Doesn't this happen automatically through calls to xdr_reserve_space()?
>
> No, xdr_reserve_space() will keep us from running out of buffer
> completely, but it won't check that ops come in under the estimates made
> in read_plus_rsize().

If it's easier, another option might be just: if we ever get a "small"
hole (say, less than 512 bytes), just give up and encode the rest of the
result as a single big data segment.

--b.