2022-09-01 18:34:37

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 0/1] NFSD: Simplify READ_PLUS

From: Anna Schumaker <[email protected]>

When we left off with READ_PLUS, Chuck had suggested reverting the
server to reply with a single NFS4_CONTENT_DATA segment essentially
mimicing how the READ operation behaves. Then, a future sparse read
function can be added and the server modified to support it without
needing to rip out the old READ_PLUS code at the same time.

This patch takes that first step. I was even able to re-use the
nfsd4_encode_readv() function to remove some duplicate code.

Chuck, I tried to add in sparse read support by adding this extra
change. Unfortunately it leads to a bunch of new failing xfstests. Do
you have any thoughts about what might be going on? Is the patch okay
without the splice support?

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index adbff7737c14..e21e6cfd1c6d 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4733,6 +4733,7 @@ static __be32
nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
struct nfsd4_read *read)
{
+ bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
unsigned long maxcount;
struct xdr_stream *xdr = resp->xdr;
struct file *file = read->rd_nf->nf_file;
@@ -4747,7 +4748,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
maxcount = min_t(unsigned long, read->rd_length,
(xdr->buf->buflen - xdr->buf->len));

- nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
+ if (file->f_op->splice_read && splice_ok)
+ nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount)
if (nfserr)
return nfserr;


Thanks,
Anna


Anna Schumaker (1):
NFSD: Simplify READ_PLUS

fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
1 file changed, 20 insertions(+), 102 deletions(-)

--
2.37.2


2022-09-01 18:34:37

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 1/1] NFSD: Simplify READ_PLUS

From: Anna Schumaker <[email protected]>

Change the implementation to return a single DATA segment covering the
requested read range.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
1 file changed, 20 insertions(+), 102 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1e9690a061ec..adbff7737c14 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -4731,79 +4731,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof,
- loff_t *pos)
+ struct nfsd4_read *read)
{
+ unsigned long maxcount;
struct xdr_stream *xdr = resp->xdr;
struct file *file = read->rd_nf->nf_file;
- int starting_len = xdr->buf->len;
- loff_t hole_pos;
__be32 nfserr;
- __be32 *p, tmp;
- __be64 tmp64;
-
- hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- if (hole_pos > read->rd_offset)
- *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
- *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
+ __be32 *p;

/* Content type, offset, byte count */
p = xdr_reserve_space(xdr, 4 + 8 + 4);
if (!p)
return nfserr_resource;

- read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
- if (read->rd_vlen < 0)
- return nfserr_resource;
+ maxcount = min_t(unsigned long, read->rd_length,
+ (xdr->buf->buflen - xdr->buf->len));

- nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
- resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
+ nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));

- tmp = htonl(NFS4_CONTENT_DATA);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp64 = cpu_to_be64(read->rd_offset);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
- tmp = htonl(*maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
-
- tmp = xdr_zero;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
- xdr_pad_size(*maxcount));
- return nfs_ok;
-}
-
-static __be32
-nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
- struct nfsd4_read *read,
- unsigned long *maxcount, u32 *eof)
-{
- struct file *file = read->rd_nf->nf_file;
- loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
- loff_t f_size = i_size_read(file_inode(file));
- unsigned long count;
- __be32 *p;
-
- if (data_pos == -ENXIO)
- data_pos = f_size;
- else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
- return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
- count = data_pos - read->rd_offset;
-
- /* Content type, offset, byte count */
- p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
- if (!p)
- return nfserr_resource;
-
- *p++ = htonl(NFS4_CONTENT_HOLE);
+ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
p = xdr_encode_hyper(p, read->rd_offset);
- p = xdr_encode_hyper(p, count);
+ *p = cpu_to_be32(read->rd_length);

- *eof = (read->rd_offset + count) >= f_size;
- *maxcount = min_t(unsigned long, count, *maxcount);
return nfs_ok;
}

@@ -4811,20 +4762,14 @@ static __be32
nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_read *read)
{
- unsigned long maxcount, count;
struct xdr_stream *xdr = resp->xdr;
- struct file *file;
+ struct file *file = read->rd_nf->nf_file;
int starting_len = xdr->buf->len;
- int last_segment = xdr->buf->len;
int segments = 0;
- __be32 *p, tmp;
- bool is_data;
- loff_t pos;
- u32 eof;
+ __be32 *p;

if (nfserr)
return nfserr;
- file = read->rd_nf->nf_file;

/* eof flag, segment count */
p = xdr_reserve_space(xdr, 4 + 4);
@@ -4832,48 +4777,21 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
return nfserr_resource;
xdr_commit_encode(xdr);

- maxcount = min_t(unsigned long, read->rd_length,
- (xdr->buf->buflen - xdr->buf->len));
- count = maxcount;
-
- eof = read->rd_offset >= i_size_read(file_inode(file));
- if (eof)
+ read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
+ if (read->rd_eof)
goto out;

- pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
- is_data = pos > read->rd_offset;
-
- while (count > 0 && !eof) {
- maxcount = count;
- if (is_data)
- nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
- segments == 0 ? &pos : NULL);
- else
- nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
- if (nfserr)
- goto out;
- count -= maxcount;
- read->rd_offset += maxcount;
- is_data = !is_data;
- last_segment = xdr->buf->len;
- segments++;
- }
-
-out:
- if (nfserr && segments == 0)
+ nfserr = nfsd4_encode_read_plus_data(resp, read);
+ if (nfserr) {
xdr_truncate_encode(xdr, starting_len);
- else {
- if (nfserr) {
- xdr_truncate_encode(xdr, last_segment);
- nfserr = nfs_ok;
- eof = 0;
- }
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp = htonl(segments);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
+ return nfserr;
}

+ segments++;
+
+out:
+ p = xdr_encode_bool(p, read->rd_eof);
+ *p = cpu_to_be32(segments);
return nfserr;
}

--
2.37.2

2022-09-01 19:07:19

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 1/1] NFSD: Simplify READ_PLUS

Good to see this! I'll study it over the next few days.


> On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
>
> From: Anna Schumaker <[email protected]>
>
> Change the implementation to return a single DATA segment covering the
> requested read range.

The discussion in your cover letter should go in this patch
description. A good patch description explains "why"; the diff
below already explains "what".

I harp on that because the patch description is important
information that I often consult when conducting archaeology
during troubleshooting. "Why the f... did we do that?"


> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
> 1 file changed, 20 insertions(+), 102 deletions(-)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 1e9690a061ec..adbff7737c14 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4731,79 +4731,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
>
> static __be32
> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> - struct nfsd4_read *read,
> - unsigned long *maxcount, u32 *eof,
> - loff_t *pos)
> + struct nfsd4_read *read)
> {
> + unsigned long maxcount;
> struct xdr_stream *xdr = resp->xdr;
> struct file *file = read->rd_nf->nf_file;
> - int starting_len = xdr->buf->len;
> - loff_t hole_pos;
> __be32 nfserr;
> - __be32 *p, tmp;
> - __be64 tmp64;
> -
> - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> - if (hole_pos > read->rd_offset)
> - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
> - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
> + __be32 *p;
>
> /* Content type, offset, byte count */
> p = xdr_reserve_space(xdr, 4 + 8 + 4);
> if (!p)
> return nfserr_resource;
>
> - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
> - if (read->rd_vlen < 0)
> - return nfserr_resource;
> + maxcount = min_t(unsigned long, read->rd_length,
> + (xdr->buf->buflen - xdr->buf->len));
>
> - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> if (nfserr)
> return nfserr;
> - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
>
> - tmp = htonl(NFS4_CONTENT_DATA);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> - tmp64 = cpu_to_be64(read->rd_offset);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> - tmp = htonl(*maxcount);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> -
> - tmp = xdr_zero;
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
> - xdr_pad_size(*maxcount));
> - return nfs_ok;
> -}
> -
> -static __be32
> -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> - struct nfsd4_read *read,
> - unsigned long *maxcount, u32 *eof)
> -{
> - struct file *file = read->rd_nf->nf_file;
> - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> - loff_t f_size = i_size_read(file_inode(file));
> - unsigned long count;
> - __be32 *p;
> -
> - if (data_pos == -ENXIO)
> - data_pos = f_size;
> - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
> - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
> - count = data_pos - read->rd_offset;
> -
> - /* Content type, offset, byte count */
> - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
> - if (!p)
> - return nfserr_resource;
> -
> - *p++ = htonl(NFS4_CONTENT_HOLE);
> + *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
> p = xdr_encode_hyper(p, read->rd_offset);
> - p = xdr_encode_hyper(p, count);
> + *p = cpu_to_be32(read->rd_length);
>
> - *eof = (read->rd_offset + count) >= f_size;
> - *maxcount = min_t(unsigned long, count, *maxcount);
> return nfs_ok;
> }
>
> @@ -4811,20 +4762,14 @@ static __be32
> nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> struct nfsd4_read *read)
> {
> - unsigned long maxcount, count;
> struct xdr_stream *xdr = resp->xdr;
> - struct file *file;
> + struct file *file = read->rd_nf->nf_file;
> int starting_len = xdr->buf->len;
> - int last_segment = xdr->buf->len;
> int segments = 0;
> - __be32 *p, tmp;
> - bool is_data;
> - loff_t pos;
> - u32 eof;
> + __be32 *p;
>
> if (nfserr)
> return nfserr;
> - file = read->rd_nf->nf_file;
>
> /* eof flag, segment count */
> p = xdr_reserve_space(xdr, 4 + 4);
> @@ -4832,48 +4777,21 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> return nfserr_resource;
> xdr_commit_encode(xdr);
>
> - maxcount = min_t(unsigned long, read->rd_length,
> - (xdr->buf->buflen - xdr->buf->len));
> - count = maxcount;
> -
> - eof = read->rd_offset >= i_size_read(file_inode(file));
> - if (eof)
> + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
> + if (read->rd_eof)
> goto out;
>
> - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> - is_data = pos > read->rd_offset;
> -
> - while (count > 0 && !eof) {
> - maxcount = count;
> - if (is_data)
> - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
> - segments == 0 ? &pos : NULL);
> - else
> - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> - if (nfserr)
> - goto out;
> - count -= maxcount;
> - read->rd_offset += maxcount;
> - is_data = !is_data;
> - last_segment = xdr->buf->len;
> - segments++;
> - }
> -
> -out:
> - if (nfserr && segments == 0)
> + nfserr = nfsd4_encode_read_plus_data(resp, read);
> + if (nfserr) {
> xdr_truncate_encode(xdr, starting_len);
> - else {
> - if (nfserr) {
> - xdr_truncate_encode(xdr, last_segment);
> - nfserr = nfs_ok;
> - eof = 0;
> - }
> - tmp = htonl(eof);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> - tmp = htonl(segments);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> + return nfserr;
> }
>
> + segments++;
> +
> +out:
> + p = xdr_encode_bool(p, read->rd_eof);
> + *p = cpu_to_be32(segments);
> return nfserr;
> }
>
> --
> 2.37.2
>

--
Chuck Lever



2022-09-01 19:55:30

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 1/1] NFSD: Simplify READ_PLUS

On Thu, Sep 1, 2022 at 3:05 PM Chuck Lever III <[email protected]> wrote:
>
> Good to see this! I'll study it over the next few days.
>
>
> > On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
> >
> > From: Anna Schumaker <[email protected]>
> >
> > Change the implementation to return a single DATA segment covering the
> > requested read range.
>
> The discussion in your cover letter should go in this patch
> description. A good patch description explains "why"; the diff
> below already explains "what".
>
> I harp on that because the patch description is important
> information that I often consult when conducting archaeology
> during troubleshooting. "Why the f... did we do that?"

Makes sense! Do you want me to resubmit now as a v2 with some of this
moved over to the patch description?

Anna
>
>
> > Signed-off-by: Anna Schumaker <[email protected]>
> > ---
> > fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
> > 1 file changed, 20 insertions(+), 102 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 1e9690a061ec..adbff7737c14 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -4731,79 +4731,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
> >
> > static __be32
> > nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > - struct nfsd4_read *read,
> > - unsigned long *maxcount, u32 *eof,
> > - loff_t *pos)
> > + struct nfsd4_read *read)
> > {
> > + unsigned long maxcount;
> > struct xdr_stream *xdr = resp->xdr;
> > struct file *file = read->rd_nf->nf_file;
> > - int starting_len = xdr->buf->len;
> > - loff_t hole_pos;
> > __be32 nfserr;
> > - __be32 *p, tmp;
> > - __be64 tmp64;
> > -
> > - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> > - if (hole_pos > read->rd_offset)
> > - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
> > - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
> > + __be32 *p;
> >
> > /* Content type, offset, byte count */
> > p = xdr_reserve_space(xdr, 4 + 8 + 4);
> > if (!p)
> > return nfserr_resource;
> >
> > - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
> > - if (read->rd_vlen < 0)
> > - return nfserr_resource;
> > + maxcount = min_t(unsigned long, read->rd_length,
> > + (xdr->buf->buflen - xdr->buf->len));
> >
> > - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> > - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
> > + nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> > if (nfserr)
> > return nfserr;
> > - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
> >
> > - tmp = htonl(NFS4_CONTENT_DATA);
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > - tmp64 = cpu_to_be64(read->rd_offset);
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> > - tmp = htonl(*maxcount);
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> > -
> > - tmp = xdr_zero;
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
> > - xdr_pad_size(*maxcount));
> > - return nfs_ok;
> > -}
> > -
> > -static __be32
> > -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> > - struct nfsd4_read *read,
> > - unsigned long *maxcount, u32 *eof)
> > -{
> > - struct file *file = read->rd_nf->nf_file;
> > - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> > - loff_t f_size = i_size_read(file_inode(file));
> > - unsigned long count;
> > - __be32 *p;
> > -
> > - if (data_pos == -ENXIO)
> > - data_pos = f_size;
> > - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
> > - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
> > - count = data_pos - read->rd_offset;
> > -
> > - /* Content type, offset, byte count */
> > - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
> > - if (!p)
> > - return nfserr_resource;
> > -
> > - *p++ = htonl(NFS4_CONTENT_HOLE);
> > + *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
> > p = xdr_encode_hyper(p, read->rd_offset);
> > - p = xdr_encode_hyper(p, count);
> > + *p = cpu_to_be32(read->rd_length);
> >
> > - *eof = (read->rd_offset + count) >= f_size;
> > - *maxcount = min_t(unsigned long, count, *maxcount);
> > return nfs_ok;
> > }
> >
> > @@ -4811,20 +4762,14 @@ static __be32
> > nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > struct nfsd4_read *read)
> > {
> > - unsigned long maxcount, count;
> > struct xdr_stream *xdr = resp->xdr;
> > - struct file *file;
> > + struct file *file = read->rd_nf->nf_file;
> > int starting_len = xdr->buf->len;
> > - int last_segment = xdr->buf->len;
> > int segments = 0;
> > - __be32 *p, tmp;
> > - bool is_data;
> > - loff_t pos;
> > - u32 eof;
> > + __be32 *p;
> >
> > if (nfserr)
> > return nfserr;
> > - file = read->rd_nf->nf_file;
> >
> > /* eof flag, segment count */
> > p = xdr_reserve_space(xdr, 4 + 4);
> > @@ -4832,48 +4777,21 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> > return nfserr_resource;
> > xdr_commit_encode(xdr);
> >
> > - maxcount = min_t(unsigned long, read->rd_length,
> > - (xdr->buf->buflen - xdr->buf->len));
> > - count = maxcount;
> > -
> > - eof = read->rd_offset >= i_size_read(file_inode(file));
> > - if (eof)
> > + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
> > + if (read->rd_eof)
> > goto out;
> >
> > - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> > - is_data = pos > read->rd_offset;
> > -
> > - while (count > 0 && !eof) {
> > - maxcount = count;
> > - if (is_data)
> > - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
> > - segments == 0 ? &pos : NULL);
> > - else
> > - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> > - if (nfserr)
> > - goto out;
> > - count -= maxcount;
> > - read->rd_offset += maxcount;
> > - is_data = !is_data;
> > - last_segment = xdr->buf->len;
> > - segments++;
> > - }
> > -
> > -out:
> > - if (nfserr && segments == 0)
> > + nfserr = nfsd4_encode_read_plus_data(resp, read);
> > + if (nfserr) {
> > xdr_truncate_encode(xdr, starting_len);
> > - else {
> > - if (nfserr) {
> > - xdr_truncate_encode(xdr, last_segment);
> > - nfserr = nfs_ok;
> > - eof = 0;
> > - }
> > - tmp = htonl(eof);
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> > - tmp = htonl(segments);
> > - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> > + return nfserr;
> > }
> >
> > + segments++;
> > +
> > +out:
> > + p = xdr_encode_bool(p, read->rd_eof);
> > + *p = cpu_to_be32(segments);
> > return nfserr;
> > }
> >
> > --
> > 2.37.2
> >
>
> --
> Chuck Lever
>
>
>

2022-09-01 19:58:03

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 1/1] NFSD: Simplify READ_PLUS



> On Sep 1, 2022, at 3:44 PM, Anna Schumaker <[email protected]> wrote:
>
> On Thu, Sep 1, 2022 at 3:05 PM Chuck Lever III <[email protected]> wrote:
>>
>> Good to see this! I'll study it over the next few days.
>>
>>
>>> On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
>>>
>>> From: Anna Schumaker <[email protected]>
>>>
>>> Change the implementation to return a single DATA segment covering the
>>> requested read range.
>>
>> The discussion in your cover letter should go in this patch
>> description. A good patch description explains "why"; the diff
>> below already explains "what".
>>
>> I harp on that because the patch description is important
>> information that I often consult when conducting archaeology
>> during troubleshooting. "Why the f... did we do that?"
>
> Makes sense! Do you want me to resubmit now as a v2 with some of this
> moved over to the patch description?

Let me have a careful look at the diff -- give me a couple days --
so you can incorporate any review comments in v2.

Also I'll try to answer the splice question you posed in 0/1.


> Anna
>>
>>
>>> Signed-off-by: Anna Schumaker <[email protected]>
>>> ---
>>> fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
>>> 1 file changed, 20 insertions(+), 102 deletions(-)
>>>
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index 1e9690a061ec..adbff7737c14 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -4731,79 +4731,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
>>>
>>> static __be32
>>> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>>> - struct nfsd4_read *read,
>>> - unsigned long *maxcount, u32 *eof,
>>> - loff_t *pos)
>>> + struct nfsd4_read *read)
>>> {
>>> + unsigned long maxcount;
>>> struct xdr_stream *xdr = resp->xdr;
>>> struct file *file = read->rd_nf->nf_file;
>>> - int starting_len = xdr->buf->len;
>>> - loff_t hole_pos;
>>> __be32 nfserr;
>>> - __be32 *p, tmp;
>>> - __be64 tmp64;
>>> -
>>> - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
>>> - if (hole_pos > read->rd_offset)
>>> - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
>>> - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
>>> + __be32 *p;
>>>
>>> /* Content type, offset, byte count */
>>> p = xdr_reserve_space(xdr, 4 + 8 + 4);
>>> if (!p)
>>> return nfserr_resource;
>>>
>>> - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
>>> - if (read->rd_vlen < 0)
>>> - return nfserr_resource;
>>> + maxcount = min_t(unsigned long, read->rd_length,
>>> + (xdr->buf->buflen - xdr->buf->len));
>>>
>>> - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
>>> - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
>>> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>>> if (nfserr)
>>> return nfserr;
>>> - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
>>>
>>> - tmp = htonl(NFS4_CONTENT_DATA);
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
>>> - tmp64 = cpu_to_be64(read->rd_offset);
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
>>> - tmp = htonl(*maxcount);
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
>>> -
>>> - tmp = xdr_zero;
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
>>> - xdr_pad_size(*maxcount));
>>> - return nfs_ok;
>>> -}
>>> -
>>> -static __be32
>>> -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
>>> - struct nfsd4_read *read,
>>> - unsigned long *maxcount, u32 *eof)
>>> -{
>>> - struct file *file = read->rd_nf->nf_file;
>>> - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
>>> - loff_t f_size = i_size_read(file_inode(file));
>>> - unsigned long count;
>>> - __be32 *p;
>>> -
>>> - if (data_pos == -ENXIO)
>>> - data_pos = f_size;
>>> - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
>>> - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
>>> - count = data_pos - read->rd_offset;
>>> -
>>> - /* Content type, offset, byte count */
>>> - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
>>> - if (!p)
>>> - return nfserr_resource;
>>> -
>>> - *p++ = htonl(NFS4_CONTENT_HOLE);
>>> + *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
>>> p = xdr_encode_hyper(p, read->rd_offset);
>>> - p = xdr_encode_hyper(p, count);
>>> + *p = cpu_to_be32(read->rd_length);
>>>
>>> - *eof = (read->rd_offset + count) >= f_size;
>>> - *maxcount = min_t(unsigned long, count, *maxcount);
>>> return nfs_ok;
>>> }
>>>
>>> @@ -4811,20 +4762,14 @@ static __be32
>>> nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>>> struct nfsd4_read *read)
>>> {
>>> - unsigned long maxcount, count;
>>> struct xdr_stream *xdr = resp->xdr;
>>> - struct file *file;
>>> + struct file *file = read->rd_nf->nf_file;
>>> int starting_len = xdr->buf->len;
>>> - int last_segment = xdr->buf->len;
>>> int segments = 0;
>>> - __be32 *p, tmp;
>>> - bool is_data;
>>> - loff_t pos;
>>> - u32 eof;
>>> + __be32 *p;
>>>
>>> if (nfserr)
>>> return nfserr;
>>> - file = read->rd_nf->nf_file;
>>>
>>> /* eof flag, segment count */
>>> p = xdr_reserve_space(xdr, 4 + 4);
>>> @@ -4832,48 +4777,21 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
>>> return nfserr_resource;
>>> xdr_commit_encode(xdr);
>>>
>>> - maxcount = min_t(unsigned long, read->rd_length,
>>> - (xdr->buf->buflen - xdr->buf->len));
>>> - count = maxcount;
>>> -
>>> - eof = read->rd_offset >= i_size_read(file_inode(file));
>>> - if (eof)
>>> + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
>>> + if (read->rd_eof)
>>> goto out;
>>>
>>> - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
>>> - is_data = pos > read->rd_offset;
>>> -
>>> - while (count > 0 && !eof) {
>>> - maxcount = count;
>>> - if (is_data)
>>> - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
>>> - segments == 0 ? &pos : NULL);
>>> - else
>>> - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
>>> - if (nfserr)
>>> - goto out;
>>> - count -= maxcount;
>>> - read->rd_offset += maxcount;
>>> - is_data = !is_data;
>>> - last_segment = xdr->buf->len;
>>> - segments++;
>>> - }
>>> -
>>> -out:
>>> - if (nfserr && segments == 0)
>>> + nfserr = nfsd4_encode_read_plus_data(resp, read);
>>> + if (nfserr) {
>>> xdr_truncate_encode(xdr, starting_len);
>>> - else {
>>> - if (nfserr) {
>>> - xdr_truncate_encode(xdr, last_segment);
>>> - nfserr = nfs_ok;
>>> - eof = 0;
>>> - }
>>> - tmp = htonl(eof);
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
>>> - tmp = htonl(segments);
>>> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
>>> + return nfserr;
>>> }
>>>
>>> + segments++;
>>> +
>>> +out:
>>> + p = xdr_encode_bool(p, read->rd_eof);
>>> + *p = cpu_to_be32(segments);
>>> return nfserr;
>>> }
>>>
>>> --
>>> 2.37.2
>>>
>>
>> --
>> Chuck Lever

--
Chuck Lever



2022-09-02 16:18:47

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 1/1] NFSD: Simplify READ_PLUS



> On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
>
> From: Anna Schumaker <[email protected]>
>
> Change the implementation to return a single DATA segment covering the
> requested read range.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> ---
> fs/nfsd/nfs4xdr.c | 122 ++++++++--------------------------------------
> 1 file changed, 20 insertions(+), 102 deletions(-)
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 1e9690a061ec..adbff7737c14 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4731,79 +4731,30 @@ nfsd4_encode_offload_status(struct nfsd4_compoundres *resp, __be32 nfserr,
>
> static __be32
> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> - struct nfsd4_read *read,
> - unsigned long *maxcount, u32 *eof,
> - loff_t *pos)
> + struct nfsd4_read *read)

Now eventually (but not right at the moment) I don't think we
can use the rd_length and rd_offset fields in @read directly
in this function ... won't those arguments be different for
each data segment? Not a problem yet, just thinking out loud.

I think using @read directly here makes it easy to re-use
nfsd4_encode_readv(), so I'm OK with this strategy for the
moment.


> {
> + unsigned long maxcount;

Nit: reverse christmas tree, please. The declaration of
maxcount goes on the line after "struct file *file ...".


> struct xdr_stream *xdr = resp->xdr;
> struct file *file = read->rd_nf->nf_file;
> - int starting_len = xdr->buf->len;
> - loff_t hole_pos;
> __be32 nfserr;
> - __be32 *p, tmp;
> - __be64 tmp64;
> -
> - hole_pos = pos ? *pos : vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> - if (hole_pos > read->rd_offset)
> - *maxcount = min_t(unsigned long, *maxcount, hole_pos - read->rd_offset);
> - *maxcount = min_t(unsigned long, *maxcount, (xdr->buf->buflen - xdr->buf->len));
> + __be32 *p;
>
> /* Content type, offset, byte count */
> p = xdr_reserve_space(xdr, 4 + 8 + 4);
> if (!p)
> return nfserr_resource;
>
> - read->rd_vlen = xdr_reserve_space_vec(xdr, resp->rqstp->rq_vec, *maxcount);
> - if (read->rd_vlen < 0)
> - return nfserr_resource;
> + maxcount = min_t(unsigned long, read->rd_length,
> + (xdr->buf->buflen - xdr->buf->len));
>
> - nfserr = nfsd_readv(resp->rqstp, read->rd_fhp, file, read->rd_offset,
> - resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> if (nfserr)
> return nfserr;

The only problem I see with this so far is that nfsd4_encode_readv()
can return nfserr_resource, which isn't allowed after NFSv4.0, and
is not listed in RFC 7862's table of allowed errors for READ_PLUS.

I see that READ_PLUS code remaining after this patch might make the
same mistake. I think this patch needs to correct that at least for
the code that isn't shared with READ. Consult RFC 7862 to figure out
the correct status codes to return. (I'll file a bug to audit the
other encoders and come up with a plan to ensure NFSD returns an
appropriate error code for the minorversion in use).


To help answer the "merge readiness" question and to know if we need
splice read support or not, can you run some performance comparison
tests with NFSv4.2 READ (with and without a splice-enabled filesystem)
and this READ_PLUS implementation? Nothing elaborate, let's just check
our assumptions.

I've also applied the 0/1 splice change here to do some testing to
see if I can work out what's giving xfstests heartburn.


> - xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
>
> - tmp = htonl(NFS4_CONTENT_DATA);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> - tmp64 = cpu_to_be64(read->rd_offset);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
> - tmp = htonl(*maxcount);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
> -
> - tmp = xdr_zero;
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
> - xdr_pad_size(*maxcount));
> - return nfs_ok;
> -}
> -
> -static __be32
> -nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp,
> - struct nfsd4_read *read,
> - unsigned long *maxcount, u32 *eof)
> -{
> - struct file *file = read->rd_nf->nf_file;
> - loff_t data_pos = vfs_llseek(file, read->rd_offset, SEEK_DATA);
> - loff_t f_size = i_size_read(file_inode(file));
> - unsigned long count;
> - __be32 *p;
> -
> - if (data_pos == -ENXIO)
> - data_pos = f_size;
> - else if (data_pos <= read->rd_offset || (data_pos < f_size && data_pos % PAGE_SIZE))
> - return nfsd4_encode_read_plus_data(resp, read, maxcount, eof, &f_size);
> - count = data_pos - read->rd_offset;
> -
> - /* Content type, offset, byte count */
> - p = xdr_reserve_space(resp->xdr, 4 + 8 + 8);
> - if (!p)
> - return nfserr_resource;
> -
> - *p++ = htonl(NFS4_CONTENT_HOLE);
> + *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
> p = xdr_encode_hyper(p, read->rd_offset);
> - p = xdr_encode_hyper(p, count);
> + *p = cpu_to_be32(read->rd_length);
>
> - *eof = (read->rd_offset + count) >= f_size;
> - *maxcount = min_t(unsigned long, count, *maxcount);
> return nfs_ok;
> }
>
> @@ -4811,20 +4762,14 @@ static __be32
> nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> struct nfsd4_read *read)
> {
> - unsigned long maxcount, count;
> struct xdr_stream *xdr = resp->xdr;
> - struct file *file;
> + struct file *file = read->rd_nf->nf_file;

Nit: Reverse christmas tree style means this one goes at the top.


> int starting_len = xdr->buf->len;
> - int last_segment = xdr->buf->len;
> int segments = 0;

I would also say that @segments should be u32, but that's not
relevant to this changeset. Optional if you want to add that
change.


> - __be32 *p, tmp;
> - bool is_data;
> - loff_t pos;
> - u32 eof;
> + __be32 *p;
>
> if (nfserr)
> return nfserr;
> - file = read->rd_nf->nf_file;
>
> /* eof flag, segment count */
> p = xdr_reserve_space(xdr, 4 + 4);
> @@ -4832,48 +4777,21 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
> return nfserr_resource;
> xdr_commit_encode(xdr);
>
> - maxcount = min_t(unsigned long, read->rd_length,
> - (xdr->buf->buflen - xdr->buf->len));
> - count = maxcount;
> -
> - eof = read->rd_offset >= i_size_read(file_inode(file));
> - if (eof)
> + read->rd_eof = read->rd_offset >= i_size_read(file_inode(file));
> + if (read->rd_eof)
> goto out;
>
> - pos = vfs_llseek(file, read->rd_offset, SEEK_HOLE);
> - is_data = pos > read->rd_offset;
> -
> - while (count > 0 && !eof) {
> - maxcount = count;
> - if (is_data)
> - nfserr = nfsd4_encode_read_plus_data(resp, read, &maxcount, &eof,
> - segments == 0 ? &pos : NULL);
> - else
> - nfserr = nfsd4_encode_read_plus_hole(resp, read, &maxcount, &eof);
> - if (nfserr)
> - goto out;
> - count -= maxcount;
> - read->rd_offset += maxcount;
> - is_data = !is_data;
> - last_segment = xdr->buf->len;
> - segments++;
> - }
> -
> -out:
> - if (nfserr && segments == 0)
> + nfserr = nfsd4_encode_read_plus_data(resp, read);
> + if (nfserr) {
> xdr_truncate_encode(xdr, starting_len);
> - else {
> - if (nfserr) {
> - xdr_truncate_encode(xdr, last_segment);
> - nfserr = nfs_ok;
> - eof = 0;
> - }
> - tmp = htonl(eof);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
> - tmp = htonl(segments);
> - write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
> + return nfserr;
> }
>
> + segments++;
> +
> +out:
> + p = xdr_encode_bool(p, read->rd_eof);
> + *p = cpu_to_be32(segments);
> return nfserr;
> }
>
> --
> 2.37.2
>

--
Chuck Lever



2022-09-03 17:37:39

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 0/1] NFSD: Simplify READ_PLUS



> On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
>
> Chuck, I tried to add in sparse read support by adding this extra
> change. Unfortunately it leads to a bunch of new failing xfstests. Do
> you have any thoughts about what might be going on? Is the patch okay
> without the splice support?
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index adbff7737c14..e21e6cfd1c6d 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -4733,6 +4733,7 @@ static __be32
> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> struct nfsd4_read *read)
> {
> + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
> unsigned long maxcount;
> struct xdr_stream *xdr = resp->xdr;
> struct file *file = read->rd_nf->nf_file;
> @@ -4747,7 +4748,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> maxcount = min_t(unsigned long, read->rd_length,
> (xdr->buf->buflen - xdr->buf->len));
>
> - nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> + if (file->f_op->splice_read && splice_ok)
> + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> + else
> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount)
> if (nfserr)
> return nfserr;

I applied the above change to a test server, and was able to reproduce
a bunch of new test failures when using NFSv4.2. I confirmed using nfsd
tracepoints that splice read and READ_PLUS is being used.

I then expanded the test. When using an XFS-based export, I reproduced
the failures. But I was not able to reproduce these failures with
exports based on tmpfs, btrfs, or ext4. Again, I confirmed using nfsd
tracepoints that splice read was being used, and mountstats on my
client showed READ_PLUS is being used.

Then I tried testing the XFS-backed export with NFSv4.1, and found
that most of the failures appeared again. Once again, I confirmed
using nfsd tracepoints that splice read is being used during the tests.

Can you confirm that you see test failures with NFSv4.1 and XFS but
not with NFSv4.2 / READ_PLUS with btrfs, ext4, or tmpfs?


--
Chuck Lever



2022-09-06 18:24:57

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH 0/1] NFSD: Simplify READ_PLUS

Hi Chuck,

On Sat, Sep 3, 2022 at 1:36 PM Chuck Lever III <[email protected]> wrote:
>
>
>
> > On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
> >
> > Chuck, I tried to add in sparse read support by adding this extra
> > change. Unfortunately it leads to a bunch of new failing xfstests. Do
> > you have any thoughts about what might be going on? Is the patch okay
> > without the splice support?
> >
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index adbff7737c14..e21e6cfd1c6d 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -4733,6 +4733,7 @@ static __be32
> > nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > struct nfsd4_read *read)
> > {
> > + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
> > unsigned long maxcount;
> > struct xdr_stream *xdr = resp->xdr;
> > struct file *file = read->rd_nf->nf_file;
> > @@ -4747,7 +4748,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
> > maxcount = min_t(unsigned long, read->rd_length,
> > (xdr->buf->buflen - xdr->buf->len));
> >
> > - nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
> > + if (file->f_op->splice_read && splice_ok)
> > + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
> > + else
> > + nfserr = nfsd4_encode_readv(resp, read, file, maxcount)
> > if (nfserr)
> > return nfserr;
>
> I applied the above change to a test server, and was able to reproduce
> a bunch of new test failures when using NFSv4.2. I confirmed using nfsd
> tracepoints that splice read and READ_PLUS is being used.
>
> I then expanded the test. When using an XFS-based export, I reproduced
> the failures. But I was not able to reproduce these failures with
> exports based on tmpfs, btrfs, or ext4. Again, I confirmed using nfsd
> tracepoints that splice read was being used, and mountstats on my
> client showed READ_PLUS is being used.
>
> Then I tried testing the XFS-backed export with NFSv4.1, and found
> that most of the failures appeared again. Once again, I confirmed
> using nfsd tracepoints that splice read is being used during the tests.
>
> Can you confirm that you see test failures with NFSv4.1 and XFS but
> not with NFSv4.2 / READ_PLUS with btrfs, ext4, or tmpfs?

I can confirm that I'm seeing the same failures with NFS v4.1 and xfs,
but not with v4.2 and ext4. I didn't test btrfs or tmpfs, since the
ext4 test passed.

Should I re-add the splice change for v2 of this patch, in addition to
addressing the other comments you had?

Anna

>
>
> --
> Chuck Lever
>
>
>

2022-09-06 18:26:00

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH 0/1] NFSD: Simplify READ_PLUS



> On Sep 6, 2022, at 2:17 PM, Anna Schumaker <[email protected]> wrote:
>
> Hi Chuck,
>
> On Sat, Sep 3, 2022 at 1:36 PM Chuck Lever III <[email protected]> wrote:
>>
>>
>>
>>> On Sep 1, 2022, at 2:33 PM, Anna Schumaker <[email protected]> wrote:
>>>
>>> Chuck, I tried to add in sparse read support by adding this extra
>>> change. Unfortunately it leads to a bunch of new failing xfstests. Do
>>> you have any thoughts about what might be going on? Is the patch okay
>>> without the splice support?
>>>
>>> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
>>> index adbff7737c14..e21e6cfd1c6d 100644
>>> --- a/fs/nfsd/nfs4xdr.c
>>> +++ b/fs/nfsd/nfs4xdr.c
>>> @@ -4733,6 +4733,7 @@ static __be32
>>> nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>>> struct nfsd4_read *read)
>>> {
>>> + bool splice_ok = test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags);
>>> unsigned long maxcount;
>>> struct xdr_stream *xdr = resp->xdr;
>>> struct file *file = read->rd_nf->nf_file;
>>> @@ -4747,7 +4748,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
>>> maxcount = min_t(unsigned long, read->rd_length,
>>> (xdr->buf->buflen - xdr->buf->len));
>>>
>>> - nfserr = nfsd4_encode_readv(resp, read, file, maxcount);
>>> + if (file->f_op->splice_read && splice_ok)
>>> + nfserr = nfsd4_encode_splice_read(resp, read, file, maxcount);
>>> + else
>>> + nfserr = nfsd4_encode_readv(resp, read, file, maxcount)
>>> if (nfserr)
>>> return nfserr;
>>
>> I applied the above change to a test server, and was able to reproduce
>> a bunch of new test failures when using NFSv4.2. I confirmed using nfsd
>> tracepoints that splice read and READ_PLUS is being used.
>>
>> I then expanded the test. When using an XFS-based export, I reproduced
>> the failures. But I was not able to reproduce these failures with
>> exports based on tmpfs, btrfs, or ext4. Again, I confirmed using nfsd
>> tracepoints that splice read was being used, and mountstats on my
>> client showed READ_PLUS is being used.
>>
>> Then I tried testing the XFS-backed export with NFSv4.1, and found
>> that most of the failures appeared again. Once again, I confirmed
>> using nfsd tracepoints that splice read is being used during the tests.
>>
>> Can you confirm that you see test failures with NFSv4.1 and XFS but
>> not with NFSv4.2 / READ_PLUS with btrfs, ext4, or tmpfs?
>
> I can confirm that I'm seeing the same failures with NFS v4.1 and xfs,
> but not with v4.2 and ext4. I didn't test btrfs or tmpfs, since the
> ext4 test passed.
>
> Should I re-add the splice change for v2 of this patch, in addition to
> addressing the other comments you had?

Yes.

Given the comment in nfsd4_read() I think READ_PLUS can't use splice
read for anything but the last data segment and only if this READ_PLUS
is the final operation in the COMPOUND. That will need some attention
at some later point.

Meanwhile I'm going to try to bisect the XFS READ failures right now.


--
Chuck Lever