2016-03-21 14:42:27

by Benjamin Coddington

[permalink] [raw]
Subject: [PATCH] nfsd: use short read rather than i_size to set eof

Use the result of a local read to determine when to set the eof flag. This
allows us to return the location of the end of the file atomically at the
time of the read.

Signed-off-by: Benjamin Coddington <[email protected]>
---
fs/nfsd/nfs3proc.c | 10 ++++------
fs/nfsd/nfs4xdr.c | 9 +++++----
2 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7..6be6092 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
{
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned long cnt = min(argp->count, max_blocksize);

dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = min(argp->count, max_blocksize);
+ resp->count = cnt;
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);

fh_copy(&resp->fh, &argp->fh);
@@ -165,11 +166,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
argp->offset,
rqstp->rq_vec, argp->vlen,
&resp->count);
- if (nfserr == 0) {
- struct inode *inode = d_inode(resp->fh.fh_dentry);
-
- resp->eof = (argp->offset + resp->count) >= inode->i_size;
- }
+ if (nfserr == 0)
+ resp->eof = cnt > resp->count;

RETURN_STATUS(nfserr);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..2e14839 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
+ long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;

+ len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
read->rd_offset, &maxcount);
if (nfserr) {
@@ -3382,8 +3384,7 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}

- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = len > maxcount;

*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3453,14 +3454,14 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;

+ len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));

- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = len > maxcount;

tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
--
1.7.1



2016-03-21 21:36:55

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH] nfsd: use short read rather than i_size to set eof

On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> Use the result of a local read to determine when to set the eof flag. This
> allows us to return the location of the end of the file atomically at the
> time of the read.

My only question is whether we should instead do something like:

eof = (res > cnt) || (offset + cnt >= i_size)

That'd fix the reported bug without changing existing behavior
otherwise.

But maybe it's unlikely any client depends on existing behavior.

The only test failure I'm seeing is on pynfs WRT13, which literally just
checks that a 6-byte read of a 6-byte file returns with eof set. The
test is correct (the spec does clearly require eof to be set in that
case), but maybe it's not important.

--b.

>
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfsd/nfs3proc.c | 10 ++++------
> fs/nfsd/nfs4xdr.c | 9 +++++----
> 2 files changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 7b755b7..6be6092 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> {
> __be32 nfserr;
> u32 max_blocksize = svc_max_payload(rqstp);
> + unsigned long cnt = min(argp->count, max_blocksize);
>
> dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
> SVCFH_fmt(&argp->fh),
> @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
> * + 1 (xdr opaque byte count) = 26
> */
> - resp->count = min(argp->count, max_blocksize);
> + resp->count = cnt;
> svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
>
> fh_copy(&resp->fh, &argp->fh);
> @@ -165,11 +166,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> argp->offset,
> rqstp->rq_vec, argp->vlen,
> &resp->count);
> - if (nfserr == 0) {
> - struct inode *inode = d_inode(resp->fh.fh_dentry);
> -
> - resp->eof = (argp->offset + resp->count) >= inode->i_size;
> - }
> + if (nfserr == 0)
> + resp->eof = cnt > resp->count;
>
> RETURN_STATUS(nfserr);
> }
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index d6ef095..2e14839 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
> struct xdr_stream *xdr = &resp->xdr;
> struct xdr_buf *buf = xdr->buf;
> u32 eof;
> + long len;
> int space_left;
> __be32 nfserr;
> __be32 *p = xdr->p - 2;
> @@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
> if (xdr->end - xdr->p < 1)
> return nfserr_resource;
>
> + len = maxcount;
> nfserr = nfsd_splice_read(read->rd_rqstp, file,
> read->rd_offset, &maxcount);
> if (nfserr) {
> @@ -3382,8 +3384,7 @@ static __be32 nfsd4_encode_splice_read(
> return nfserr;
> }
>
> - eof = (read->rd_offset + maxcount >=
> - d_inode(read->rd_fhp->fh_dentry)->i_size);
> + eof = len > maxcount;
>
> *(p++) = htonl(eof);
> *(p++) = htonl(maxcount);
> @@ -3453,14 +3454,14 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
> }
> read->rd_vlen = v;
>
> + len = maxcount;
> nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
> read->rd_vlen, &maxcount);
> if (nfserr)
> return nfserr;
> xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>
> - eof = (read->rd_offset + maxcount >=
> - d_inode(read->rd_fhp->fh_dentry)->i_size);
> + eof = len > maxcount;
>
> tmp = htonl(eof);
> write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
> --
> 1.7.1

2016-03-22 14:28:38

by Benjamin Coddington

[permalink] [raw]
Subject: [PATCH v2] nfsd: use short read rather than i_size to set eof

On Mon, 21 Mar 2016, J. Bruce Fields wrote:
> On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> > Use the result of a local read to determine when to set the eof flag.
> > This
> > allows us to return the location of the end of the file atomically at
> > the
> > time of the read.
>
> My only question is whether we should instead do something like:
>
> eof = (res > cnt) || (offset + cnt >= i_size)

Yes, let's do that.

> That'd fix the reported bug without changing existing behavior
> otherwise.
>
> But maybe it's unlikely any client depends on existing behavior.
>
> The only test failure I'm seeing is on pynfs WRT13, which literally just
> checks that a 6-byte read of a 6-byte file returns with eof set. The
> test is correct (the spec does clearly require eof to be set in that
> case), but maybe it's not important.

The above change will fix this up and be correct in the absence of races
which saves the client from having to perform another full RPC just to
retrieve eof. This passes WRT13:

8<------------------------------------------------------------------------

Use the result of a local read to determine when to set the eof flag. This
allows us to return the location of the end of the file atomically at the
time of the read.

Signed-off-by: Benjamin Coddington <[email protected]>
---
fs/nfsd/nfs3proc.c | 7 ++++---
fs/nfsd/nfs4xdr.c | 11 +++++++----
2 files changed, 11 insertions(+), 7 deletions(-)

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7..83c9abb 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
{
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned long cnt = min(argp->count, max_blocksize);

dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = min(argp->count, max_blocksize);
+ resp->count = cnt;
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);

fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
-
- resp->eof = (argp->offset + resp->count) >= inode->i_size;
+ resp->eof = (cnt > resp->count) ||
+ ((argp->offset + resp->count) >= inode->i_size);
}

RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..1d26b2b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
+ long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;

+ len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
read->rd_offset, &maxcount);
if (nfserr) {
@@ -3382,8 +3384,8 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}

- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = (len > maxcount) ||
+ ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));

*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3453,14 +3455,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;

+ len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));

- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = (len > maxcount) ||
+ ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));

tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
--
1.7.1


2016-03-22 16:46:24

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: use short read rather than i_size to set eof

On Tue, Mar 22, 2016 at 10:28:36AM -0400, Benjamin Coddington wrote:
> On Mon, 21 Mar 2016, J. Bruce Fields wrote:
> > On Mon, Mar 21, 2016 at 10:42:25AM -0400, Benjamin Coddington wrote:
> > > Use the result of a local read to determine when to set the eof flag.
> > > This
> > > allows us to return the location of the end of the file atomically at
> > > the
> > > time of the read.
> >
> > My only question is whether we should instead do something like:
> >
> > eof = (res > cnt) || (offset + cnt >= i_size)
>
> Yes, let's do that.
>
> > That'd fix the reported bug without changing existing behavior
> > otherwise.
> >
> > But maybe it's unlikely any client depends on existing behavior.
> >
> > The only test failure I'm seeing is on pynfs WRT13, which literally just
> > checks that a 6-byte read of a 6-byte file returns with eof set. The
> > test is correct (the spec does clearly require eof to be set in that
> > case), but maybe it's not important.
>
> The above change will fix this up and be correct in the absence of races
> which saves the client from having to perform another full RPC just to
> retrieve eof.

Seems likely to be rare (and possibly papered over by caching--does the
client even send a read if it would extend past the size returned from a
recent getattr?).

But, this does seem like the most conservative approach for now.
Applying.

Thanks!

--b.

> This passes WRT13:
>
> 8<------------------------------------------------------------------------
>
> Use the result of a local read to determine when to set the eof flag. This
> allows us to return the location of the end of the file atomically at the
> time of the read.
>
> Signed-off-by: Benjamin Coddington <[email protected]>
> ---
> fs/nfsd/nfs3proc.c | 7 ++++---
> fs/nfsd/nfs4xdr.c | 11 +++++++----
> 2 files changed, 11 insertions(+), 7 deletions(-)
>
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 7b755b7..83c9abb 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> {
> __be32 nfserr;
> u32 max_blocksize = svc_max_payload(rqstp);
> + unsigned long cnt = min(argp->count, max_blocksize);
>
> dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
> SVCFH_fmt(&argp->fh),
> @@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> * 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
> * + 1 (xdr opaque byte count) = 26
> */
> - resp->count = min(argp->count, max_blocksize);
> + resp->count = cnt;
> svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
>
> fh_copy(&resp->fh, &argp->fh);
> @@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> &resp->count);
> if (nfserr == 0) {
> struct inode *inode = d_inode(resp->fh.fh_dentry);
> -
> - resp->eof = (argp->offset + resp->count) >= inode->i_size;
> + resp->eof = (cnt > resp->count) ||
> + ((argp->offset + resp->count) >= inode->i_size);
> }
>
> RETURN_STATUS(nfserr);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index d6ef095..1d26b2b 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3362,6 +3362,7 @@ static __be32 nfsd4_encode_splice_read(
> struct xdr_stream *xdr = &resp->xdr;
> struct xdr_buf *buf = xdr->buf;
> u32 eof;
> + long len;
> int space_left;
> __be32 nfserr;
> __be32 *p = xdr->p - 2;
> @@ -3370,6 +3371,7 @@ static __be32 nfsd4_encode_splice_read(
> if (xdr->end - xdr->p < 1)
> return nfserr_resource;
>
> + len = maxcount;
> nfserr = nfsd_splice_read(read->rd_rqstp, file,
> read->rd_offset, &maxcount);
> if (nfserr) {
> @@ -3382,8 +3384,8 @@ static __be32 nfsd4_encode_splice_read(
> return nfserr;
> }
>
> - eof = (read->rd_offset + maxcount >=
> - d_inode(read->rd_fhp->fh_dentry)->i_size);
> + eof = (len > maxcount) ||
> + ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
>
> *(p++) = htonl(eof);
> *(p++) = htonl(maxcount);
> @@ -3453,14 +3455,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
> }
> read->rd_vlen = v;
>
> + len = maxcount;
> nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
> read->rd_vlen, &maxcount);
> if (nfserr)
> return nfserr;
> xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>
> - eof = (read->rd_offset + maxcount >=
> - d_inode(read->rd_fhp->fh_dentry)->i_size);
> + eof = (len > maxcount) ||
> + ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
>
> tmp = htonl(eof);
> write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
> --
> 1.7.1

2016-03-22 18:53:36

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: use short read rather than i_size to set eof

Possibly overkill, but I think I'll fold in something like this if you
don't see an objection.

--b.

commit 58e18a2a14a0
Author: J. Bruce Fields <[email protected]>
Date: Tue Mar 22 14:08:11 2016 -0400

nfsd: document read eof logic

The choice of checks here is a little subtle, let's document this for
posterity.

Signed-off-by: J. Bruce Fields <[email protected]>

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 83c9abb33e8b..df0f0a86f21d 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
- resp->eof = (cnt > resp->count) ||
- ((argp->offset + resp->count) >= inode->i_size);
+ resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
}

RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 90232bd7e498..9df898ba648f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}

- eof = (len > maxcount) ||
- ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);

*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));

- eof = (len > maxcount) ||
- ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);

tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba316f23f..6244e073c137 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}

+static inline bool nfsd_eof_on_read(long requested, long read,
+ loff_t offset, loff_t size)
+{
+ /* We assume a short read means eof: */
+ if (requested > read)
+ return true;
+ /*
+ * A non-short read might also reach end of file. The spec
+ * still requires us to set eof in that case.
+ *
+ * Further operations may have modified the file size since
+ * the read, so the following check is not atomic with the read.
+ * The only case we've seen that cause a problem for a client
+ * is the case where the read returned a count of 0 without
+ * setting eof. That case was fixed by the addition of the
+ * above check.
+ */
+ return (offset + read >= size);
+}
+
#endif /* LINUX_NFSD_VFS_H */

2016-03-22 20:51:51

by Benjamin Coddington

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: use short read rather than i_size to set eof

On Tue, 22 Mar 2016, J. Bruce Fields wrote:

> Possibly overkill, but I think I'll fold in something like this if you
> don't see an objection.

No objection. To answer your earlier question (does the client even send a
read if it would extend past the size returned from a recent getattr?):

Looks to me (by testing with a server that never sets eof) the answer is no.
The client won't issue reads beyond the end of the file.

Also the previous patch's subject is now a little misleading. Maybe
s/rather than/as well as/ would do.

Ben

> commit 58e18a2a14a0
> Author: J. Bruce Fields <[email protected]>
> Date: Tue Mar 22 14:08:11 2016 -0400
>
> nfsd: document read eof logic
>
> The choice of checks here is a little subtle, let's document this for
> posterity.
>
> Signed-off-by: J. Bruce Fields <[email protected]>
>
> diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> index 83c9abb33e8b..df0f0a86f21d 100644
> --- a/fs/nfsd/nfs3proc.c
> +++ b/fs/nfsd/nfs3proc.c
> @@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> &resp->count);
> if (nfserr == 0) {
> struct inode *inode = d_inode(resp->fh.fh_dentry);
> - resp->eof = (cnt > resp->count) ||
> - ((argp->offset + resp->count) >= inode->i_size);
> + resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
> }
>
> RETURN_STATUS(nfserr);
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 90232bd7e498..9df898ba648f 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
> return nfserr;
> }
>
> - eof = (len > maxcount) ||
> - ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> + d_inode(read->rd_fhp->fh_dentry)->i_size);
>
> *(p++) = htonl(eof);
> *(p++) = htonl(maxcount);
> @@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
> return nfserr;
> xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
>
> - eof = (len > maxcount) ||
> - ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> + d_inode(read->rd_fhp->fh_dentry)->i_size);
>
> tmp = htonl(eof);
> write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index c11ba316f23f..6244e073c137 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
> || createmode == NFS4_CREATE_EXCLUSIVE4_1;
> }
>
> +static inline bool nfsd_eof_on_read(long requested, long read,
> + loff_t offset, loff_t size)
> +{
> + /* We assume a short read means eof: */
> + if (requested > read)
> + return true;
> + /*
> + * A non-short read might also reach end of file. The spec
> + * still requires us to set eof in that case.
> + *
> + * Further operations may have modified the file size since
> + * the read, so the following check is not atomic with the read.
> + * The only case we've seen that cause a problem for a client
> + * is the case where the read returned a count of 0 without
> + * setting eof. That case was fixed by the addition of the
> + * above check.
> + */
> + return (offset + read >= size);
> +}
> +
> #endif /* LINUX_NFSD_VFS_H */
>

2016-03-22 21:22:11

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v2] nfsd: use short read rather than i_size to set eof

On Tue, Mar 22, 2016 at 04:51:47PM -0400, Benjamin Coddington wrote:
> On Tue, 22 Mar 2016, J. Bruce Fields wrote:
>
> > Possibly overkill, but I think I'll fold in something like this if you
> > don't see an objection.
>
> No objection. To answer your earlier question (does the client even send a
> read if it would extend past the size returned from a recent getattr?):
>
> Looks to me (by testing with a server that never sets eof) the answer is no.
> The client won't issue reads beyond the end of the file.
>
> Also the previous patch's subject is now a little misleading. Maybe
> s/rather than/as well as/ would do.

OK.--b.

>
> Ben
>
> > commit 58e18a2a14a0
> > Author: J. Bruce Fields <[email protected]>
> > Date: Tue Mar 22 14:08:11 2016 -0400
> >
> > nfsd: document read eof logic
> >
> > The choice of checks here is a little subtle, let's document this for
> > posterity.
> >
> > Signed-off-by: J. Bruce Fields <[email protected]>
> >
> > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
> > index 83c9abb33e8b..df0f0a86f21d 100644
> > --- a/fs/nfsd/nfs3proc.c
> > +++ b/fs/nfsd/nfs3proc.c
> > @@ -168,8 +168,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
> > &resp->count);
> > if (nfserr == 0) {
> > struct inode *inode = d_inode(resp->fh.fh_dentry);
> > - resp->eof = (cnt > resp->count) ||
> > - ((argp->offset + resp->count) >= inode->i_size);
> > + resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset, inode->i_size);
> > }
> >
> > RETURN_STATUS(nfserr);
> > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> > index 90232bd7e498..9df898ba648f 100644
> > --- a/fs/nfsd/nfs4xdr.c
> > +++ b/fs/nfsd/nfs4xdr.c
> > @@ -3387,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
> > return nfserr;
> > }
> >
> > - eof = (len > maxcount) ||
> > - ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> > + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> > + d_inode(read->rd_fhp->fh_dentry)->i_size);
> >
> > *(p++) = htonl(eof);
> > *(p++) = htonl(maxcount);
> > @@ -3465,8 +3465,8 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
> > return nfserr;
> > xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
> >
> > - eof = (len > maxcount) ||
> > - ((read->rd_offset + maxcount >= d_inode(read->rd_fhp->fh_dentry)->i_size));
> > + eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
> > + d_inode(read->rd_fhp->fh_dentry)->i_size);
> >
> > tmp = htonl(eof);
> > write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
> > diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> > index c11ba316f23f..6244e073c137 100644
> > --- a/fs/nfsd/vfs.h
> > +++ b/fs/nfsd/vfs.h
> > @@ -139,4 +139,24 @@ static inline int nfsd_create_is_exclusive(int createmode)
> > || createmode == NFS4_CREATE_EXCLUSIVE4_1;
> > }
> >
> > +static inline bool nfsd_eof_on_read(long requested, long read,
> > + loff_t offset, loff_t size)
> > +{
> > + /* We assume a short read means eof: */
> > + if (requested > read)
> > + return true;
> > + /*
> > + * A non-short read might also reach end of file. The spec
> > + * still requires us to set eof in that case.
> > + *
> > + * Further operations may have modified the file size since
> > + * the read, so the following check is not atomic with the read.
> > + * The only case we've seen that cause a problem for a client
> > + * is the case where the read returned a count of 0 without
> > + * setting eof. That case was fixed by the addition of the
> > + * above check.
> > + */
> > + return (offset + read >= size);
> > +}
> > +
> > #endif /* LINUX_NFSD_VFS_H */
> >