2014-12-17 22:35:08

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 0/4] NFS: Add READ_PLUS support

From: Anna Schumaker <[email protected]>

These patches add server support for the NFS v4.2 operation READ_PLUS.

I noticed a race condition in the 3rd patch when I start needing vfs_llseek()
to determine if I should encode the next segment as either data or a hole. It
is possible that the file could change on us between each of the seek calls,
so we don't know if we are actually at a hole or data segment. I don't want to
add new locks to the NFS server for this case, so instead I've decided to
encode any "quantum data" segments as if they were actually data.

I tested these patches using xfstests, specificially generic/075, generic/091,
generic/112, generic/127, generic/210, and generic/263. Additionally, three
new tests are run once READ_PLUS support has been added: generic/213,
generic/214, and generic/228.

These patches and the corresponding client changes are available in the
[read_plus] branch of

git://git.linux-nfs.org/projects/anna/linux-nfs.git

Questions? Comments? Thoughts?

Anna


Anna Schumaker (4):
NFSD: nfsd4_encode_read() should encode eof and maxcount
NFSD: Add READ_PLUS support for data segments
NFSD: Add support for encoding holes in files
NFSD: Add support for encoding multiple segments

fs/nfsd/nfs4proc.c | 16 +++++
fs/nfsd/nfs4xdr.c | 174 +++++++++++++++++++++++++++++++++++++++++------------
2 files changed, 152 insertions(+), 38 deletions(-)

--
2.1.3



2014-12-17 22:35:16

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 1/4] NFSD: nfsd4_encode_read() should encode eof and maxcount

From: Anna Schumaker <[email protected]>

I intend to reuse nfsd4_encode_readv() for READ_PLUS, so I need an
alternate way to encode these values. I think it makes sense for
nfsd4_encode_read() to handle this in a single place for both READ
paths.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4xdr.c | 59 ++++++++++++++++++++++---------------------------------
1 file changed, 23 insertions(+), 36 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 0622d4f..1943cdb 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3119,21 +3119,19 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
static __be32 nfsd4_encode_splice_read(
struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
- struct file *file, unsigned long maxcount)
+ struct file *file, unsigned long *maxcount)
{
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
- u32 eof;
int space_left;
__be32 nfserr;
- __be32 *p = xdr->p - 2;

/* Make sure there will be room for padding if needed */
if (xdr->end - xdr->p < 1)
return nfserr_resource;

nfserr = nfsd_splice_read(read->rd_rqstp, file,
- read->rd_offset, &maxcount);
+ read->rd_offset, maxcount);
if (nfserr) {
/*
* nfsd_splice_actor may have already messed with the
@@ -3144,27 +3142,21 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}

- eof = (read->rd_offset + maxcount >=
- read->rd_fhp->fh_dentry->d_inode->i_size);
-
- *(p++) = htonl(eof);
- *(p++) = htonl(maxcount);
-
- buf->page_len = maxcount;
- buf->len += maxcount;
- xdr->page_ptr += (buf->page_base + maxcount + PAGE_SIZE - 1)
+ buf->page_len = *maxcount;
+ buf->len += *maxcount;
+ xdr->page_ptr += (buf->page_base + *maxcount + PAGE_SIZE - 1)
/ PAGE_SIZE;

/* Use rest of head for padding and remaining ops: */
buf->tail[0].iov_base = xdr->p;
buf->tail[0].iov_len = 0;
xdr->iov = buf->tail;
- if (maxcount&3) {
- int pad = 4 - (maxcount&3);
+ if (*maxcount&3) {
+ int pad = 4 - (*maxcount&3);

*(xdr->p++) = 0;

- buf->tail[0].iov_base += maxcount&3;
+ buf->tail[0].iov_base += *maxcount&3;
buf->tail[0].iov_len = pad;
buf->len += pad;
}
@@ -3179,21 +3171,19 @@ static __be32 nfsd4_encode_splice_read(

static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
struct nfsd4_read *read,
- struct file *file, unsigned long maxcount)
+ struct file *file, unsigned long *maxcount)
{
struct xdr_stream *xdr = &resp->xdr;
- u32 eof;
int v;
- int starting_len = xdr->buf->len - 8;
+ int starting_len = xdr->buf->len;
long len;
int thislen;
__be32 nfserr;
- __be32 tmp;
__be32 *p;
u32 zzz = 0;
int pad;

- len = maxcount;
+ len = *maxcount;
v = 0;

thislen = min_t(long, len, ((void *)xdr->end - (void *)xdr->p));
@@ -3216,22 +3206,13 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
read->rd_vlen = v;

nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
- read->rd_vlen, &maxcount);
+ read->rd_vlen, maxcount);
if (nfserr)
return nfserr;
- xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
+ xdr_truncate_encode(xdr, starting_len + ((*maxcount+3)&~3));

- eof = (read->rd_offset + maxcount >=
- read->rd_fhp->fh_dentry->d_inode->i_size);
-
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
- tmp = htonl(maxcount);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
-
- pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
- &zzz, pad);
+ pad = (*maxcount&3) ? 4 - (*maxcount&3) : 0;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + *maxcount, &zzz, pad);
return 0;

}
@@ -3247,6 +3228,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
struct raparms *ra;
__be32 *p;
__be32 err;
+ u32 eof;

if (nfserr)
return nfserr;
@@ -3274,9 +3256,14 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
}

if (file->f_op->splice_read && resp->rqstp->rq_splice_ok)
- err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ err = nfsd4_encode_splice_read(resp, read, file, &maxcount);
else
- err = nfsd4_encode_readv(resp, read, file, maxcount);
+ err = nfsd4_encode_readv(resp, read, file, &maxcount);
+
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
+ *p++ = cpu_to_be32(eof);
+ *p++ = cpu_to_be32(maxcount);

if (!read->rd_filp)
nfsd_put_tmp_read_open(file, ra);
--
2.1.3


2014-12-17 22:35:16

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 2/4] NFSD: Add READ_PLUS support for data segments

From: Anna Schumaker <[email protected]>

This patch adds READ_PLUS support by encoding file contents ase one
single data segment, matching the behavior of the READ operation.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4proc.c | 16 +++++++++++
fs/nfsd/nfs4xdr.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 93 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 74fb15e..5f112b8 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1603,6 +1603,16 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}

+static inline u32 nfsd4_read_plus_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = min(op->u.read.rd_length, maxcount);
+
+ return (op_encode_hdr_size + 2 + 5 + XDR_QUADLEN(maxcount)) * sizeof(__be32);
+}
+
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
u32 maxcount = 0, rlen = 0;
@@ -1980,6 +1990,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
},
+ [OP_READ_PLUS] = {
+ .op_func = (nfsd4op_func)nfsd4_read,
+ .op_name = "OP_READ_PLUS",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_plus_rsize,
+ .op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1943cdb..6ed22ca 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1630,7 +1630,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_CANCEL] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_OFFLOAD_STATUS] = (nfsd4_dec)nfsd4_decode_notsupp,
- [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_READ_PLUS] = (nfsd4_dec)nfsd4_decode_read,
[OP_SEEK] = (nfsd4_dec)nfsd4_decode_seek,
[OP_WRITE_SAME] = (nfsd4_dec)nfsd4_decode_notsupp,
};
@@ -3799,6 +3799,81 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
}

static __be32
+nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
+ struct file *file)
+{
+ __be32 *p, err;
+ unsigned long maxcount;
+ struct xdr_stream *xdr = &resp->xdr;
+
+ p = xdr_reserve_space(xdr, 4 + 8 + 4); /* content type, offset, maxcount */
+ if (!p)
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+ maxcount = svc_max_payload(resp->rqstp);
+ maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
+ maxcount = min_t(unsigned long, maxcount, read->rd_length);
+
+ err = nfsd4_encode_readv(resp, read, file, &maxcount);
+
+ *p++ = cpu_to_be32(NFS4_CONTENT_DATA);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ *p++ = cpu_to_be32(maxcount);
+
+ read->rd_offset += maxcount;
+ return err;
+}
+
+static __be32
+nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct file *file = read->rd_filp;
+ int starting_len = xdr->buf->len;
+ struct raparms *ra;
+ __be32 *p;
+ __be32 err;
+ u32 eof, segments = 0;
+
+ if (nfserr)
+ return nfserr;
+
+ /* eof flag, segment count */
+ p = xdr_reserve_space(xdr, 4 + 4 );
+ if (!p)
+ return nfserr_resource;
+ xdr_commit_encode(xdr);
+
+ if (!read->rd_filp) {
+ err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
+ &file, &ra);
+ if (err)
+ goto err_truncate;
+ }
+
+ if (read->rd_offset >= i_size_read(file_inode(file)))
+ goto out_encode;
+
+ err = nfsd4_encode_read_plus_data(resp, read, file);
+ segments++;
+
+out_encode:
+ eof = (read->rd_offset >= i_size_read(file_inode(file)));
+ *p++ = cpu_to_be32(eof);
+ *p++ = cpu_to_be32(segments);
+
+ if (!read->rd_filp)
+ nfsd_put_tmp_read_open(file, ra);
+
+err_truncate:
+ if (err)
+ xdr_truncate_encode(xdr, starting_len);
+ return err;
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -3897,7 +3972,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
[OP_LAYOUTSTATS] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_CANCEL] = (nfsd4_enc)nfsd4_encode_noop,
[OP_OFFLOAD_STATUS] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_READ_PLUS] = (nfsd4_enc)nfsd4_encode_read_plus,
[OP_SEEK] = (nfsd4_enc)nfsd4_encode_seek,
[OP_WRITE_SAME] = (nfsd4_enc)nfsd4_encode_noop,
};
--
2.1.3


2014-12-17 22:35:15

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 3/4] NFSD: Add support for encoding holes in files

From: Anna Schumaker <[email protected]>

This patch adds support for returning NFS4_CONTENT_HOLE segments to the
client. I did notice a race condition where the same offset can be
reported as both a hole and as data, and this patch gets around this
without a lock by encoding questionable regions as data.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4xdr.c | 34 +++++++++++++++++++++++++++++++---
1 file changed, 31 insertions(+), 3 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 6ed22ca..2ec9860 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3800,7 +3800,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,

static __be32
nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
- struct file *file)
+ struct file *file, loff_t hole_pos)
{
__be32 *p, err;
unsigned long maxcount;
@@ -3814,6 +3814,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *r
maxcount = svc_max_payload(resp->rqstp);
maxcount = min_t(unsigned long, maxcount, (xdr->buf->buflen - xdr->buf->len));
maxcount = min_t(unsigned long, maxcount, read->rd_length);
+ maxcount = min_t(unsigned long, maxcount, hole_pos - read->rd_offset);

err = nfsd4_encode_readv(resp, read, file, &maxcount);

@@ -3826,6 +3827,22 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *r
}

static __be32
+nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *read,
+ struct file *file, loff_t data_pos)
+{
+ __be32 *p;
+ unsigned long maxcount = data_pos - read->rd_offset;
+
+ p = xdr_reserve_space(&resp->xdr, 4 + 8 + 8);
+ *p++ = cpu_to_be32(NFS4_CONTENT_HOLE);
+ p = xdr_encode_hyper(p, read->rd_offset);
+ p = xdr_encode_hyper(p, maxcount);
+
+ read->rd_offset += maxcount;
+ return nfs_ok;
+}
+
+static __be32
nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_read *read)
{
@@ -3833,6 +3850,7 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
struct file *file = read->rd_filp;
int starting_len = xdr->buf->len;
struct raparms *ra;
+ loff_t hole_pos, data_pos;
__be32 *p;
__be32 err;
u32 eof, segments = 0;
@@ -3853,10 +3871,20 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
goto err_truncate;
}

- if (read->rd_offset >= i_size_read(file_inode(file)))
+ hole_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_HOLE);
+ if (hole_pos == -ENXIO)
goto out_encode;

- err = nfsd4_encode_read_plus_data(resp, read, file);
+ data_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_DATA);
+ if (data_pos == -ENXIO)
+ data_pos = i_size_read(file_inode(file));
+
+ if ((data_pos == read->rd_offset) && (hole_pos > data_pos))
+ err = nfsd4_encode_read_plus_data(resp, read, file, hole_pos);
+ else if ((hole_pos == read->rd_offset) && (data_pos > hole_pos))
+ err = nfsd4_encode_read_plus_hole(resp, read, file, data_pos);
+ else /* The file probably changed on us between seeks. */
+ err = nfsd4_encode_read_plus_data(resp, read, file, i_size_read(file_inode(file)));
segments++;

out_encode:
--
2.1.3


2014-12-17 22:35:11

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH 4/4] NFSD: Add support for encoding multiple segments

From: Anna Schumaker <[email protected]>

This patch lets us send an array of segments back to the client. In
theory this will make v4.2 reads more efficient, but only if the client
is equipped to handle multiple segment reads.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4xdr.c | 36 ++++++++++++++++++++++--------------
1 file changed, 22 insertions(+), 14 deletions(-)

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 2ec9860..b678016 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3823,6 +3823,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp, struct nfsd4_read *r
*p++ = cpu_to_be32(maxcount);

read->rd_offset += maxcount;
+ read->rd_length -= maxcount;
return err;
}

@@ -3839,6 +3840,10 @@ nfsd4_encode_read_plus_hole(struct nfsd4_compoundres *resp, struct nfsd4_read *r
p = xdr_encode_hyper(p, maxcount);

read->rd_offset += maxcount;
+ if (maxcount > read->rd_length)
+ read->rd_length = 0;
+ else
+ read->rd_length -= maxcount;
return nfs_ok;
}

@@ -3871,23 +3876,26 @@ nfsd4_encode_read_plus(struct nfsd4_compoundres *resp, __be32 nfserr,
goto err_truncate;
}

- hole_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_HOLE);
- if (hole_pos == -ENXIO)
- goto out_encode;
+ do {
+ hole_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_HOLE);
+ if (hole_pos == -ENXIO)
+ break;

- data_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_DATA);
- if (data_pos == -ENXIO)
- data_pos = i_size_read(file_inode(file));
+ data_pos = vfs_llseek(read->rd_filp, read->rd_offset, SEEK_DATA);
+ if (data_pos == -ENXIO)
+ data_pos = i_size_read(file_inode(file));

- if ((data_pos == read->rd_offset) && (hole_pos > data_pos))
- err = nfsd4_encode_read_plus_data(resp, read, file, hole_pos);
- else if ((hole_pos == read->rd_offset) && (data_pos > hole_pos))
- err = nfsd4_encode_read_plus_hole(resp, read, file, data_pos);
- else /* The file probably changed on us between seeks. */
- err = nfsd4_encode_read_plus_data(resp, read, file, i_size_read(file_inode(file)));
- segments++;
+ if ((data_pos == read->rd_offset) && (hole_pos > data_pos))
+ err = nfsd4_encode_read_plus_data(resp, read, file, hole_pos);
+ else if ((hole_pos == read->rd_offset) && (data_pos > hole_pos))
+ err = nfsd4_encode_read_plus_hole(resp, read, file, data_pos);
+ else /* The file probably changed on us between seeks. */
+ err = nfsd4_encode_read_plus_data(resp, read, file, i_size_read(file_inode(file)));
+ if (err)
+ break;
+ segments++;
+ } while (read->rd_length > 0);

-out_encode:
eof = (read->rd_offset >= i_size_read(file_inode(file)));
*p++ = cpu_to_be32(eof);
*p++ = cpu_to_be32(segments);
--
2.1.3