2015-12-03 20:55:42

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 0/4] NFSv4.2: Add support for the COPY operation

These patches add client and server support for the NFS v4.2 COPY operation,
and depend on the new copy_file_range() system call currently scheduled for
Linux 4.5. I gathered performance information by comparing the runtime and RPC
count of my test program, included as an RFC patch, against /usr/bin/cp for
various file sizes.

/usr/bin/cp:
size: 513MB 1024MB 1536MB 2048MB
------------- ------------- -------- -------- -------- --------
nfs v4 client total: 8203 16396 24588 32780
------------- ------------- -------- -------- -------- --------
nfs v4 client read: 4096 8192 12288 16384
nfs v4 client write: 4096 8192 12288 16384
nfs v4 client commit: 1 1 1 1
nfs v4 client open: 1 1 1 1
nfs v4 client open_noat: 2 2 2 2
nfs v4 client close: 1 1 1 1
nfs v4 client setattr: 2 2 2 2
nfs v4 client access: 2 3 3 3
nfs v4 client getattr: 2 2 2 2

/usr/bin/cp /nfs/test-512 /nfs/test-copy 0.00s user 0.32s system 14% cpu 2.209 total
/usr/bin/cp /nfs/test-1024 /nfs/test-copy 0.00s user 0.66s system 18% cpu 3.651 total
/usr/bin/cp /nfs/test-1536 /nfs/test-copy 0.02s user 0.97s system 18% cpu 5.477 total
/usr/bin/cp /nfs/test-2048 /nfs/test-copy 0.00s user 1.38s system 15% cpu 9.085 total


Copy system call:
size: 512MB 1024MB 1536MB 2048MB
------------- ------------- -------- -------- -------- --------
nfs v4 client total: 6 6 6 6
------------- ------------- -------- -------- -------- --------
nfs v4 client open: 2 2 2 2
nfs v4 client close: 2 2 2 2
nfs v4 client access: 1 1 1 1
nfs v4 client copy: 1 1 1 1


./nfscopy /nfs/test-512 /nfs/test-copy 0.00s user 0.00s system 0% cpu 1.148 total
./nfscopy /nfs/test-1024 /nfs/test-copy 0.00s user 0.00s system 0% cpu 2.293 total
./nfscopy /nfs/test-1536 /nfs/test-copy 0.00s user 0.00s system 0% cpu 3.037 total
./nfscopy /nfs/test-2048 /nfs/test-copy 0.00s user 0.00s system 0% cpu 4.045 total


Questions, comments, and other testing ideas would be greatly appreciated!

Thanks,
Anna


Anna Schumaker (4):
NFSD: Pass filehandle to nfs4_preprocess_stateid_op()
NFSD: Implement the COPY call
NFS: Add COPY nfs operation
vfs_copy_range() test program

fs/nfs/nfs42.h | 1 +
fs/nfs/nfs42proc.c | 58 ++++++++++++++++++++
fs/nfs/nfs42xdr.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4file.c | 8 +++
fs/nfs/nfs4proc.c | 1 +
fs/nfs/nfs4xdr.c | 1 +
fs/nfs/objlayout/Makefile | 0
fs/nfsd/nfs4proc.c | 91 ++++++++++++++++++++++++++++---
fs/nfsd/nfs4state.c | 5 +-
fs/nfsd/nfs4xdr.c | 62 ++++++++++++++++++++-
fs/nfsd/state.h | 4 +-
fs/nfsd/vfs.c | 17 ++++++
fs/nfsd/vfs.h | 1 +
fs/nfsd/xdr4.h | 23 ++++++++
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 27 +++++++++
nfscopy.c | 59 ++++++++++++++++++++
18 files changed, 482 insertions(+), 14 deletions(-)
create mode 100644 fs/nfs/objlayout/Makefile
create mode 100644 nfscopy.c

--
2.6.3



2015-12-03 20:55:44

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 2/3] NFSD: Implement the COPY call

From: Anna Schumaker <[email protected]>

I only implemented the sync version of this call, since it's the
easiest. I can simply call vfs_copy_range() and have the vfs do the
right thing for the filesystem being exported.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4proc.c | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4xdr.c | 62 ++++++++++++++++++++++++++++++++++++++++++--
fs/nfsd/vfs.c | 17 +++++++++++++
fs/nfsd/vfs.h | 1 +
fs/nfsd/xdr4.h | 23 +++++++++++++++++
5 files changed, 176 insertions(+), 2 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3ba10a3..e0b1f43 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1012,6 +1012,63 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}

static __be32
+nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy, struct file **src, struct file **dst)
+{
+ __be32 status;
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
+ &copy->cp_src_stateid, RD_STATE,
+ src, NULL);
+ if (status) {
+ dprintk("NFSD: nfsd4_copy: couldn't process src stateid!\n");
+ return status;
+ }
+
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &copy->cp_dst_stateid, WR_STATE,
+ dst, NULL);
+ if (status) {
+ dprintk("NFSD: nfsd4_copy: couldn't process dst stateid!\n");
+ fput(*src);
+ }
+
+ return status;
+}
+
+static __be32
+nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ ssize_t bytes;
+ __be32 status;
+ struct file *src = NULL, *dst = NULL;
+
+ status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst);
+ if (status)
+ return status;
+
+ bytes = nfsd_copy_range(src, copy->cp_src_pos,
+ dst, copy->cp_dst_pos,
+ copy->cp_count);
+
+ if (bytes < 0)
+ status = nfserrno(bytes);
+ else {
+ copy->cp_res.wr_bytes_written = bytes;
+ copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
+ copy->cp_consecutive = 1;
+ copy->cp_synchronous = 1;
+ gen_boot_verifier(&copy->cp_res.wr_verifier, SVC_NET(rqstp));
+ status = nfs_ok;
+ }
+
+ fput(src);
+ fput(dst);
+ return status;
+}
+
+static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
@@ -1925,6 +1982,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
op_encode_channel_attrs_maxsz) * sizeof(__be32);
}

+static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 1 /* wr_callback */ +
+ op_encode_stateid_maxsz /* wr_callback */ +
+ 2 /* wr_count */ +
+ 1 /* wr_committed */ +
+ op_encode_verifier_maxsz +
+ 1 /* cr_consecutive */ +
+ 1 /* cr_synchronous */) * sizeof(__be32);
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* At this stage we don't really know what layout driver will handle the request,
@@ -2281,6 +2350,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_DEALLOCATE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
+ [OP_COPY] = {
+ .op_func = (nfsd4op_func)nfsd4_copy,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_COPY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 51c9e9c..d86a3c7 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1675,6 +1675,30 @@ nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
}

static __be32
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+{
+ DECODE_HEAD;
+ unsigned int tmp;
+
+ status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
+ if (status)
+ return status;
+
+ READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
+ p = xdr_decode_hyper(p, &copy->cp_src_pos);
+ p = xdr_decode_hyper(p, &copy->cp_dst_pos);
+ p = xdr_decode_hyper(p, &copy->cp_count);
+ copy->cp_consecutive = be32_to_cpup(p++);
+ copy->cp_synchronous = be32_to_cpup(p++);
+ tmp = be32_to_cpup(p); /* Source server list not supported */
+
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1774,7 +1798,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {

/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4183,6 +4207,40 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
#endif /* CONFIG_NFSD_PNFS */

static __be32
+nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(0);
+ p = xdr_encode_hyper(p, write->wr_bytes_written);
+ *p++ = cpu_to_be32(write->wr_stable_how);
+ p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, NFS4_VERIFIER_SIZE);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_copy *copy)
+{
+ __be32 *p, err;
+
+ if (!nfserr) {
+ err = nfsd42_encode_write_res(resp, &copy->cp_res);
+ if (err)
+ return err;
+
+ p = xdr_reserve_space(&resp->xdr, 4 + 4);
+ *p++ = cpu_to_be32(copy->cp_consecutive);
+ *p++ = cpu_to_be32(copy->cp_synchronous);
+ }
+ return nfserr;
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -4281,7 +4339,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {

/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 994d66f..225ff12 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -36,6 +36,7 @@
#endif /* CONFIG_NFSD_V3 */

#ifdef CONFIG_NFSD_V4
+#include "../internal.h"
#include "acl.h"
#include "idmap.h"
#endif /* CONFIG_NFSD_V4 */
@@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
}
#endif

+ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
+ struct file *dst, u64 dst_pos,
+ u64 count)
+{
+ ssize_t bytes;
+ u64 limit = 0x10000000;
+
+ if (count > limit)
+ count = limit;
+
+ bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+ if (bytes > 0)
+ vfs_fsync_range(dst, dst_pos, dst_pos + bytes, 0);
+ return bytes;
+}
+
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fcfc48c..0243c5a 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -91,6 +91,7 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
struct svc_fh *res);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
+ssize_t nfsd_copy_range(struct file *, u64, struct file *, u64, u64);
__be32 nfsd_rename(struct svc_rqst *,
struct svc_fh *, char *, int,
struct svc_fh *, char *, int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index ce7362c..7ef82c9 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -491,6 +491,28 @@ struct nfsd4_fallocate {
u64 falloc_length;
};

+struct nfsd42_write_res {
+ u64 wr_bytes_written;
+ u32 wr_stable_how;
+ nfs4_verifier wr_verifier;
+};
+
+struct nfsd4_copy {
+ /* request */
+ stateid_t cp_src_stateid;
+ stateid_t cp_dst_stateid;
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+
+ /* both */
+ bool cp_consecutive;
+ bool cp_synchronous;
+
+ /* response */
+ struct nfsd42_write_res cp_res;
+};
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -555,6 +577,7 @@ struct nfsd4_op {
/* NFSv4.2 */
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
+ struct nfsd4_copy copy;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
--
2.6.3


2015-12-03 20:55:43

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 1/3] NFSD: Pass filehandle to nfs4_preprocess_stateid_op()

This will be needed so COPY can look up the saved_fh in addition to the
current_fh.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfsd/nfs4proc.c | 16 +++++++++-------
fs/nfsd/nfs4state.c | 5 ++---
fs/nfsd/state.h | 4 ++--
3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index a9f096c..3ba10a3 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -774,8 +774,9 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);

/* check stateid */
- status = nfs4_preprocess_stateid_op(rqstp, cstate, &read->rd_stateid,
- RD_STATE, &read->rd_filp, &read->rd_tmp_file);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ &read->rd_stateid, RD_STATE,
+ &read->rd_filp, &read->rd_tmp_file);
if (status) {
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
@@ -921,7 +922,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,

if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
status = nfs4_preprocess_stateid_op(rqstp, cstate,
- &setattr->sa_stateid, WR_STATE, NULL, NULL);
+ &cstate->current_fh, &setattr->sa_stateid,
+ WR_STATE, NULL, NULL);
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -985,8 +987,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;

- status = nfs4_preprocess_stateid_op(rqstp, cstate, stateid, WR_STATE,
- &filp, NULL);
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
+ stateid, WR_STATE, &filp, NULL);
if (status) {
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
@@ -1016,7 +1018,7 @@ nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status = nfserr_notsupp;
struct file *file;

- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&fallocate->falloc_stateid,
WR_STATE, &file, NULL);
if (status != nfs_ok) {
@@ -1055,7 +1057,7 @@ nfsd4_seek(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
__be32 status;
struct file *file;

- status = nfs4_preprocess_stateid_op(rqstp, cstate,
+ status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
&seek->seek_stateid,
RD_STATE, &file, NULL);
if (status) {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 6b800b5..df5dba6 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -4797,10 +4797,9 @@ nfs4_check_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_stid *s,
*/
__be32
nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filpp, bool *tmp_file)
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filpp, bool *tmp_file)
{
- struct svc_fh *fhp = &cstate->current_fh;
struct inode *ino = d_inode(fhp->fh_dentry);
struct net *net = SVC_NET(rqstp);
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 77fdf4d..99432b7 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -578,8 +578,8 @@ struct nfsd4_compound_state;
struct nfsd_net;

extern __be32 nfs4_preprocess_stateid_op(struct svc_rqst *rqstp,
- struct nfsd4_compound_state *cstate, stateid_t *stateid,
- int flags, struct file **filp, bool *tmp_file);
+ struct nfsd4_compound_state *cstate, struct svc_fh *fhp,
+ stateid_t *stateid, int flags, struct file **filp, bool *tmp_file);
__be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
stateid_t *stateid, unsigned char typemask,
struct nfs4_stid **s, struct nfsd_net *nn);
--
2.6.3


2015-12-03 20:55:46

by Anna Schumaker

[permalink] [raw]
Subject: [RFC v1 4/3] vfs_copy_range() test program

This is a simple C program that I used for calling the copy system call.
Usage: ./nfscopy /nfs/original.txt /nfs/copy.txt
---
nfscopy.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
create mode 100644 nfscopy.c

diff --git a/nfscopy.c b/nfscopy.c
new file mode 100644
index 0000000..3417a14
--- /dev/null
+++ b/nfscopy.c
@@ -0,0 +1,59 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+loff_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
+ loff_t *off_out, size_t len, unsigned int flags)
+{
+ return syscall(__NR_copy_file_range, fd_in, off_in, fd_out,
+ off_out, len, flags);
+}
+
+int main(int argc, char **argv)
+{
+ int fd_in, fd_out;
+ struct stat stat;
+ loff_t len, ret;
+ char buf[2];
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s <source> <destination>\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ fd_in = open(argv[1], O_RDONLY);
+ if (fd_in == -1) {
+ perror("open (argv[1])");
+ exit(EXIT_FAILURE);
+ }
+
+ if (fstat(fd_in, &stat) == -1) {
+ perror("fstat");
+ exit(EXIT_FAILURE);
+ }
+ len = stat.st_size;
+
+ fd_out = open(argv[2], O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ if (fd_out == -1) {
+ perror("open (argv[2])");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ ret = copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
+ if (ret == -1) {
+ perror("copy_file_range");
+ exit(EXIT_FAILURE);
+ }
+
+ len -= ret;
+ } while (len > 0);
+
+ close(fd_in);
+ close(fd_out);
+ exit(EXIT_SUCCESS);
+}
--
2.6.3


2015-12-03 20:55:46

by Anna Schumaker

[permalink] [raw]
Subject: [PATCH v1 3/3] NFS: Add COPY nfs operation

From: Anna Schumaker <[email protected]>

This adds the copy_range file_ops function pointer used by the
sys_copy_range() function call. This patch only implements sync copies,
so if an async copy happens we decode the stateid and ignore it.

Signed-off-by: Anna Schumaker <[email protected]>
---
fs/nfs/nfs42.h | 1 +
fs/nfs/nfs42proc.c | 58 ++++++++++++++++++++
fs/nfs/nfs42xdr.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4file.c | 8 +++
fs/nfs/nfs4proc.c | 1 +
fs/nfs/nfs4xdr.c | 1 +
fs/nfs/objlayout/Makefile | 0
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 27 +++++++++
10 files changed, 234 insertions(+)
create mode 100644 fs/nfs/objlayout/Makefile

diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index b587ccd..b6cd153 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@

/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 6b1ce98..a183d91 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -135,6 +135,64 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}

+static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+ struct file *dst, loff_t pos_dst,
+ size_t count)
+{
+ struct nfs42_copy_args args = {
+ .src_fh = NFS_FH(file_inode(src)),
+ .src_pos = pos_src,
+ .dst_fh = NFS_FH(file_inode(dst)),
+ .dst_pos = pos_dst,
+ .count = count,
+ };
+ struct nfs42_copy_res res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ struct nfs_server *server = NFS_SERVER(file_inode(dst));
+ int status;
+
+ status = nfs42_set_rw_stateid(&args.src_stateid, src, FMODE_READ);
+ if (status)
+ return status;
+
+ status = nfs42_set_rw_stateid(&args.dst_stateid, dst, FMODE_WRITE);
+ if (status)
+ return status;
+
+ status = nfs4_call_sync(server->client, server, &msg,
+ &args.seq_args, &res.seq_res, 0);
+ if (status == -ENOTSUPP)
+ server->caps &= ~NFS_CAP_COPY;
+ if (status)
+ return status;
+ return res.write_res.count;
+}
+
+ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
+ struct file *dst, loff_t pos_dst,
+ size_t count)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(dst));
+ struct nfs4_exception exception = { };
+ ssize_t err;
+
+ if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
+ return -EOPNOTSUPP;
+
+ do {
+ err = _nfs42_proc_copy(src, pos_src, dst, pos_dst, count);
+ if (err == -ENOTSUPP)
+ return -EOPNOTSUPP;
+ err = nfs4_handle_exception(server, err, &exception);
+ } while (exception.retry);
+
+ return err;
+}
+
static loff_t _nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
{
struct inode *inode = file_inode(filep);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0ca482a..94560a0 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -9,9 +9,22 @@
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
+#define NFS42_WRITE_RES_SIZE (1 /* wr_callback_id size */ +\
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 2 /* wr_count */ + \
+ 1 /* wr_committed */ + \
+ XDR_QUADLEN(NFS4_VERIFIER_SIZE))
#define encode_allocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_allocate_maxsz (op_decode_hdr_maxsz)
+#define encode_copy_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 2 + 2 + 2 + 1 + 1 + 1)
+#define decode_copy_maxsz (op_decode_hdr_maxsz + \
+ NFS42_WRITE_RES_SIZE + \
+ 1 /* cr_consecutive */ + \
+ 1 /* cr_synchronous */)
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
@@ -49,6 +62,16 @@
decode_putfh_maxsz + \
decode_allocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_putfh_maxsz + \
+ encode_copy_maxsz)
+#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_putfh_maxsz + \
+ decode_copy_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_deallocate_maxsz + \
@@ -102,6 +125,23 @@ static void encode_allocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}

+static void encode_copy(struct xdr_stream *xdr,
+ struct nfs42_copy_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->src_stateid);
+ encode_nfs4_stateid(xdr, &args->dst_stateid);
+
+ encode_uint64(xdr, args->src_pos);
+ encode_uint64(xdr, args->dst_pos);
+ encode_uint64(xdr, args->count);
+
+ encode_uint32(xdr, 1); /* consecutive = true */
+ encode_uint32(xdr, 1); /* synchronous = true */
+ encode_uint32(xdr, 0); /* src server list */
+}
+
static void encode_deallocate(struct xdr_stream *xdr,
struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -182,6 +222,26 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
}

/*
+ * Encode COPY request
+ */
+static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_copy_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->src_fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->dst_fh, &hdr);
+ encode_copy(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode DEALLOCATE request
*/
static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
@@ -266,6 +326,52 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
return decode_op_hdr(xdr, OP_ALLOCATE);
}

+static int decode_write_response(struct xdr_stream *xdr,
+ struct nfs42_write_res *res)
+{
+ __be32 *p;
+ int stateids;
+
+ p = xdr_inline_decode(xdr, 4 + 8 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ stateids = be32_to_cpup(p++);
+ p = xdr_decode_hyper(p, &res->count);
+ res->committed = be32_to_cpup(p);
+ return decode_verifier(xdr, &res->verifier);
+
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
+{
+ __be32 *p;
+ int status;
+
+ status = decode_op_hdr(xdr, OP_COPY);
+ if (status)
+ return status;
+
+ status = decode_write_response(xdr, &res->write_res);
+ if (status)
+ return status;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ res->consecutive = be32_to_cpup(p++);
+ res->synchronous = be32_to_cpup(p++);
+ return 0;
+
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -331,6 +437,36 @@ out:
}

/*
+ * Decode COPY response
+ */
+static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_copy_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_savefh(xdr);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_copy(xdr, res);
+out:
+ return status;
+}
+
+/*
* Decode DEALLOCATE request
*/
static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index db9b5fe..8eb7802 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -160,6 +160,13 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}

#ifdef CONFIG_NFS_V4_2
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
+{
+ return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+}
+
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
@@ -340,6 +347,7 @@ const struct file_operations nfs4_file_operations = {
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
#ifdef CONFIG_NFS_V4_2
+ .copy_file_range = nfs4_copy_file_range,
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
.unlocked_ioctl = nfs4_ioctl,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 8981803..f9d14f1 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8721,6 +8721,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_ALLOCATE
+ | NFS_CAP_COPY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e44412..bc3a3f5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7515,6 +7515,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
PROC(CLONE, enc_clone, dec_clone),
+ PROC(COPY, enc_copy, dec_copy),
#endif /* CONFIG_NFS_V4_2 */
};

diff --git a/fs/nfs/objlayout/Makefile b/fs/nfs/objlayout/Makefile
new file mode 100644
index 0000000..e69de29
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index e7e7853..bebafaf 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -504,6 +504,7 @@ enum {
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE,
+ NFSPROC4_CLNT_COPY,
};

/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 2469ab0..8b772ca 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -245,5 +245,6 @@ struct nfs_server {
#define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22)
#define NFS_CAP_CLONE (1U << 23)
+#define NFS_CAP_COPY (1U << 24)

#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 11bbae4..304b8e4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1342,6 +1342,33 @@ struct nfs42_falloc_res {
const struct nfs_server *falloc_server;
};

+struct nfs42_copy_args {
+ struct nfs4_sequence_args seq_args;
+
+ struct nfs_fh *src_fh;
+ nfs4_stateid src_stateid;
+ u64 src_pos;
+
+ struct nfs_fh *dst_fh;
+ nfs4_stateid dst_stateid;
+ u64 dst_pos;
+
+ u64 count;
+};
+
+struct nfs42_write_res {
+ u64 count;
+ u32 committed;
+ nfs4_verifier verifier;
+};
+
+struct nfs42_copy_res {
+ struct nfs4_sequence_res seq_res;
+ struct nfs42_write_res write_res;
+ bool consecutive;
+ bool synchronous;
+};
+
struct nfs42_seek_args {
struct nfs4_sequence_args seq_args;

--
2.6.3


2015-12-04 15:45:50

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On Thu, Dec 03, 2015 at 03:55:35PM -0500, Anna Schumaker wrote:
> @@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
> }
> #endif
>
> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
> + struct file *dst, u64 dst_pos,
> + u64 count)
> +{
> + ssize_t bytes;
> + u64 limit = 0x10000000;

Why that value? Could I get a comment here?

> +
> + if (count > limit)
> + count = limit;
> +
> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);

Sorry, I lost track of the copy discussions: does this only work on
filesystems with special support, or does it fall back on doing the copy
by hand? Which filesystems (of the exportable filesystems) support
this?

--b.

> + if (bytes > 0)
> + vfs_fsync_range(dst, dst_pos, dst_pos + bytes, 0);
> + return bytes;
> +}
> +
> __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
> struct file *file, loff_t offset, loff_t len,
> int flags)

2015-12-04 15:49:32

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On 12/04/2015 10:45 AM, J. Bruce Fields wrote:
> On Thu, Dec 03, 2015 at 03:55:35PM -0500, Anna Schumaker wrote:
>> @@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> }
>> #endif
>>
>> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
>> + struct file *dst, u64 dst_pos,
>> + u64 count)
>> +{
>> + ssize_t bytes;
>> + u64 limit = 0x10000000;
>
> Why that value? Could I get a comment here?

Whoops! I had a comment there at one point, but I must have deleted it :(. That value is to cap copies to 256MB.
>
>> +
>> + if (count > limit)
>> + count = limit;
>> +
>> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
>
> Sorry, I lost track of the copy discussions: does this only work on
> filesystems with special support, or does it fall back on doing the copy
> by hand? Which filesystems (of the exportable filesystems) support
> this?

The system call falls back on doing the copy by hand if there is no filesystem acceleration.

Anna

>
> --b.
>
>> + if (bytes > 0)
>> + vfs_fsync_range(dst, dst_pos, dst_pos + bytes, 0);
>> + return bytes;
>> +}
>> +
>> __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
>> struct file *file, loff_t offset, loff_t len,
>> int flags)
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>


2015-12-04 16:49:45

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On Fri, Dec 04, 2015 at 10:49:24AM -0500, Anna Schumaker wrote:
> On 12/04/2015 10:45 AM, J. Bruce Fields wrote:
> > On Thu, Dec 03, 2015 at 03:55:35PM -0500, Anna Schumaker wrote:
> >> @@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
> >> }
> >> #endif
> >>
> >> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
> >> + struct file *dst, u64 dst_pos,
> >> + u64 count)
> >> +{
> >> + ssize_t bytes;
> >> + u64 limit = 0x10000000;
> >
> > Why that value? Could I get a comment here?
>
> Whoops! I had a comment there at one point, but I must have deleted it :(. That value is to cap copies to 256MB.

Could you include some justification for the choice of that particular
value?

> >> + if (count > limit)
> >> + count = limit;
> >> +
> >> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> >
> > Sorry, I lost track of the copy discussions: does this only work on
> > filesystems with special support, or does it fall back on doing the copy
> > by hand? Which filesystems (of the exportable filesystems) support
> > this?
>
> The system call falls back on doing the copy by hand if there is no filesystem acceleration.

Is this practical? It means a huge range in possible latency of the
single COPY call depending on filesystem.

I guess I can live with it and we can see if people run into problems in
practice. But let's make sure this is documented.

--b.

2015-12-04 17:05:16

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On 12/04/2015 11:49 AM, J. Bruce Fields wrote:
> On Fri, Dec 04, 2015 at 10:49:24AM -0500, Anna Schumaker wrote:
>> On 12/04/2015 10:45 AM, J. Bruce Fields wrote:
>>> On Thu, Dec 03, 2015 at 03:55:35PM -0500, Anna Schumaker wrote:
>>>> @@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
>>>> }
>>>> #endif
>>>>
>>>> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
>>>> + struct file *dst, u64 dst_pos,
>>>> + u64 count)
>>>> +{
>>>> + ssize_t bytes;
>>>> + u64 limit = 0x10000000;
>>>
>>> Why that value? Could I get a comment here?
>>
>> Whoops! I had a comment there at one point, but I must have deleted it :(. That value is to cap copies to 256MB.
>
> Could you include some justification for the choice of that particular
> value?

Yeah, I can run tests with different values and include the results in v2.

>
>>>> + if (count > limit)
>>>> + count = limit;
>>>> +
>>>> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
>>>
>>> Sorry, I lost track of the copy discussions: does this only work on
>>> filesystems with special support, or does it fall back on doing the copy
>>> by hand? Which filesystems (of the exportable filesystems) support
>>> this?
>>
>> The system call falls back on doing the copy by hand if there is no filesystem acceleration.
>
> Is this practical? It means a huge range in possible latency of the
> single COPY call depending on filesystem.

That's why I'm breaking copies into smaller chunks, rather than doing everything at once.

>
> I guess I can live with it and we can see if people run into problems in
> practice. But let's make sure this is documented.

Okay. I'll add documentation about this!

Anna

>
> --b.
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>


2015-12-04 17:14:07

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On Fri, Dec 04, 2015 at 12:05:05PM -0500, Anna Schumaker wrote:
> On 12/04/2015 11:49 AM, J. Bruce Fields wrote:
> > On Fri, Dec 04, 2015 at 10:49:24AM -0500, Anna Schumaker wrote:
> >> On 12/04/2015 10:45 AM, J. Bruce Fields wrote:
> >>> On Thu, Dec 03, 2015 at 03:55:35PM -0500, Anna Schumaker wrote:
> >>>> @@ -498,6 +499,22 @@ __be32 nfsd4_set_nfs4_label(struct svc_rqst *rqstp, struct svc_fh *fhp,
> >>>> }
> >>>> #endif
> >>>>
> >>>> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos,
> >>>> + struct file *dst, u64 dst_pos,
> >>>> + u64 count)
> >>>> +{
> >>>> + ssize_t bytes;
> >>>> + u64 limit = 0x10000000;
> >>>
> >>> Why that value? Could I get a comment here?
> >>
> >> Whoops! I had a comment there at one point, but I must have deleted it :(. That value is to cap copies to 256MB.
> >
> > Could you include some justification for the choice of that particular
> > value?
>
> Yeah, I can run tests with different values and include the results in v2.
>
> >
> >>>> + if (count > limit)
> >>>> + count = limit;
> >>>> +
> >>>> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> >>>
> >>> Sorry, I lost track of the copy discussions: does this only work on
> >>> filesystems with special support, or does it fall back on doing the copy
> >>> by hand? Which filesystems (of the exportable filesystems) support
> >>> this?
> >>
> >> The system call falls back on doing the copy by hand if there is no filesystem acceleration.
> >
> > Is this practical? It means a huge range in possible latency of the
> > single COPY call depending on filesystem.
>
> That's why I'm breaking copies into smaller chunks, rather than doing everything at once.
>
> >
> > I guess I can live with it and we can see if people run into problems in
> > practice. But let's make sure this is documented.
>
> Okay. I'll add documentation about this!

OK, thanks. I think those are my only concerns, good to see that this
is all the new code we need now.--b.

2015-12-07 19:23:19

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v1 0/4] NFSv4.2: Add support for the COPY operation

FYI, Al has merged the clone work in for-next, so this might need
a rebase on top of that.

2015-12-07 19:26:47

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

> static __be32
> +nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> + struct nfsd4_copy *copy, struct file **src, struct file **dst)
> +{
> + __be32 status;
> +
> + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
> + &copy->cp_src_stateid, RD_STATE,
> + src, NULL);
> + if (status) {
> + dprintk("NFSD: nfsd4_copy: couldn't process src stateid!\n");
> + return status;
> + }
> +
> + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> + &copy->cp_dst_stateid, WR_STATE,
> + dst, NULL);
> + if (status) {
> + dprintk("NFSD: nfsd4_copy: couldn't process dst stateid!\n");
> + fput(*src);
> + }

This is missing a return status. On the clone side that caused really
hard to debug crashes when xfstests hit this case. While you're at it
I'd suggest to also kill the nfsd4_verify_copy heper. You might also
need a check for invalid file types that maps to the correct NFS error
code, similar to clone.

> + if (bytes < 0)
> + status = nfserrno(bytes);

maybe use a goto here?

> + else {
> + copy->cp_res.wr_bytes_written = bytes;
> + copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
> + copy->cp_consecutive = 1;

is there anything in the linux semantics that guarantees consecutive
operation?

> + u64 count)
> +{
> + ssize_t bytes;
> + u64 limit = 0x10000000;
> +
> + if (count > limit)
> + count = limit;
> +
> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> + if (bytes > 0)
> + vfs_fsync_range(dst, dst_pos, dst_pos + bytes, 0);
> + return bytes;

How about returning NFS_UNSTABLE above and avoiding the fsync here?

2015-12-07 19:28:07

by Christoph Hellwig

[permalink] [raw]
Subject: Re: [PATCH v1 3/3] NFS: Add COPY nfs operation

On Thu, Dec 03, 2015 at 03:55:36PM -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> This adds the copy_range file_ops function pointer used by the
> sys_copy_range() function call. This patch only implements sync copies,
> so if an async copy happens we decode the stateid and ignore it.

What is the user visible behavior if a server doesn't set
cr_synchronous? I can't see any explicit handling of it, which makes
me a little sceptic that everything is just going to work fine.

2015-12-07 21:03:57

by Anna Schumaker

[permalink] [raw]
Subject: Re: [PATCH v1 2/3] NFSD: Implement the COPY call

On 12/07/2015 02:26 PM, Christoph Hellwig wrote:
>> static __be32
>> +nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>> + struct nfsd4_copy *copy, struct file **src, struct file **dst)
>> +{
>> + __be32 status;
>> +
>> + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
>> + &copy->cp_src_stateid, RD_STATE,
>> + src, NULL);
>> + if (status) {
>> + dprintk("NFSD: nfsd4_copy: couldn't process src stateid!\n");
>> + return status;
>> + }
>> +
>> + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
>> + &copy->cp_dst_stateid, WR_STATE,
>> + dst, NULL);
>> + if (status) {
>> + dprintk("NFSD: nfsd4_copy: couldn't process dst stateid!\n");
>> + fput(*src);
>> + }
>
> This is missing a return status. On the clone side that caused really
> hard to debug crashes when xfstests hit this case. While you're at it
> I'd suggest to also kill the nfsd4_verify_copy heper. You might also
> need a check for invalid file types that maps to the correct NFS error
> code, similar to clone.

I just updated against the clone code, and I made some of these changes earlier this afternoon. I kept the verify_copy() helper around and changed clone to call it, since all of the stateid verification code would be almost identical.

>
>> + if (bytes < 0)
>> + status = nfserrno(bytes);
>
> maybe use a goto here?

Maybe. I'll see how the code looks!

>
>> + else {
>> + copy->cp_res.wr_bytes_written = bytes;
>> + copy->cp_res.wr_stable_how = NFS_FILE_SYNC;
>> + copy->cp_consecutive = 1;
>
> is there anything in the linux semantics that guarantees consecutive
> operation?

I think so. The splice fallback iterates starting at the beginning of the file, so if something goes wrong later on then the earlier pages should at least be copied.

>
>> + u64 count)
>> +{
>> + ssize_t bytes;
>> + u64 limit = 0x10000000;
>> +
>> + if (count > limit)
>> + count = limit;
>> +
>> + bytes = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
>> + if (bytes > 0)
>> + vfs_fsync_range(dst, dst_pos, dst_pos + bytes, 0);
>> + return bytes;
>
> How about returning NFS_UNSTABLE above and avoiding the fsync here?

I was just looking into this, too. I'm trying to figure out the right way to handle this on the client side, since right now we ignore this value. I have gotten as far as "if the file is open with O_SYNC, then we should commit after copying."

Anna

> --
> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>