These patches add client and server support for the NFS v4.2 COPY operation,
and make use of the new copy_file_range() system call.
Changes in v3:
- Add nfs_commit_file() function to sent a commit after a copy.
- Update exception handling to match Christoph's recent changes.
- Server should check for copying beyond the end of the file.
Questions, comments, and other testing ideas would be greatly appreciated!
Thanks,
Anna
Anna Schumaker (4):
NFS: Add nfs_commit_file()
NFS: Add COPY nfs operation
NFSD: Implement the COPY call
vfs_copy_range() test program
fs/nfs/internal.h | 1 +
fs/nfs/nfs42.h | 1 +
fs/nfs/nfs42proc.c | 105 +++++++++++++++++++++++++++++++++
fs/nfs/nfs42xdr.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4file.c | 23 ++++++++
fs/nfs/nfs4proc.c | 1 +
fs/nfs/nfs4xdr.c | 1 +
fs/nfs/pagelist.c | 6 +-
fs/nfs/write.c | 41 +++++++++++--
fs/nfsd/nfs4proc.c | 98 +++++++++++++++++++++++++++----
fs/nfsd/nfs4xdr.c | 63 +++++++++++++++++++-
fs/nfsd/vfs.c | 6 ++
fs/nfsd/vfs.h | 1 +
fs/nfsd/xdr4.h | 23 ++++++++
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 26 +++++++++
nfscopy.c | 59 +++++++++++++++++++
18 files changed, 584 insertions(+), 19 deletions(-)
create mode 100644 nfscopy.c
--
2.7.2
From: Anna Schumaker <[email protected]>
This adds the copy_range file_ops function pointer used by the
sys_copy_range() function call. This patch only implements sync copies,
so if an async copy happens we decode the stateid and ignore it.
Signed-off-by: Anna Schumaker <[email protected]>
--
v3:
- Update exception handling to match the rest of the v4.2 code
- Use nfs_commit_file() to send the commit through the standard
commit path
---
fs/nfs/nfs42.h | 1 +
fs/nfs/nfs42proc.c | 105 +++++++++++++++++++++++++++++++++
fs/nfs/nfs42xdr.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++
fs/nfs/nfs4file.c | 23 ++++++++
fs/nfs/nfs4proc.c | 1 +
fs/nfs/nfs4xdr.c | 1 +
include/linux/nfs4.h | 1 +
include/linux/nfs_fs_sb.h | 1 +
include/linux/nfs_xdr.h | 26 +++++++++
9 files changed, 305 insertions(+)
diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h
index b587ccd..b6cd153 100644
--- a/fs/nfs/nfs42.h
+++ b/fs/nfs/nfs42.h
@@ -13,6 +13,7 @@
/* nfs4.2proc.c */
int nfs42_proc_allocate(struct file *, loff_t, loff_t);
+ssize_t nfs42_proc_copy(struct file *, loff_t, struct file *, loff_t, size_t);
int nfs42_proc_deallocate(struct file *, loff_t, loff_t);
loff_t nfs42_proc_llseek(struct file *, loff_t, int);
int nfs42_proc_layoutstats_generic(struct nfs_server *,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index dff8346..579ee20 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -126,6 +126,111 @@ int nfs42_proc_deallocate(struct file *filep, loff_t offset, loff_t len)
return err;
}
+static ssize_t _nfs42_proc_copy(struct file *src, loff_t pos_src,
+ struct nfs_lock_context *src_lock,
+ struct file *dst, loff_t pos_dst,
+ struct nfs_lock_context *dst_lock,
+ size_t count)
+{
+ struct nfs42_copy_args args = {
+ .src_fh = NFS_FH(file_inode(src)),
+ .src_pos = pos_src,
+ .dst_fh = NFS_FH(file_inode(dst)),
+ .dst_pos = pos_dst,
+ .count = count,
+ };
+ struct nfs42_copy_res res;
+ struct rpc_message msg = {
+ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COPY],
+ .rpc_argp = &args,
+ .rpc_resp = &res,
+ };
+ struct inode *dst_inode = file_inode(dst);
+ struct nfs_server *server = NFS_SERVER(dst_inode);
+ int status;
+
+ status = nfs4_set_rw_stateid(&args.src_stateid, src_lock->open_context,
+ src_lock, FMODE_READ);
+ if (status)
+ return status;
+
+ status = nfs4_set_rw_stateid(&args.dst_stateid, dst_lock->open_context,
+ dst_lock, FMODE_WRITE);
+ if (status)
+ return status;
+
+ status = nfs4_call_sync(server->client, server, &msg,
+ &args.seq_args, &res.seq_res, 0);
+ if (status == -ENOTSUPP)
+ server->caps &= ~NFS_CAP_COPY;
+ if (status)
+ return status;
+
+ if (res.write_res.verifier.committed != NFS_FILE_SYNC) {
+ status = nfs_commit_file(dst, &res.write_res.verifier.verifier);
+ if (status)
+ return status;
+ }
+
+ truncate_pagecache_range(dst_inode, pos_dst,
+ pos_dst + res.write_res.count);
+
+ return res.write_res.count;
+}
+
+ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
+ struct file *dst, loff_t pos_dst,
+ size_t count)
+{
+ struct nfs_server *server = NFS_SERVER(file_inode(dst));
+ struct nfs_lock_context *src_lock;
+ struct nfs_lock_context *dst_lock;
+ struct nfs4_exception src_exception = { };
+ struct nfs4_exception dst_exception = { };
+ ssize_t err, err2;
+
+ if (!nfs_server_capable(file_inode(dst), NFS_CAP_COPY))
+ return -EOPNOTSUPP;
+
+ src_lock = nfs_get_lock_context(nfs_file_open_context(src));
+ if (IS_ERR(src_lock))
+ return PTR_ERR(src_lock);
+
+ src_exception.inode = file_inode(src);
+ src_exception.state = src_lock->open_context->state;
+
+ dst_lock = nfs_get_lock_context(nfs_file_open_context(dst));
+ if (IS_ERR(dst_lock)) {
+ err = PTR_ERR(dst_lock);
+ goto out_put_src_lock;
+ }
+
+ dst_exception.inode = file_inode(dst);
+ dst_exception.state = dst_lock->open_context->state;
+
+ do {
+ mutex_lock(&file_inode(dst)->i_mutex);
+ err = _nfs42_proc_copy(src, pos_src, src_lock,
+ dst, pos_dst, dst_lock, count);
+ mutex_unlock(&file_inode(dst)->i_mutex);
+
+ if (err == -ENOTSUPP) {
+ err = -EOPNOTSUPP;
+ break;
+ }
+
+ err2 = nfs4_handle_exception(server, err, &src_exception);
+ err = nfs4_handle_exception(server, err, &dst_exception);
+ if (!err)
+ err = err2;
+ } while (src_exception.retry || dst_exception.retry);
+
+ nfs_put_lock_context(dst_lock);
+out_put_src_lock:
+ nfs_put_lock_context(src_lock);
+ return err;
+}
+
static loff_t _nfs42_proc_llseek(struct file *filep,
struct nfs_lock_context *lock, loff_t offset, int whence)
{
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 0ca482a..6dc6f2a 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -9,9 +9,22 @@
#define encode_fallocate_maxsz (encode_stateid_maxsz + \
2 /* offset */ + \
2 /* length */)
+#define NFS42_WRITE_RES_SIZE (1 /* wr_callback_id size */ +\
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 2 /* wr_count */ + \
+ 1 /* wr_committed */ + \
+ XDR_QUADLEN(NFS4_VERIFIER_SIZE))
#define encode_allocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_allocate_maxsz (op_decode_hdr_maxsz)
+#define encode_copy_maxsz (op_encode_hdr_maxsz + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ XDR_QUADLEN(NFS4_STATEID_SIZE) + \
+ 2 + 2 + 2 + 1 + 1 + 1)
+#define decode_copy_maxsz (op_decode_hdr_maxsz + \
+ NFS42_WRITE_RES_SIZE + \
+ 1 /* cr_consecutive */ + \
+ 1 /* cr_synchronous */)
#define encode_deallocate_maxsz (op_encode_hdr_maxsz + \
encode_fallocate_maxsz)
#define decode_deallocate_maxsz (op_decode_hdr_maxsz)
@@ -49,6 +62,16 @@
decode_putfh_maxsz + \
decode_allocate_maxsz + \
decode_getattr_maxsz)
+#define NFS4_enc_copy_sz (compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_savefh_maxsz + \
+ encode_putfh_maxsz + \
+ encode_copy_maxsz)
+#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
+ decode_putfh_maxsz + \
+ decode_savefh_maxsz + \
+ decode_putfh_maxsz + \
+ decode_copy_maxsz)
#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
encode_putfh_maxsz + \
encode_deallocate_maxsz + \
@@ -102,6 +125,23 @@ static void encode_allocate(struct xdr_stream *xdr,
encode_fallocate(xdr, args);
}
+static void encode_copy(struct xdr_stream *xdr,
+ struct nfs42_copy_args *args,
+ struct compound_hdr *hdr)
+{
+ encode_op_hdr(xdr, OP_COPY, decode_copy_maxsz, hdr);
+ encode_nfs4_stateid(xdr, &args->src_stateid);
+ encode_nfs4_stateid(xdr, &args->dst_stateid);
+
+ encode_uint64(xdr, args->src_pos);
+ encode_uint64(xdr, args->dst_pos);
+ encode_uint64(xdr, args->count);
+
+ encode_uint32(xdr, 1); /* consecutive = true */
+ encode_uint32(xdr, 1); /* synchronous = true */
+ encode_uint32(xdr, 0); /* src server list */
+}
+
static void encode_deallocate(struct xdr_stream *xdr,
struct nfs42_falloc_args *args,
struct compound_hdr *hdr)
@@ -182,6 +222,26 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
}
/*
+ * Encode COPY request
+ */
+static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
+ struct xdr_stream *xdr,
+ struct nfs42_copy_args *args)
+{
+ struct compound_hdr hdr = {
+ .minorversion = nfs4_xdr_minorversion(&args->seq_args),
+ };
+
+ encode_compound_hdr(xdr, req, &hdr);
+ encode_sequence(xdr, &args->seq_args, &hdr);
+ encode_putfh(xdr, args->src_fh, &hdr);
+ encode_savefh(xdr, &hdr);
+ encode_putfh(xdr, args->dst_fh, &hdr);
+ encode_copy(xdr, args, &hdr);
+ encode_nops(&hdr);
+}
+
+/*
* Encode DEALLOCATE request
*/
static void nfs4_xdr_enc_deallocate(struct rpc_rqst *req,
@@ -266,6 +326,62 @@ static int decode_allocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
return decode_op_hdr(xdr, OP_ALLOCATE);
}
+static int decode_write_response(struct xdr_stream *xdr,
+ struct nfs42_write_res *res)
+{
+ __be32 *p;
+ int stateids;
+
+ p = xdr_inline_decode(xdr, 4 + 8 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ stateids = be32_to_cpup(p++);
+ p = xdr_decode_hyper(p, &res->count);
+ res->verifier.committed = be32_to_cpup(p);
+ return decode_verifier(xdr, &res->verifier.verifier);
+
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_copy_requirements(struct xdr_stream *xdr,
+ struct nfs42_copy_res *res) {
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 4 + 4);
+ if (unlikely(!p))
+ goto out_overflow;
+
+ res->consecutive = be32_to_cpup(p++);
+ res->synchronous = be32_to_cpup(p++);
+ return 0;
+out_overflow:
+ print_overflow_msg(__func__, xdr);
+ return -EIO;
+}
+
+static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
+{
+ int status;
+
+ status = decode_op_hdr(xdr, OP_COPY);
+ if (status == NFS4ERR_OFFLOAD_NO_REQS) {
+ status = decode_copy_requirements(xdr, res);
+ if (status)
+ return status;
+ return NFS4ERR_OFFLOAD_NO_REQS;
+ } else if (status)
+ return status;
+
+ status = decode_write_response(xdr, &res->write_res);
+ if (status)
+ return status;
+
+ return decode_copy_requirements(xdr, res);
+}
+
static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *res)
{
return decode_op_hdr(xdr, OP_DEALLOCATE);
@@ -331,6 +447,36 @@ out:
}
/*
+ * Decode COPY response
+ */
+static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr,
+ struct nfs42_copy_res *res)
+{
+ struct compound_hdr hdr;
+ int status;
+
+ status = decode_compound_hdr(xdr, &hdr);
+ if (status)
+ goto out;
+ status = decode_sequence(xdr, &res->seq_res, rqstp);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_savefh(xdr);
+ if (status)
+ goto out;
+ status = decode_putfh(xdr);
+ if (status)
+ goto out;
+ status = decode_copy(xdr, res);
+out:
+ return status;
+}
+
+/*
* Decode DEALLOCATE request
*/
static int nfs4_xdr_dec_deallocate(struct rpc_rqst *rqstp,
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 57ca1c8..c2047ef 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -160,6 +160,28 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
}
#ifdef CONFIG_NFS_V4_2
+static ssize_t nfs4_copy_file_range(struct file *file_in, loff_t pos_in,
+ struct file *file_out, loff_t pos_out,
+ size_t count, unsigned int flags)
+{
+ struct inode *in_inode = file_inode(file_in);
+ struct inode *out_inode = file_inode(file_out);
+ int ret;
+
+ if (in_inode == out_inode)
+ return -EINVAL;
+
+ /* flush any pending writes */
+ ret = nfs_sync_inode(in_inode);
+ if (ret)
+ return ret;
+ ret = nfs_sync_inode(out_inode);
+ if (ret)
+ return ret;
+
+ return nfs42_proc_copy(file_in, pos_in, file_out, pos_out, count);
+}
+
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
loff_t ret;
@@ -274,6 +296,7 @@ const struct file_operations nfs4_file_operations = {
.check_flags = nfs_check_flags,
.setlease = simple_nosetlease,
#ifdef CONFIG_NFS_V4_2
+ .copy_file_range = nfs4_copy_file_range,
.llseek = nfs4_file_llseek,
.fallocate = nfs42_fallocate,
.clone_file_range = nfs42_clone_file_range,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index c70de30..eb08640 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -8792,6 +8792,7 @@ static const struct nfs4_minor_version_ops nfs_v4_2_minor_ops = {
| NFS_CAP_STATEID_NFSV41
| NFS_CAP_ATOMIC_OPEN_V1
| NFS_CAP_ALLOCATE
+ | NFS_CAP_COPY
| NFS_CAP_DEALLOCATE
| NFS_CAP_SEEK
| NFS_CAP_LAYOUTSTATS
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e44412..bc3a3f5 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -7515,6 +7515,7 @@ struct rpc_procinfo nfs4_procedures[] = {
PROC(DEALLOCATE, enc_deallocate, dec_deallocate),
PROC(LAYOUTSTATS, enc_layoutstats, dec_layoutstats),
PROC(CLONE, enc_clone, dec_clone),
+ PROC(COPY, enc_copy, dec_copy),
#endif /* CONFIG_NFS_V4_2 */
};
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index d6f9b4e..ed14d1a 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -504,6 +504,7 @@ enum {
NFSPROC4_CLNT_DEALLOCATE,
NFSPROC4_CLNT_LAYOUTSTATS,
NFSPROC4_CLNT_CLONE,
+ NFSPROC4_CLNT_COPY,
};
/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 7fcc13c..14a762d 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -246,5 +246,6 @@ struct nfs_server {
#define NFS_CAP_DEALLOCATE (1U << 21)
#define NFS_CAP_LAYOUTSTATS (1U << 22)
#define NFS_CAP_CLONE (1U << 23)
+#define NFS_CAP_COPY (1U << 24)
#endif
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index d320906..d7adcab 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1343,6 +1343,32 @@ struct nfs42_falloc_res {
const struct nfs_server *falloc_server;
};
+struct nfs42_copy_args {
+ struct nfs4_sequence_args seq_args;
+
+ struct nfs_fh *src_fh;
+ nfs4_stateid src_stateid;
+ u64 src_pos;
+
+ struct nfs_fh *dst_fh;
+ nfs4_stateid dst_stateid;
+ u64 dst_pos;
+
+ u64 count;
+};
+
+struct nfs42_write_res {
+ u64 count;
+ struct nfs_writeverf verifier;
+};
+
+struct nfs42_copy_res {
+ struct nfs4_sequence_res seq_res;
+ struct nfs42_write_res write_res;
+ bool consecutive;
+ bool synchronous;
+};
+
struct nfs42_seek_args {
struct nfs4_sequence_args seq_args;
--
2.7.2
From: Anna Schumaker <[email protected]>
I only implemented the sync version of this call, since it's the
easiest. I can simply call vfs_copy_range() and have the vfs do the
right thing for the filesystem being exported.
Signed-off-by: Anna Schumaker <[email protected]>
--
v3:
- Check for copying beyond the end of the file
---
fs/nfsd/nfs4proc.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++------
fs/nfsd/nfs4xdr.c | 63 +++++++++++++++++++++++++++++++++--
fs/nfsd/vfs.c | 6 ++++
fs/nfsd/vfs.h | 1 +
fs/nfsd/xdr4.h | 23 +++++++++++++
5 files changed, 178 insertions(+), 13 deletions(-)
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4cba786..0a1aa2f 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1012,47 +1012,105 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
}
static __be32
-nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
- struct nfsd4_clone *clone)
+nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ stateid_t *src_stateid, struct file **src,
+ stateid_t *dst_stateid, struct file **dst)
{
- struct file *src, *dst;
__be32 status;
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
- &clone->cl_src_stateid, RD_STATE,
- &src, NULL);
+ src_stateid, RD_STATE, src, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
goto out;
}
status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
- &clone->cl_dst_stateid, WR_STATE,
- &dst, NULL);
+ dst_stateid, WR_STATE, dst, NULL);
if (status) {
dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
goto out_put_src;
}
/* fix up for NFS-specific error code */
- if (!S_ISREG(file_inode(src)->i_mode) ||
- !S_ISREG(file_inode(dst)->i_mode)) {
+ if (!S_ISREG(file_inode(*src)->i_mode) ||
+ !S_ISREG(file_inode(*dst)->i_mode)) {
status = nfserr_wrong_type;
goto out_put_dst;
}
+out:
+ return status;
+out_put_dst:
+ fput(*dst);
+out_put_src:
+ fput(*src);
+ goto out;
+}
+
+static __be32
+nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_clone *clone)
+{
+ struct file *src, *dst;
+ __be32 status;
+
+ status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
+ &clone->cl_dst_stateid, &dst);
+ if (status)
+ goto out;
+
status = nfsd4_clone_file_range(src, clone->cl_src_pos,
dst, clone->cl_dst_pos, clone->cl_count);
-out_put_dst:
fput(dst);
-out_put_src:
fput(src);
out:
return status;
}
static __be32
+nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
+ struct nfsd4_copy *copy)
+{
+ struct file *src, *dst;
+ __be32 status;
+ ssize_t bytes;
+ loff_t max;
+
+ status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src,
+ ©->cp_dst_stateid, &dst);
+ if (status)
+ goto out;
+
+ max = i_size_read(file_inode(src));
+ if ((copy->cp_src_pos + copy->cp_count) > max) {
+ status = nfserr_inval;
+ goto out_put;
+ }
+
+ bytes = nfsd_copy_range(src, copy->cp_src_pos, dst, copy->cp_dst_pos,
+ copy->cp_count);
+
+ if (bytes < 0)
+ status = nfserrno(bytes);
+ else {
+ copy->cp_res.wr_bytes_written = bytes;
+ copy->cp_res.wr_stable_how = NFS_UNSTABLE;
+ copy->cp_consecutive = 1;
+ copy->cp_synchronous = 1;
+ gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp));
+ status = nfs_ok;
+ }
+
+out_put:
+ fput(src);
+ fput(dst);
+out:
+ return status;
+}
+
+static __be32
nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_fallocate *fallocate, int flags)
{
@@ -1966,6 +2024,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
op_encode_channel_attrs_maxsz) * sizeof(__be32);
}
+static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size +
+ 1 /* wr_callback */ +
+ op_encode_stateid_maxsz /* wr_callback */ +
+ 2 /* wr_count */ +
+ 1 /* wr_committed */ +
+ op_encode_verifier_maxsz +
+ 1 /* cr_consecutive */ +
+ 1 /* cr_synchronous */) * sizeof(__be32);
+}
+
#ifdef CONFIG_NFSD_PNFS
/*
* At this stage we don't really know what layout driver will handle the request,
@@ -2328,6 +2398,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_name = "OP_CLONE",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
+ [OP_COPY] = {
+ .op_func = (nfsd4op_func)nfsd4_copy,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_name = "OP_COPY",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
+ },
[OP_SEEK] = {
.op_func = (nfsd4op_func)nfsd4_seek,
.op_name = "OP_SEEK",
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d6ef095..2a73c7e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -1694,6 +1694,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
}
static __be32
+nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
+{
+ DECODE_HEAD;
+ unsigned int tmp;
+
+ status = nfsd4_decode_stateid(argp, ©->cp_src_stateid);
+ if (status)
+ return status;
+ status = nfsd4_decode_stateid(argp, ©->cp_dst_stateid);
+ if (status)
+ return status;
+
+ READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
+ p = xdr_decode_hyper(p, ©->cp_src_pos);
+ p = xdr_decode_hyper(p, ©->cp_dst_pos);
+ p = xdr_decode_hyper(p, ©->cp_count);
+ copy->cp_consecutive = be32_to_cpup(p++);
+ copy->cp_synchronous = be32_to_cpup(p++);
+ tmp = be32_to_cpup(p); /* Source server list not supported */
+
+ DECODE_TAIL;
+}
+
+static __be32
nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
DECODE_HEAD;
@@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
/* new operations for NFSv4.2 */
[OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
- [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
+ [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
[OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
[OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
[OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -4203,6 +4227,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
#endif /* CONFIG_NFSD_PNFS */
static __be32
+nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
+{
+ __be32 *p;
+
+ p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+
+ *p++ = cpu_to_be32(0);
+ p = xdr_encode_hyper(p, write->wr_bytes_written);
+ *p++ = cpu_to_be32(write->wr_stable_how);
+ p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
+ NFS4_VERIFIER_SIZE);
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_copy *copy)
+{
+ __be32 *p;
+
+ if (!nfserr) {
+ nfserr = nfsd42_encode_write_res(resp, ©->cp_res);
+ if (nfserr)
+ return nfserr;
+
+ p = xdr_reserve_space(&resp->xdr, 4 + 4);
+ *p++ = cpu_to_be32(copy->cp_consecutive);
+ *p++ = cpu_to_be32(copy->cp_synchronous);
+ }
+ return nfserr;
+}
+
+static __be32
nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_seek *seek)
{
@@ -4301,7 +4360,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
/* NFSv4.2 operations */
[OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
- [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
+ [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
[OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
[OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
[OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5d2a57e..28802a7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -513,6 +513,12 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
count));
}
+ssize_t nfsd_copy_range(struct file *src, u64 src_pos, struct file *dst,
+ u64 dst_pos, u64 count)
+{
+ return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+}
+
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
struct file *file, loff_t offset, loff_t len,
int flags)
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba31..e36c497 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -93,6 +93,7 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
struct svc_fh *res);
__be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
char *, int, struct svc_fh *);
+ssize_t nfsd_copy_range(struct file *, u64, struct file *, u64, u64);
__be32 nfsd_rename(struct svc_rqst *,
struct svc_fh *, char *, int,
struct svc_fh *, char *, int);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d955481..2cad349 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -500,6 +500,28 @@ struct nfsd4_clone {
u64 cl_count;
};
+struct nfsd42_write_res {
+ u64 wr_bytes_written;
+ u32 wr_stable_how;
+ nfs4_verifier wr_verifier;
+};
+
+struct nfsd4_copy {
+ /* request */
+ stateid_t cp_src_stateid;
+ stateid_t cp_dst_stateid;
+ u64 cp_src_pos;
+ u64 cp_dst_pos;
+ u64 cp_count;
+
+ /* both */
+ bool cp_consecutive;
+ bool cp_synchronous;
+
+ /* response */
+ struct nfsd42_write_res cp_res;
+};
+
struct nfsd4_seek {
/* request */
stateid_t seek_stateid;
@@ -565,6 +587,7 @@ struct nfsd4_op {
struct nfsd4_fallocate allocate;
struct nfsd4_fallocate deallocate;
struct nfsd4_clone clone;
+ struct nfsd4_copy copy;
struct nfsd4_seek seek;
} u;
struct nfs4_replay * replay;
--
2.7.2
Copy will use this to set up a commit request for a generic range. I
don't want to allocate a new pagecache entry for the file, so I needed
to change parts of the commit path to handle requests with a null
wb_page.
Signed-off-by: Anna Schumaker <[email protected]>
---
Checking for wb_page everywhere feels like a hack, but I don't really see
any other way of doing this. Suggestions welcome!
---
fs/nfs/internal.h | 1 +
fs/nfs/pagelist.c | 6 ++++--
fs/nfs/write.c | 41 +++++++++++++++++++++++++++++++++++++----
3 files changed, 42 insertions(+), 6 deletions(-)
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9a547aa..7a3778c 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -477,6 +477,7 @@ void nfs_mark_request_commit(struct nfs_page *req,
u32 ds_commit_idx);
int nfs_write_need_commit(struct nfs_pgio_header *);
void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
+int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
int how, struct nfs_commit_info *cinfo);
void nfs_retry_commit(struct list_head *page_list,
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8ce4f61..c3c3555 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -341,8 +341,10 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
- req->wb_index = page_file_index(page);
- page_cache_get(page);
+ if (page) {
+ req->wb_index = page_file_index(page);
+ page_cache_get(page);
+ }
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5754835..a82765b9 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -737,7 +737,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
head = req->wb_head;
spin_lock(&inode->i_lock);
- if (likely(!PageSwapCache(head->wb_page))) {
+ if (likely(head->wb_page && !PageSwapCache(head->wb_page))) {
set_page_private(head->wb_page, 0);
ClearPagePrivate(head->wb_page);
smp_mb__after_atomic();
@@ -759,7 +759,8 @@ static void nfs_inode_remove_request(struct nfs_page *req)
static void
nfs_mark_request_dirty(struct nfs_page *req)
{
- __set_page_dirty_nobuffers(req->wb_page);
+ if (req->wb_page)
+ __set_page_dirty_nobuffers(req->wb_page);
}
/*
@@ -835,7 +836,8 @@ nfs_request_add_commit_list(struct nfs_page *req, struct nfs_commit_info *cinfo)
spin_lock(cinfo->lock);
nfs_request_add_commit_list_locked(req, &cinfo->mds->list, cinfo);
spin_unlock(cinfo->lock);
- nfs_mark_page_unstable(req->wb_page, cinfo);
+ if (req->wb_page)
+ nfs_mark_page_unstable(req->wb_page, cinfo);
}
EXPORT_SYMBOL_GPL(nfs_request_add_commit_list);
@@ -1724,6 +1726,36 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
return -ENOMEM;
}
+int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
+{
+ struct inode *inode = file_inode(file);
+ struct nfs_open_context *open;
+ struct nfs_commit_info cinfo;
+ struct nfs_page *req;
+ int ret;
+
+ open = get_nfs_open_context(nfs_file_open_context(file));
+ req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
+ if (!req) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+
+ nfs_init_cinfo_from_inode(&cinfo, inode);
+
+ memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
+ nfs_request_add_commit_list(req, &cinfo);
+ ret = nfs_commit_inode(inode, FLUSH_SYNC);
+ if (ret > 0)
+ ret = 0;
+
+ nfs_free_request(req);
+out_put:
+ put_nfs_open_context(open);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_commit_file);
+
/*
* COMMIT call returned
*/
@@ -1748,7 +1780,8 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
- nfs_clear_page_commit(req->wb_page);
+ if (req->wb_page)
+ nfs_clear_page_commit(req->wb_page);
dprintk("NFS: commit (%s/%llu %d@%lld)",
req->wb_context->dentry->d_sb->s_id,
--
2.7.2
This is a simple C program that I used for calling the copy system call.
Usage: ./nfscopy /nfs/original.txt /nfs/copy.txt
---
nfscopy.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 59 insertions(+)
create mode 100644 nfscopy.c
diff --git a/nfscopy.c b/nfscopy.c
new file mode 100644
index 0000000..3417a14
--- /dev/null
+++ b/nfscopy.c
@@ -0,0 +1,59 @@
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+loff_t copy_file_range(int fd_in, loff_t *off_in, int fd_out,
+ loff_t *off_out, size_t len, unsigned int flags)
+{
+ return syscall(__NR_copy_file_range, fd_in, off_in, fd_out,
+ off_out, len, flags);
+}
+
+int main(int argc, char **argv)
+{
+ int fd_in, fd_out;
+ struct stat stat;
+ loff_t len, ret;
+ char buf[2];
+
+ if (argc != 3) {
+ fprintf(stderr, "Usage: %s <source> <destination>\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ fd_in = open(argv[1], O_RDONLY);
+ if (fd_in == -1) {
+ perror("open (argv[1])");
+ exit(EXIT_FAILURE);
+ }
+
+ if (fstat(fd_in, &stat) == -1) {
+ perror("fstat");
+ exit(EXIT_FAILURE);
+ }
+ len = stat.st_size;
+
+ fd_out = open(argv[2], O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ if (fd_out == -1) {
+ perror("open (argv[2])");
+ exit(EXIT_FAILURE);
+ }
+
+ do {
+ ret = copy_file_range(fd_in, NULL, fd_out, NULL, len, 0);
+ if (ret == -1) {
+ perror("copy_file_range");
+ exit(EXIT_FAILURE);
+ }
+
+ len -= ret;
+ } while (len > 0);
+
+ close(fd_in);
+ close(fd_out);
+ exit(EXIT_SUCCESS);
+}
--
2.7.2
On Tue, Mar 08, 2016 at 04:55:57PM -0500, Anna Schumaker wrote:
> From: Anna Schumaker <[email protected]>
>
> I only implemented the sync version of this call, since it's the
> easiest. I can simply call vfs_copy_range() and have the vfs do the
> right thing for the filesystem being exported.
>
> Signed-off-by: Anna Schumaker <[email protected]>
> --
> v3:
> - Check for copying beyond the end of the file
> ---
> fs/nfsd/nfs4proc.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++++------
> fs/nfsd/nfs4xdr.c | 63 +++++++++++++++++++++++++++++++++--
> fs/nfsd/vfs.c | 6 ++++
> fs/nfsd/vfs.h | 1 +
> fs/nfsd/xdr4.h | 23 +++++++++++++
> 5 files changed, 178 insertions(+), 13 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 4cba786..0a1aa2f 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -1012,47 +1012,105 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> }
>
> static __be32
> -nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> - struct nfsd4_clone *clone)
> +nfsd4_verify_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> + stateid_t *src_stateid, struct file **src,
> + stateid_t *dst_stateid, struct file **dst)
> {
> - struct file *src, *dst;
> __be32 status;
>
> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->save_fh,
> - &clone->cl_src_stateid, RD_STATE,
> - &src, NULL);
> + src_stateid, RD_STATE, src, NULL);
> if (status) {
> dprintk("NFSD: %s: couldn't process src stateid!\n", __func__);
> goto out;
> }
>
> status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> - &clone->cl_dst_stateid, WR_STATE,
> - &dst, NULL);
> + dst_stateid, WR_STATE, dst, NULL);
> if (status) {
> dprintk("NFSD: %s: couldn't process dst stateid!\n", __func__);
> goto out_put_src;
> }
>
> /* fix up for NFS-specific error code */
> - if (!S_ISREG(file_inode(src)->i_mode) ||
> - !S_ISREG(file_inode(dst)->i_mode)) {
> + if (!S_ISREG(file_inode(*src)->i_mode) ||
> + !S_ISREG(file_inode(*dst)->i_mode)) {
> status = nfserr_wrong_type;
> goto out_put_dst;
> }
>
> +out:
> + return status;
> +out_put_dst:
> + fput(*dst);
> +out_put_src:
> + fput(*src);
> + goto out;
> +}
> +
> +static __be32
> +nfsd4_clone(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> + struct nfsd4_clone *clone)
> +{
> + struct file *src, *dst;
> + __be32 status;
> +
> + status = nfsd4_verify_copy(rqstp, cstate, &clone->cl_src_stateid, &src,
> + &clone->cl_dst_stateid, &dst);
> + if (status)
> + goto out;
> +
> status = nfsd4_clone_file_range(src, clone->cl_src_pos,
> dst, clone->cl_dst_pos, clone->cl_count);
>
> -out_put_dst:
> fput(dst);
> -out_put_src:
> fput(src);
> out:
> return status;
> }
>
> static __be32
> +nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> + struct nfsd4_copy *copy)
> +{
> + struct file *src, *dst;
> + __be32 status;
> + ssize_t bytes;
> + loff_t max;
> +
> + status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src,
> + ©->cp_dst_stateid, &dst);
> + if (status)
> + goto out;
> +
> + max = i_size_read(file_inode(src));
> + if ((copy->cp_src_pos + copy->cp_count) > max) {
> + status = nfserr_inval;
> + goto out_put;
> + }
If we care about the race between this check and the copy, then we need
some locking. If we don't, then a comment explaining why not would be
useful here.
(Looks like this check, and the INVAL error, are both mandated by the
spec. The behavior looks wrong to me--usually EINVAL's reserved for
things the client had to know was a problem, but here the client doesn't
necessarily know the file size. I would have expected just success and
a short read.)
--b.
> +
> + bytes = nfsd_copy_range(src, copy->cp_src_pos, dst, copy->cp_dst_pos,
> + copy->cp_count);
> +
> + if (bytes < 0)
> + status = nfserrno(bytes);
> + else {
> + copy->cp_res.wr_bytes_written = bytes;
> + copy->cp_res.wr_stable_how = NFS_UNSTABLE;
> + copy->cp_consecutive = 1;
> + copy->cp_synchronous = 1;
> + gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp));
> + status = nfs_ok;
> + }
> +
> +out_put:
> + fput(src);
> + fput(dst);
> +out:
> + return status;
> +}
> +
> +static __be32
> nfsd4_fallocate(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> struct nfsd4_fallocate *fallocate, int flags)
> {
> @@ -1966,6 +2024,18 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
> op_encode_channel_attrs_maxsz) * sizeof(__be32);
> }
>
> +static inline u32 nfsd4_copy_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size +
> + 1 /* wr_callback */ +
> + op_encode_stateid_maxsz /* wr_callback */ +
> + 2 /* wr_count */ +
> + 1 /* wr_committed */ +
> + op_encode_verifier_maxsz +
> + 1 /* cr_consecutive */ +
> + 1 /* cr_synchronous */) * sizeof(__be32);
> +}
> +
> #ifdef CONFIG_NFSD_PNFS
> /*
> * At this stage we don't really know what layout driver will handle the request,
> @@ -2328,6 +2398,12 @@ static struct nfsd4_operation nfsd4_ops[] = {
> .op_name = "OP_CLONE",
> .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> + [OP_COPY] = {
> + .op_func = (nfsd4op_func)nfsd4_copy,
> + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
> + .op_name = "OP_COPY",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_copy_rsize,
> + },
> [OP_SEEK] = {
> .op_func = (nfsd4op_func)nfsd4_seek,
> .op_name = "OP_SEEK",
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index d6ef095..2a73c7e 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1694,6 +1694,30 @@ nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
> }
>
> static __be32
> +nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
> +{
> + DECODE_HEAD;
> + unsigned int tmp;
> +
> + status = nfsd4_decode_stateid(argp, ©->cp_src_stateid);
> + if (status)
> + return status;
> + status = nfsd4_decode_stateid(argp, ©->cp_dst_stateid);
> + if (status)
> + return status;
> +
> + READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
> + p = xdr_decode_hyper(p, ©->cp_src_pos);
> + p = xdr_decode_hyper(p, ©->cp_dst_pos);
> + p = xdr_decode_hyper(p, ©->cp_count);
> + copy->cp_consecutive = be32_to_cpup(p++);
> + copy->cp_synchronous = be32_to_cpup(p++);
> + tmp = be32_to_cpup(p); /* Source server list not supported */
> +
> + DECODE_TAIL;
> +}
> +
> +static __be32
> nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
> {
> DECODE_HEAD;
> @@ -1793,7 +1817,7 @@ static nfsd4_dec nfsd4_dec_ops[] = {
>
> /* new operations for NFSv4.2 */
> [OP_ALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
> - [OP_COPY] = (nfsd4_dec)nfsd4_decode_notsupp,
> + [OP_COPY] = (nfsd4_dec)nfsd4_decode_copy,
> [OP_COPY_NOTIFY] = (nfsd4_dec)nfsd4_decode_notsupp,
> [OP_DEALLOCATE] = (nfsd4_dec)nfsd4_decode_fallocate,
> [OP_IO_ADVISE] = (nfsd4_dec)nfsd4_decode_notsupp,
> @@ -4203,6 +4227,41 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
> #endif /* CONFIG_NFSD_PNFS */
>
> static __be32
> +nfsd42_encode_write_res(struct nfsd4_compoundres *resp, struct nfsd42_write_res *write)
> +{
> + __be32 *p;
> +
> + p = xdr_reserve_space(&resp->xdr, 4 + 8 + 4 + NFS4_VERIFIER_SIZE);
> + if (!p)
> + return nfserr_resource;
> +
> + *p++ = cpu_to_be32(0);
> + p = xdr_encode_hyper(p, write->wr_bytes_written);
> + *p++ = cpu_to_be32(write->wr_stable_how);
> + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
> + NFS4_VERIFIER_SIZE);
> + return nfs_ok;
> +}
> +
> +static __be32
> +nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
> + struct nfsd4_copy *copy)
> +{
> + __be32 *p;
> +
> + if (!nfserr) {
> + nfserr = nfsd42_encode_write_res(resp, ©->cp_res);
> + if (nfserr)
> + return nfserr;
> +
> + p = xdr_reserve_space(&resp->xdr, 4 + 4);
> + *p++ = cpu_to_be32(copy->cp_consecutive);
> + *p++ = cpu_to_be32(copy->cp_synchronous);
> + }
> + return nfserr;
> +}
> +
> +static __be32
> nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
> struct nfsd4_seek *seek)
> {
> @@ -4301,7 +4360,7 @@ static nfsd4_enc nfsd4_enc_ops[] = {
>
> /* NFSv4.2 operations */
> [OP_ALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
> - [OP_COPY] = (nfsd4_enc)nfsd4_encode_noop,
> + [OP_COPY] = (nfsd4_enc)nfsd4_encode_copy,
> [OP_COPY_NOTIFY] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_DEALLOCATE] = (nfsd4_enc)nfsd4_encode_noop,
> [OP_IO_ADVISE] = (nfsd4_enc)nfsd4_encode_noop,
> diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
> index 5d2a57e..28802a7 100644
> --- a/fs/nfsd/vfs.c
> +++ b/fs/nfsd/vfs.c
> @@ -513,6 +513,12 @@ __be32 nfsd4_clone_file_range(struct file *src, u64 src_pos, struct file *dst,
> count));
> }
>
> +ssize_t nfsd_copy_range(struct file *src, u64 src_pos, struct file *dst,
> + u64 dst_pos, u64 count)
> +{
> + return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
> +}
> +
> __be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
> struct file *file, loff_t offset, loff_t len,
> int flags)
> diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
> index c11ba31..e36c497 100644
> --- a/fs/nfsd/vfs.h
> +++ b/fs/nfsd/vfs.h
> @@ -93,6 +93,7 @@ __be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *,
> struct svc_fh *res);
> __be32 nfsd_link(struct svc_rqst *, struct svc_fh *,
> char *, int, struct svc_fh *);
> +ssize_t nfsd_copy_range(struct file *, u64, struct file *, u64, u64);
> __be32 nfsd_rename(struct svc_rqst *,
> struct svc_fh *, char *, int,
> struct svc_fh *, char *, int);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index d955481..2cad349 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -500,6 +500,28 @@ struct nfsd4_clone {
> u64 cl_count;
> };
>
> +struct nfsd42_write_res {
> + u64 wr_bytes_written;
> + u32 wr_stable_how;
> + nfs4_verifier wr_verifier;
> +};
> +
> +struct nfsd4_copy {
> + /* request */
> + stateid_t cp_src_stateid;
> + stateid_t cp_dst_stateid;
> + u64 cp_src_pos;
> + u64 cp_dst_pos;
> + u64 cp_count;
> +
> + /* both */
> + bool cp_consecutive;
> + bool cp_synchronous;
> +
> + /* response */
> + struct nfsd42_write_res cp_res;
> +};
> +
> struct nfsd4_seek {
> /* request */
> stateid_t seek_stateid;
> @@ -565,6 +587,7 @@ struct nfsd4_op {
> struct nfsd4_fallocate allocate;
> struct nfsd4_fallocate deallocate;
> struct nfsd4_clone clone;
> + struct nfsd4_copy copy;
> struct nfsd4_seek seek;
> } u;
> struct nfs4_replay * replay;
> --
> 2.7.2
Can you try to wire up varius corner case tests in xfstests, please?
We have 123 clone tests in the generic xfstests group, plus a lot of
btrfs and xfs specific ones. It would be great to have at least the
most important ones ported to test copy as well.
On Mon, Mar 14, 2016 at 05:34:26PM -0400, J. Bruce Fields wrote:
> On Tue, Mar 08, 2016 at 04:55:57PM -0500, Anna Schumaker wrote:
> > From: Anna Schumaker <[email protected]>
> > static __be32
> > +nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> > + struct nfsd4_copy *copy)
> > +{
> > + struct file *src, *dst;
> > + __be32 status;
> > + ssize_t bytes;
> > + loff_t max;
> > +
> > + status = nfsd4_verify_copy(rqstp, cstate, ©->cp_src_stateid, &src,
> > + ©->cp_dst_stateid, &dst);
> > + if (status)
> > + goto out;
> > +
> > + max = i_size_read(file_inode(src));
> > + if ((copy->cp_src_pos + copy->cp_count) > max) {
> > + status = nfserr_inval;
> > + goto out_put;
> > + }
>
> If we care about the race between this check and the copy, then we need
> some locking. If we don't, then a comment explaining why not would be
> useful here.
>
> (Looks like this check, and the INVAL error, are both mandated by the
> spec. The behavior looks wrong to me--usually EINVAL's reserved for
> things the client had to know was a problem, but here the client doesn't
> necessarily know the file size. I would have expected just success and
> a short read.)
Now that I look at it, that behavior's what's documented for the system
call too; from the man page:
EINVAL Requested range extends beyond the end of the source
file; or the flags argument is not 0.
Still seems like really strange behavior to me.
But if that's what we chose, then I think vfs_copy_range should be doing
that check for us.
--b.