Return-Path: linux-nfs-owner@vger.kernel.org Received: from mx11.netapp.com ([216.240.18.76]:33933 "EHLO mx11.netapp.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S932676Ab3GVTRc (ORCPT ); Mon, 22 Jul 2013 15:17:32 -0400 Message-ID: <51ED8549.3040308@netapp.com> Date: Mon, 22 Jul 2013 15:17:29 -0400 From: Bryan Schumaker MIME-Version: 1.0 To: "J. Bruce Fields" CC: , Subject: Re: [RFC 4/5] NFSD: Defer copying References: <1374267830-30154-1-git-send-email-bjschuma@netapp.com> <1374267830-30154-5-git-send-email-bjschuma@netapp.com> <20130722185002.GB10109@fieldses.org> In-Reply-To: <20130722185002.GB10109@fieldses.org> Content-Type: text/plain; charset="ISO-8859-1" Sender: linux-nfs-owner@vger.kernel.org List-ID: On 07/22/2013 02:50 PM, J. Bruce Fields wrote: > On Fri, Jul 19, 2013 at 05:03:49PM -0400, bjschuma@netapp.com wrote: >> From: Bryan Schumaker >> >> Rather than performing the copy right away, schedule it to run later and >> reply to the client. Later, send a callback to notify the client that >> the copy has finished. > > I believe you need to implement the referring triple support described > in http://tools.ietf.org/html/rfc5661#section-2.10.6.3 to fix the race > described in > http://tools.ietf.org/html/draft-ietf-nfsv4-minorversion2-19#section-15.1.3 > . I'll re-read and re-write. > > I see cb_delay initialized below, but not otherwise used. Am I missing > anything? Whoops! I was using that earlier to try to fake up a callback, but I eventually decided it's easier to just do the copy asynchronously. I must have forgotten to take it out :( > > What about OFFLOAD_STATUS and OFFLOAD_ABORT? I haven't thought out those too much... I haven't thought about a use for them on the client yet. > > In some common cases the reply will be very quick, and we might be > better off handling it synchronously. Could we implement a heuristic > like "copy synchronously if the filesystem has special support or the > range is less than the maximum iosize, otherwise copy asynchronously"? I'm sure that can be done, I'm just not sure how to do it yet... > > --b. > > >> Signed-off-by: Bryan Schumaker >> --- >> fs/nfsd/nfs4callback.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++ >> fs/nfsd/nfs4proc.c | 59 +++++++++++++++------ >> fs/nfsd/nfs4state.c | 11 ++++ >> fs/nfsd/state.h | 21 ++++++++ >> fs/nfsd/xdr4cb.h | 9 ++++ >> 5 files changed, 221 insertions(+), 15 deletions(-) >> >> diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c >> index 7f05cd1..8f797e1 100644 >> --- a/fs/nfsd/nfs4callback.c >> +++ b/fs/nfsd/nfs4callback.c >> @@ -52,6 +52,9 @@ enum { >> NFSPROC4_CLNT_CB_NULL = 0, >> NFSPROC4_CLNT_CB_RECALL, >> NFSPROC4_CLNT_CB_SEQUENCE, >> + >> + /* NFS v4.2 callback */ >> + NFSPROC4_CLNT_CB_OFFLOAD, >> }; >> >> struct nfs4_cb_compound_hdr { >> @@ -110,6 +113,7 @@ enum nfs_cb_opnum4 { >> OP_CB_WANTS_CANCELLED = 12, >> OP_CB_NOTIFY_LOCK = 13, >> OP_CB_NOTIFY_DEVICEID = 14, >> + OP_CB_OFFLOAD = 15, >> OP_CB_ILLEGAL = 10044 >> }; >> >> @@ -469,6 +473,31 @@ out_default: >> return nfs_cb_stat_to_errno(nfserr); >> } >> >> +static void encode_cb_offload4args(struct xdr_stream *xdr, >> + const struct nfs4_cb_offload *offload, >> + struct nfs4_cb_compound_hdr *hdr) >> +{ >> + __be32 *p; >> + >> + if (hdr->minorversion < 2) >> + return; >> + >> + encode_nfs_cb_opnum4(xdr, OP_CB_OFFLOAD); >> + encode_nfs_fh4(xdr, &offload->co_dst_fh); >> + encode_stateid4(xdr, &offload->co_stid->sc_stateid); >> + >> + p = xdr_reserve_space(xdr, 4); >> + *p = cpu_to_be32(1); >> + encode_stateid4(xdr, &offload->co_stid->sc_stateid); >> + >> + p = xdr_reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE); >> + p = xdr_encode_hyper(p, offload->co_count); >> + *p++ = cpu_to_be32(offload->co_stable_how); >> + xdr_encode_opaque_fixed(p, offload->co_verifier.data, NFS4_VERIFIER_SIZE); >> + >> + hdr->nops++; >> +} >> + >> /* >> * NFSv4.0 and NFSv4.1 XDR encode functions >> * >> @@ -505,6 +534,23 @@ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, >> encode_cb_nops(&hdr); >> } >> >> +/* >> + * CB_OFFLOAD >> + */ >> +static void nfs4_xdr_enc_cb_offload(struct rpc_rqst *req, struct xdr_stream *xdr, >> + const struct nfsd4_callback *cb) >> +{ >> + const struct nfs4_cb_offload *args = cb->cb_op; >> + struct nfs4_cb_compound_hdr hdr = { >> + .ident = cb->cb_clp->cl_cb_ident, >> + .minorversion = cb->cb_minorversion, >> + }; >> + >> + encode_cb_compound4args(xdr, &hdr); >> + encode_cb_sequence4args(xdr, cb, &hdr); >> + encode_cb_offload4args(xdr, args, &hdr); >> + encode_cb_nops(&hdr); >> +} >> >> /* >> * NFSv4.0 and NFSv4.1 XDR decode functions >> @@ -552,6 +598,36 @@ out: >> } >> >> /* >> + * CB_OFFLOAD >> + */ >> +static int nfs4_xdr_dec_cb_offload(struct rpc_rqst *rqstp, struct xdr_stream *xdr, >> + struct nfsd4_callback *cb) >> +{ >> + struct nfs4_cb_compound_hdr hdr; >> + enum nfsstat4 nfserr; >> + int status; >> + >> + status = decode_cb_compound4res(xdr, &hdr); >> + if (unlikely(status)) >> + goto out; >> + >> + if (cb != NULL) { >> + status = decode_cb_sequence4res(xdr, cb); >> + if (unlikely(status)) >> + goto out; >> + } >> + >> + status = decode_cb_op_status(xdr, OP_CB_OFFLOAD, &nfserr); >> + if (unlikely(status)) >> + goto out; >> + if (unlikely(nfserr != NFS4_OK)) >> + status = nfs_cb_stat_to_errno(nfserr); >> + >> +out: >> + return status; >> +} >> + >> +/* >> * RPC procedure tables >> */ >> #define PROC(proc, call, argtype, restype) \ >> @@ -568,6 +644,7 @@ out: >> static struct rpc_procinfo nfs4_cb_procedures[] = { >> PROC(CB_NULL, NULL, cb_null, cb_null), >> PROC(CB_RECALL, COMPOUND, cb_recall, cb_recall), >> + PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), >> }; >> >> static struct rpc_version nfs_cb_version4 = { >> @@ -1017,6 +1094,11 @@ void nfsd4_init_callback(struct nfsd4_callback *cb) >> INIT_WORK(&cb->cb_work, nfsd4_do_callback_rpc); >> } >> >> +void nfsd4_init_delayed_callback(struct nfsd4_callback *cb) >> +{ >> + INIT_DELAYED_WORK(&cb->cb_delay, nfsd4_do_callback_rpc); >> +} >> + >> void nfsd4_cb_recall(struct nfs4_delegation *dp) >> { >> struct nfsd4_callback *cb = &dp->dl_recall; >> @@ -1036,3 +1118,57 @@ void nfsd4_cb_recall(struct nfs4_delegation *dp) >> >> run_nfsd4_cb(&dp->dl_recall); >> } >> + >> +static void nfsd4_cb_offload_done(struct rpc_task *task, void *calldata) >> +{ >> + struct nfsd4_callback *cb = calldata; >> + struct nfs4_client *clp = cb->cb_clp; >> + struct rpc_clnt *current_rpc_client = clp->cl_cb_client; >> + >> + nfsd4_cb_done(task, calldata); >> + >> + if (current_rpc_client != task->tk_client) >> + return; >> + >> + if (cb->cb_done) >> + return; >> + >> + if (task->tk_status != 0) >> + nfsd4_mark_cb_down(clp, task->tk_status); >> + cb->cb_done = true; >> +} >> + >> +static void nfsd4_cb_offload_release(void *calldata) >> +{ >> + struct nfsd4_callback *cb = calldata; >> + struct nfs4_cb_offload *offload = container_of(cb, struct nfs4_cb_offload, co_callback); >> + >> + if (cb->cb_done) { >> + nfs4_free_offload_stateid(offload->co_stid); >> + kfree(offload); >> + } >> +} >> + >> +static const struct rpc_call_ops nfsd4_cb_offload_ops = { >> + .rpc_call_prepare = nfsd4_cb_prepare, >> + .rpc_call_done = nfsd4_cb_offload_done, >> + .rpc_release = nfsd4_cb_offload_release, >> +}; >> + >> +void nfsd4_cb_offload(struct nfs4_cb_offload *offload) >> +{ >> + struct nfsd4_callback *cb = &offload->co_callback; >> + >> + cb->cb_op = offload; >> + cb->cb_clp = offload->co_stid->sc_client; >> + cb->cb_msg.rpc_proc = &nfs4_cb_procedures[NFSPROC4_CLNT_CB_OFFLOAD]; >> + cb->cb_msg.rpc_argp = cb; >> + cb->cb_msg.rpc_resp = cb; >> + >> + cb->cb_ops = &nfsd4_cb_offload_ops; >> + >> + INIT_LIST_HEAD(&cb->cb_per_client); >> + cb->cb_done = true; >> + >> + run_nfsd4_cb(cb); >> +} >> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c >> index d4584ea..66a787f 100644 >> --- a/fs/nfsd/nfs4proc.c >> +++ b/fs/nfsd/nfs4proc.c >> @@ -35,6 +35,7 @@ >> #include >> #include >> >> +#include "state.h" >> #include "idmap.h" >> #include "cache.h" >> #include "xdr4.h" >> @@ -1062,29 +1063,57 @@ out: >> return status; >> } >> >> +static void >> +nfsd4_copy_async(struct work_struct *w) >> +{ >> + __be32 status; >> + struct nfs4_cb_offload *offload; >> + >> + offload = container_of(w, struct nfs4_cb_offload, co_work); >> + status = nfsd_copy_range(offload->co_src_file, offload->co_src_pos, >> + offload->co_dst_file, offload->co_dst_pos, >> + offload->co_count); >> + >> + if (status == nfs_ok) { >> + offload->co_stable_how = NFS_FILE_SYNC; >> + gen_boot_verifier(&offload->co_verifier, offload->co_net); >> + fput(offload->co_src_file); >> + fput(offload->co_dst_file); >> + } >> + nfsd4_cb_offload(offload); >> +} >> + >> static __be32 >> nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, >> struct nfsd4_copy *copy) >> { >> - __be32 status; >> struct file *src = NULL, *dst = NULL; >> + struct nfs4_cb_offload *offload; >> >> - status = nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst); >> - if (status) >> - return status; >> - >> - status = nfsd_copy_range(src, copy->cp_src_pos, >> - dst, copy->cp_dst_pos, >> - copy->cp_count); >> + if (nfsd4_verify_copy(rqstp, cstate, copy, &src, &dst)) >> + return nfserr_jukebox; >> >> - if (status == nfs_ok) { >> - copy->cp_res.wr_stateid = NULL; >> - copy->cp_res.wr_bytes_written = copy->cp_count; >> - copy->cp_res.wr_stable_how = NFS_FILE_SYNC; >> - gen_boot_verifier(©->cp_res.wr_verifier, SVC_NET(rqstp)); >> - } >> + offload = kmalloc(sizeof(struct nfs4_cb_offload), GFP_KERNEL); >> + if (!offload) >> + return nfserr_jukebox; >> >> - return status; >> + offload->co_src_file = get_file(src); >> + offload->co_dst_file = get_file(dst); >> + offload->co_src_pos = copy->cp_src_pos; >> + offload->co_dst_pos = copy->cp_dst_pos; >> + offload->co_count = copy->cp_count; >> + offload->co_stid = nfs4_alloc_offload_stateid(cstate->session->se_client); >> + offload->co_net = SVC_NET(rqstp); >> + INIT_WORK(&offload->co_work, nfsd4_copy_async); >> + nfsd4_init_callback(&offload->co_callback); >> + memcpy(&offload->co_dst_fh, &cstate->current_fh, sizeof(struct knfsd_fh)); >> + >> + copy->cp_res.wr_stateid = &offload->co_stid->sc_stateid; >> + copy->cp_res.wr_bytes_written = 0; >> + copy->cp_res.wr_stable_how = NFS_UNSTABLE; >> + >> + schedule_work(&offload->co_work); >> + return nfs_ok; >> } >> >> /* This routine never returns NFS_OK! If there are no other errors, it >> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c >> index c4e270e..582edb5 100644 >> --- a/fs/nfsd/nfs4state.c >> +++ b/fs/nfsd/nfs4state.c >> @@ -364,6 +364,11 @@ static struct nfs4_ol_stateid * nfs4_alloc_stateid(struct nfs4_client *clp) >> return openlockstateid(nfs4_alloc_stid(clp, stateid_slab)); >> } >> >> +struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *clp) >> +{ >> + return nfs4_alloc_stid(clp, stateid_slab); >> +} >> + >> static struct nfs4_delegation * >> alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct svc_fh *current_fh) >> { >> @@ -617,6 +622,12 @@ static void free_generic_stateid(struct nfs4_ol_stateid *stp) >> kmem_cache_free(stateid_slab, stp); >> } >> >> +void nfs4_free_offload_stateid(struct nfs4_stid *stid) >> +{ >> + remove_stid(stid); >> + kmem_cache_free(stateid_slab, stid); >> +} >> + >> static void release_lock_stateid(struct nfs4_ol_stateid *stp) >> { >> struct file *file; >> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h >> index 2478805..56682fb 100644 >> --- a/fs/nfsd/state.h >> +++ b/fs/nfsd/state.h >> @@ -70,6 +70,7 @@ struct nfsd4_callback { >> struct rpc_message cb_msg; >> const struct rpc_call_ops *cb_ops; >> struct work_struct cb_work; >> + struct delayed_work cb_delay; >> bool cb_done; >> }; >> >> @@ -101,6 +102,22 @@ struct nfs4_delegation { >> struct nfsd4_callback dl_recall; >> }; >> >> +struct nfs4_cb_offload { >> + struct file *co_src_file; >> + struct file *co_dst_file; >> + u64 co_src_pos; >> + u64 co_dst_pos; >> + u64 co_count; >> + u32 co_stable_how; >> + struct knfsd_fh co_dst_fh; >> + nfs4_verifier co_verifier; >> + struct net *co_net; >> + >> + struct nfs4_stid *co_stid; >> + struct work_struct co_work; >> + struct nfsd4_callback co_callback; >> +}; >> + >> /* client delegation callback info */ >> struct nfs4_cb_conn { >> /* SETCLIENTID info */ >> @@ -468,10 +485,12 @@ extern void nfs4_free_openowner(struct nfs4_openowner *); >> extern void nfs4_free_lockowner(struct nfs4_lockowner *); >> extern int set_callback_cred(void); >> extern void nfsd4_init_callback(struct nfsd4_callback *); >> +extern void nfsd4_init_delayed_callback(struct nfsd4_callback *); >> extern void nfsd4_probe_callback(struct nfs4_client *clp); >> extern void nfsd4_probe_callback_sync(struct nfs4_client *clp); >> extern void nfsd4_change_callback(struct nfs4_client *clp, struct nfs4_cb_conn *); >> extern void nfsd4_cb_recall(struct nfs4_delegation *dp); >> +extern void nfsd4_cb_offload(struct nfs4_cb_offload *); >> extern int nfsd4_create_callback_queue(void); >> extern void nfsd4_destroy_callback_queue(void); >> extern void nfsd4_shutdown_callback(struct nfs4_client *); >> @@ -480,6 +499,8 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name, >> struct nfsd_net *nn); >> extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn); >> extern void put_client_renew(struct nfs4_client *clp); >> +extern struct nfs4_stid *nfs4_alloc_offload_stateid(struct nfs4_client *); >> +extern void nfs4_free_offload_stateid(struct nfs4_stid *); >> >> /* nfs4recover operations */ >> extern int nfsd4_client_tracking_init(struct net *net); >> diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h >> index c5c55df..75b0ef7 100644 >> --- a/fs/nfsd/xdr4cb.h >> +++ b/fs/nfsd/xdr4cb.h >> @@ -21,3 +21,12 @@ >> #define NFS4_dec_cb_recall_sz (cb_compound_dec_hdr_sz + \ >> cb_sequence_dec_sz + \ >> op_dec_sz) >> + >> +#define NFS4_enc_cb_offload_sz (cb_compound_enc_hdr_sz + \ >> + cb_sequence_enc_sz + \ >> + 1 + enc_stateid_sz + 2 + 1 + \ >> + XDR_QUADLEN(NFS4_VERIFIER_SIZE)) >> + >> +#define NFS4_dec_cb_offload_sz (cb_compound_dec_hdr_sz + \ >> + cb_sequence_dec_sz + \ >> + op_dec_sz) >> -- >> 1.8.3.3 >> >> -- >> To unsubscribe from this list: send the line "unsubscribe linux-nfs" in >> the body of a message to majordomo@vger.kernel.org >> More majordomo info at http://vger.kernel.org/majordomo-info.html