2008-10-15 21:00:28

by Andy Adamson

Subject: [PATCH 0/3] NFSD EOS deferral

Here's a patch set for review - it compiles and seems to work, but I haven't
done stress testing, nor testing of all of the combinations of deferral cases.

A deferral occurs when NFSD needs information from an rpc cache, and an upcall
is required. Instead of NFSD waiting for the cache to be filled by the upcall,
the RPC request is inserted back into the receive stream for processing at a
later time.

Exactly once semantics require that NFSD compound RPC deferral processing
restart at the operation that caused the deferral, instead of reprocessing the
full compound RPC from the start, possibly repeating operation processing.
These patches add three callbacks, a data pointer, and page pointer storage
to the sunrpc svc deferral architecture that NFSD uses to accomplish this goal.
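
Roughly, the resulting flow looks like this (a simplified sketch, not the
literal patch code; the field and function names are those used in the
patches below):

    /*
     * nfsd4_proc_compound():
     *     rqstp->rq_defer_data = cstate;
     *     rqstp->rq_save_state = nfsd4_save_deferred_state;
     *     for each op:
     *         cstate->last_op_p = resp->p;       (remember encode position)
     *         status = <op handler>;
     *         if (status == nfserr_dropit)       (svc_defer() will call
     *             break;                          ->rq_save_state())
     *
     * On revisit, svc_deferred_recv() calls ->restore_state(), and
     * nfsd4_proc_compound() resumes with:
     *     resp->opcnt = cstate->last_op_cnt;     (restart at the deferred op)
     *     resp->p = cstate->last_op_p;           (keep already-encoded replies)
     */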

Deferrals that do not define the callbacks act as before. Care has been taken
to ensure that combinations of deferrals - those from the NFSv4 server with
the callbacks defined, and those from the RPC layer without the callbacks
defined - work together correctly.

Thoughts, comments and suggestions are really appreciated...

-->Andy Adamson <[email protected]>




2008-10-15 21:00:31

by Andy Adamson

Subject: [PATCH 2/3] NFSD save, restore, and release deferred result pages

From: Andy Adamson <[email protected]>

Implement the rq_save_state, restore_state, and release_state RPC deferral
callbacks.

Save the reply pages in struct svc_deferred_req.

Clear the svc_deferred_req respages in the save_state callback to set up
for another NFSD operation deferral.

Signed-off-by: Andy Adamson<[email protected]>
---
fs/nfsd/nfs4proc.c | 40 ++++++++++++++++++++++++++++++++++++++++
fs/nfsd/nfs4state.c | 45 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/nfsd/xdr4.h | 5 +++++
3 files changed, 90 insertions(+), 0 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e5b51ff..1c19ff9 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -836,6 +836,46 @@ static struct nfsd4_compound_state *cstate_alloc(void)
return cstate;
}

+/*
+ * RPC deferral callbacks
+ */
+static void
+nfsd4_release_deferred_state(struct svc_deferred_req *dreq)
+{
+ nfsd4_clear_respages(dreq->respages, dreq->resused);
+ cstate_free(dreq->defer_data);
+}
+
+static void
+nfsd4_restore_deferred_state(struct svc_rqst *rqstp,
+ struct svc_deferred_req *dreq)
+{
+ nfsd4_restore_rqst_pages(rqstp, dreq->respages, dreq->resused);
+ /* Reset defer_data for a NFSD deferral revisit interrupted
+ * by a non-NFSD deferral */
+ rqstp->rq_defer_data = dreq->defer_data;
+}
+
+static void
+nfsd4_save_deferred_state(struct svc_rqst *rqstp,
+ struct svc_deferred_req *dreq)
+ {
+ struct nfsd4_compound_state *cstate =
+ (struct nfsd4_compound_state *)rqstp->rq_defer_data;
+
+ fh_put(&cstate->current_fh);
+ fh_put(&cstate->save_fh);
+
+ /* In case of an NFSD deferral on a previous operation */
+ nfsd4_clear_respages(dreq->respages, dreq->resused);
+
+ nfsd4_cache_rqst_pages(rqstp, dreq->respages, &dreq->resused);
+
+ dreq->defer_data = rqstp->rq_defer_data;
+ dreq->restore_state = nfsd4_restore_deferred_state;
+ dreq->release_state = nfsd4_release_deferred_state;
+}
+
typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
void *);

diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 1578d7a..f266e45 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -684,6 +684,51 @@ out_err:
return;
}

+void
+nfsd4_move_pages(struct page **topages, struct page **frompages, short count)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+ topages[i] = frompages[i];
+ if (!topages[i])
+ continue;
+ get_page(topages[i]);
+ }
+}
+
+void
+nfsd4_cache_rqst_pages(struct svc_rqst *rqstp, struct page **respages,
+ short *resused)
+{
+ *resused = rqstp->rq_resused;
+ nfsd4_move_pages(respages, rqstp->rq_respages, rqstp->rq_resused);
+}
+
+void
+nfsd4_restore_rqst_pages(struct svc_rqst *rqstp, struct page **respages,
+ short resused)
+{
+ /* release allocated result pages to be replaced from the cache */
+ svc_free_res_pages(rqstp);
+
+ rqstp->rq_resused = resused;
+ nfsd4_move_pages(rqstp->rq_respages, respages, resused);
+}
+
+void
+nfsd4_clear_respages(struct page **respages, short resused)
+{
+ int i;
+
+ for (i = 0; i < resused; i++) {
+ if (!respages[i])
+ continue;
+ put_page(respages[i]);
+ respages[i] = NULL;
+ }
+}
+
__be32
nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_setclientid *setclid)
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index 27bd3e3..f3495ae 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -442,6 +442,11 @@ void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct dentry *dentry, __be32 *buffer, int *countp,
u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+extern void nfsd4_clear_respages(struct page **respages, short resused);
+extern void nfsd4_cache_rqst_pages(struct svc_rqst *rqstp,
+ struct page **respages, short *resused);
+extern void nfsd4_restore_rqst_pages(struct svc_rqst *rqstp,
+ struct page **respages, short resused);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid);
--
1.5.4.3


2008-10-15 21:00:32

by Andy Adamson

Subject: [PATCH 3/3] NFSD deferral processing

From: Andy Adamson <[email protected]>

Use a slab cache for nfsd4_compound_state allocation.

Save the struct nfsd4_compound_state and set the save_state callback for
each request for potential deferral handling.

If an NFSv4 operation causes a deferral, the save_state callback is called
by svc_defer, which saves the defer_data with the deferral and sets the
restore_state deferral callback.

fh_put is called so that the deferral does not reference the file handles,
allowing umount of the file system.

Signed-off-by: Andy Adamson<[email protected]>
---
fs/nfsd/nfs4proc.c | 47 +++++++++++++++++++-------------------------
fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++
include/linux/nfsd/xdr4.h | 5 ++++
3 files changed, 62 insertions(+), 27 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 1c19ff9..e60b359 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -813,37 +813,13 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
nfsdstats.nfs4_opcount[opnum]++;
}

-static void cstate_free(struct nfsd4_compound_state *cstate)
-{
- if (cstate == NULL)
- return;
- fh_put(&cstate->current_fh);
- fh_put(&cstate->save_fh);
- BUG_ON(cstate->replay_owner);
- kfree(cstate);
-}
-
-static struct nfsd4_compound_state *cstate_alloc(void)
-{
- struct nfsd4_compound_state *cstate;
-
- cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL);
- if (cstate == NULL)
- return NULL;
- fh_init(&cstate->current_fh, NFS4_FHSIZE);
- fh_init(&cstate->save_fh, NFS4_FHSIZE);
- cstate->replay_owner = NULL;
- return cstate;
-}
-
/*
* RPC deferral callbacks
*/
static void
nfsd4_release_deferred_state(struct svc_deferred_req *dreq)
{
- nfsd4_clear_respages(dreq->respages, dreq->resused);
- cstate_free(dreq->defer_data);
+ nfsd4_cstate_free(dreq->defer_data, dreq);
}

static void
@@ -926,12 +902,22 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto out;

status = nfserr_resource;
- cstate = cstate_alloc();
+ cstate = nfsd4_cstate_alloc(rqstp);
if (cstate == NULL)
goto out;

+ if (rqstp->rq_deferred && rqstp->rq_deferred->defer_data) {
+ resp->opcnt = cstate->last_op_cnt;
+ resp->p = cstate->last_op_p;
+ fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
+ fh_verify(rqstp, &cstate->save_fh, 0, NFSD_MAY_NOP);
+ }
+ rqstp->rq_defer_data = cstate;
+ rqstp->rq_save_state = nfsd4_save_deferred_state;
+
status = nfs_ok;
while (!status && resp->opcnt < args->opcnt) {
+ cstate->last_op_p = resp->p;
op = &args->ops[resp->opcnt++];

dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
@@ -996,8 +982,15 @@ encode_op:

nfsd4_increment_op_stats(op->opnum);
}
+ rqstp->rq_defer_data = NULL;
+ rqstp->rq_save_state = NULL;
+
+ if (status == nfserr_dropit) {
+ cstate->last_op_cnt = resp->opcnt - 1;
+ return status;
+ }

- cstate_free(cstate);
+ nfsd4_cstate_free(cstate, rqstp->rq_deferred);
out:
nfsd4_release_compoundargs(args);
dprintk("nfsv4 compound returned %d\n", ntohl(status));
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index f266e45..002a57c 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -91,6 +91,7 @@ static struct kmem_cache *stateowner_slab = NULL;
static struct kmem_cache *file_slab = NULL;
static struct kmem_cache *stateid_slab = NULL;
static struct kmem_cache *deleg_slab = NULL;
+static struct kmem_cache *cstate_slab;

void
nfs4_lock_state(void)
@@ -442,6 +443,37 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
return clp;
}

+void nfsd4_cstate_free(struct nfsd4_compound_state *cstate,
+ struct svc_deferred_req *dreq)
+{
+ if (dreq && dreq->release_state)
+ nfsd4_clear_respages(dreq->respages, dreq->resused);
+ if (cstate == NULL)
+ return;
+ fh_put(&cstate->current_fh);
+ fh_put(&cstate->save_fh);
+ BUG_ON(cstate->replay_owner);
+ kmem_cache_free(cstate_slab, cstate);
+}
+
+struct nfsd4_compound_state *nfsd4_cstate_alloc(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compound_state *cstate;
+
+ if (rqstp->rq_deferred && rqstp->rq_deferred->defer_data) {
+ cstate = rqstp->rq_deferred->defer_data;
+ goto out;
+ }
+ cstate = kmem_cache_alloc(cstate_slab, GFP_KERNEL);
+ if (cstate == NULL)
+ return NULL;
+ fh_init(&cstate->current_fh, NFS4_FHSIZE);
+ fh_init(&cstate->save_fh, NFS4_FHSIZE);
+ cstate->replay_owner = NULL;
+out:
+ return cstate;
+}
+
static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
{
memcpy(target->cl_verifier.data, source->data,
@@ -986,6 +1018,7 @@ nfsd4_free_slabs(void)
nfsd4_free_slab(&file_slab);
nfsd4_free_slab(&stateid_slab);
nfsd4_free_slab(&deleg_slab);
+ nfsd4_free_slab(&cstate_slab);
}

static int
@@ -1007,6 +1040,10 @@ nfsd4_init_slabs(void)
sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL)
goto out_nomem;
+ cstate_slab = kmem_cache_create("nfsd4_compound_states",
+ sizeof(struct nfsd4_compound_state), 0, 0, NULL);
+ if (cstate_slab == NULL)
+ goto out_nomem;
return 0;
out_nomem:
nfsd4_free_slabs();
diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
index f3495ae..3407f4b 100644
--- a/include/linux/nfsd/xdr4.h
+++ b/include/linux/nfsd/xdr4.h
@@ -48,6 +48,8 @@ struct nfsd4_compound_state {
struct svc_fh current_fh;
struct svc_fh save_fh;
struct nfs4_stateowner *replay_owner;
+ __be32 *last_op_p;
+ u32 last_op_cnt;
};

struct nfsd4_change_info {
@@ -447,6 +449,9 @@ extern void nfsd4_cache_rqst_pages(struct svc_rqst *rqstp,
struct page **respages, short *resused);
extern void nfsd4_restore_rqst_pages(struct svc_rqst *rqstp,
struct page **respages, short resused);
+extern struct nfsd4_compound_state *nfsd4_cstate_alloc(struct svc_rqst *rqstp);
+extern void nfsd4_cstate_free(struct nfsd4_compound_state *cstate,
+ struct svc_deferred_req *dreq);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid);
--
1.5.4.3


2008-10-15 21:00:32

by Andy Adamson

Subject: [PATCH 1/3] SUNRPC add deferral processing callbacks

From: Andy Adamson <[email protected]>

For EOS, NFSD compound RPC deferred processing should restart at the operation
which caused the deferral.

Add a callback and a defer_data pointer in svc_rqst to enable svc_defer to
save partial result state in the deferral request.

Add page pointer storage to svc_deferred_req to cache the pages holding the
partially processed request.

Add callbacks and a defer_data pointer in svc_deferred_req to enable
svc_deferred_recv to restore and release the partial result state.

Signed-off-by: Andy Adamson<[email protected]>
---
include/linux/sunrpc/svc.h | 10 ++++++++++
net/sunrpc/svc_xprt.c | 20 +++++++++++++++-----
2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index dc69068..46a4097 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -215,6 +215,9 @@ struct svc_rqst {
struct svc_cred rq_cred; /* auth info */
void * rq_xprt_ctxt; /* transport specific context ptr */
struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
+ /* callback to save deferred request state */
+ void (*rq_save_state)(struct svc_rqst *, struct svc_deferred_req *);
+ void *rq_defer_data; /* defer state data to save */

size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
@@ -323,6 +326,13 @@ struct svc_deferred_req {
union svc_addr_u daddr; /* where reply must come from */
struct cache_deferred_req handle;
size_t xprt_hlen;
+ /* callbacks to restore and release deferred request state
+ * set in rq_save_state */
+ void (*restore_state)(struct svc_rqst *, struct svc_deferred_req *);
+ void (*release_state)(struct svc_deferred_req *);
+ void *defer_data; /* defer state data */
+ struct page *respages[RPCSVC_MAXPAGES];
+ short resused;
int argslen;
__be32 args[0];
};
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index e46c825..531b325 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -876,6 +876,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
struct svc_xprt *xprt = dr->xprt;

if (too_many) {
+ if (dr->release_state)
+ dr->release_state(dr);
svc_xprt_put(xprt);
kfree(dr);
return;
@@ -902,10 +904,10 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
static struct cache_deferred_req *svc_defer(struct cache_req *req)
{
struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
- struct svc_deferred_req *dr;
+ struct svc_deferred_req *dr = NULL;

if (rqstp->rq_arg.page_len)
- return NULL; /* if more than a page, give up FIXME */
+ goto out; /* if more than a page, give up FIXME */
if (rqstp->rq_deferred) {
dr = rqstp->rq_deferred;
rqstp->rq_deferred = NULL;
@@ -914,9 +916,9 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
size_t size;
/* FIXME maybe discard if size too large */
size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
- dr = kmalloc(size, GFP_KERNEL);
+ dr = kzalloc(size, GFP_KERNEL);
if (dr == NULL)
- return NULL;
+ goto out;

dr->handle.owner = rqstp->rq_server;
dr->prot = rqstp->rq_prot;
@@ -935,7 +937,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
dr->xprt = rqstp->rq_xprt;

dr->handle.revisit = svc_revisit;
- return &dr->handle;
+ if (rqstp->rq_save_state)
+ rqstp->rq_save_state(rqstp, dr);
+out:
+ rqstp->rq_defer_data = NULL;
+ rqstp->rq_save_state = NULL;
+
+ return dr ? &dr->handle: NULL;
}

/*
@@ -959,6 +967,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
rqstp->rq_xprt_hlen = dr->xprt_hlen;
rqstp->rq_daddr = dr->daddr;
rqstp->rq_respages = rqstp->rq_pages;
+ if (dr->restore_state)
+ dr->restore_state(rqstp, dr);
return (dr->argslen<<2) - dr->xprt_hlen;
}

--
1.5.4.3


2008-10-17 17:44:56

by J. Bruce Fields

Subject: Re: [PATCH 0/3] NFSD EOS deferral

On Wed, Oct 15, 2008 at 05:00:23PM -0400, [email protected] wrote:
> Here's a patch set for review - it compiles and seems to work, but I haven't
> done stress testing, nor testing of all of the combinations of deferral cases.
>
> A deferral occurs when NFSD needs information from an rpc cache, and an upcall
> is required. Instead of NFSD waiting for the cache to be filled by the upcall,
> the RPC request is inserted back into the receive stream for processing at a
> later time.
>
> Exactly once semantics require that NFSD compound RPC deferral processing
> restart at the operation that caused the deferral, instead of reprocessing the
> full compound RPC from the start possibly repeating operation processing.
> These patches add three callbacks, a data pointer, and page pointer storage
> to the sunrpc svc deferral architecture that NFSD uses to accomplish this goal.
>
> Deferrals that do not define the callbacks act as before. Care has been taken
> to ensure that combinations of deferrals - those from the NFSv4 server with
> the callbacks defined, and those from the RPC layer without the callbacks
> defined work together correctly.
>
> Thoughts, comments and suggestions are really appreciated...

Requests longer than a page are still not deferred, so large writes that
trigger upcalls still get an ERR_DELAY. OK, probably no big deal.

I don't think we can apply this until we have some way to track the
number and size of deferred requests outstanding and fall back on
ERR_DELAY if it's too much.
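
Something along these lines, maybe (an untested sketch; the counter and
limit names here are made up):

	/* sketch: cap the memory tied up in deferred requests */
	static atomic_long_t svc_deferred_bytes;
	#define SVC_MAX_DEFERRED_BYTES	(256 * 1024)

	/* in svc_defer(), before allocating the svc_deferred_req: */
	size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
	if (atomic_long_add_return(size, &svc_deferred_bytes) >
					SVC_MAX_DEFERRED_BYTES) {
		atomic_long_sub(size, &svc_deferred_bytes);
		return NULL;	/* drop; the client will retry */
	}
	/* ...and atomic_long_sub(size, &svc_deferred_bytes) wherever the
	 * deferred request is finally freed. */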

I do sometimes wonder whether continuing with the current
deferred-request approach is best, though:

- If we're saving out large parts of the request anyway (the
response pages), then maybe we should just keep rqstp's
on the deferred request queue instead of copying to a separate
deferred_request structure.
- Then as long as we're saving all that request data, is there
really significant savings from not keeping a thread around
too?

So I wonder if it'd be better just to let threads sleep (and be more
aggressive about starting up new threads if appropriate, and add some
other heuristics to avoid a situation where the whole server stalls on a
temporarily wedged userspace daemon).

--b.

2008-10-17 17:48:05

by J. Bruce Fields

Subject: Re: [PATCH 1/3] SUNRPC add deferral processing callbacks

On Wed, Oct 15, 2008 at 05:00:24PM -0400, [email protected] wrote:
> From: Andy Adamson <[email protected]>
>
> For EOS, NFSD compound RPC deferred processing should restart operation which
> caused the deferral.
>
> Add a callback and a defer_data pointer in svc_rqst to enable svc_defer to
> save partial result state in the deferral request.
>
> Add page pointer storage to svc_deferred_req to cache the pages holding the
> partially processed request.
>
> Add callbacks and a defer_data pointer in svc_deferred_request to enable
> svc_deferred_recv to restore and release the partial result state.
>
> Signed-off-by: Andy Adamson<[email protected]>

Could you fix the missing space there?

> ---
> include/linux/sunrpc/svc.h | 10 ++++++++++
> net/sunrpc/svc_xprt.c | 20 +++++++++++++++-----
> 2 files changed, 25 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index dc69068..46a4097 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -215,6 +215,9 @@ struct svc_rqst {
> struct svc_cred rq_cred; /* auth info */
> void * rq_xprt_ctxt; /* transport specific context ptr */
> struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
> + /* callback to save deferred request state */
> + void (*rq_save_state)(struct svc_rqst *, struct svc_deferred_req *);
> + void *rq_defer_data; /* defer state data to save */
>
> size_t rq_xprt_hlen; /* xprt header len */
> struct xdr_buf rq_arg;
> @@ -323,6 +326,13 @@ struct svc_deferred_req {
> union svc_addr_u daddr; /* where reply must come from */
> struct cache_deferred_req handle;
> size_t xprt_hlen;
> + /* callbacks to restore and release deferred request state
> + * set in rq_save_state */
> + void (*restore_state)(struct svc_rqst *, struct svc_deferred_req *);
> + void (*release_state)(struct svc_deferred_req *);
> + void *defer_data; /* defer state data */
> + struct page *respages[RPCSVC_MAXPAGES];
> + short resused;
> int argslen;
> __be32 args[0];
> };
> diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> index e46c825..531b325 100644
> --- a/net/sunrpc/svc_xprt.c
> +++ b/net/sunrpc/svc_xprt.c
> @@ -876,6 +876,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> struct svc_xprt *xprt = dr->xprt;
>
> if (too_many) {
> + if (dr->release_state)
> + dr->release_state(dr);
> svc_xprt_put(xprt);
> kfree(dr);
> return;
> @@ -902,10 +904,10 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> static struct cache_deferred_req *svc_defer(struct cache_req *req)
> {
> struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
> - struct svc_deferred_req *dr;
> + struct svc_deferred_req *dr = NULL;
>
> if (rqstp->rq_arg.page_len)
> - return NULL; /* if more than a page, give up FIXME */
> + goto out; /* if more than a page, give up FIXME */
> if (rqstp->rq_deferred) {
> dr = rqstp->rq_deferred;
> rqstp->rq_deferred = NULL;
> @@ -914,9 +916,9 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> size_t size;
> /* FIXME maybe discard if size too large */
> size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
> - dr = kmalloc(size, GFP_KERNEL);
> + dr = kzalloc(size, GFP_KERNEL);
> if (dr == NULL)
> - return NULL;
> + goto out;
>
> dr->handle.owner = rqstp->rq_server;
> dr->prot = rqstp->rq_prot;
> @@ -935,7 +937,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> dr->xprt = rqstp->rq_xprt;
>
> dr->handle.revisit = svc_revisit;
> - return &dr->handle;
> + if (rqstp->rq_save_state)
> + rqstp->rq_save_state(rqstp, dr);
> +out:
> + rqstp->rq_defer_data = NULL;
> + rqstp->rq_save_state = NULL;
> +
> + return dr ? &dr->handle: NULL;

I may not be thinking this through clearly, but: I think the correct
place to initialize rq_defer_data and rq_save_state is not here but at
the very start of request processing (in svc_recv or at the top of
svc_process). We shouldn't depend on svc_defer()--it may never get
called.

--b.

> }
>
> /*
> @@ -959,6 +967,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
> rqstp->rq_xprt_hlen = dr->xprt_hlen;
> rqstp->rq_daddr = dr->daddr;
> rqstp->rq_respages = rqstp->rq_pages;
> + if (dr->restore_state)
> + dr->restore_state(rqstp, dr);
> return (dr->argslen<<2) - dr->xprt_hlen;
> }
>
> --
> 1.5.4.3
>

2008-10-17 18:43:27

by Andy Adamson

Subject: Re: [PATCH 1/3] SUNRPC add deferral processing callbacks


On Fri, 2008-10-17 at 13:48 -0400, J. Bruce Fields wrote:
> On Wed, Oct 15, 2008 at 05:00:24PM -0400, [email protected] wrote:
> > From: Andy Adamson <[email protected]>
> >
> > For EOS, NFSD compound RPC deferred processing should restart operation which
> > caused the deferral.
> >
> > Add a callback and a defer_data pointer in svc_rqst to enable svc_defer to
> > save partial result state in the deferral request.
> >
> > Add page pointer storage to svc_deferred_req to cache the pages holding the
> > partially processed request.
> >
> > Add callbacks and a defer_data pointer in svc_deferred_request to enable
> > svc_deferred_recv to restore and release the partial result state.
> >
> > Signed-off-by: Andy Adamson<[email protected]>
>
> Could you fix the missing space there?

Oops..:)
>
> > ---
> > include/linux/sunrpc/svc.h | 10 ++++++++++
> > net/sunrpc/svc_xprt.c | 20 +++++++++++++++-----
> > 2 files changed, 25 insertions(+), 5 deletions(-)
> >
> > diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> > index dc69068..46a4097 100644
> > --- a/include/linux/sunrpc/svc.h
> > +++ b/include/linux/sunrpc/svc.h
> > @@ -215,6 +215,9 @@ struct svc_rqst {
> > struct svc_cred rq_cred; /* auth info */
> > void * rq_xprt_ctxt; /* transport specific context ptr */
> > struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
> > + /* callback to save deferred request state */
> > + void (*rq_save_state)(struct svc_rqst *, struct svc_deferred_req *);
> > + void *rq_defer_data; /* defer state data to save */
> >
> > size_t rq_xprt_hlen; /* xprt header len */
> > struct xdr_buf rq_arg;
> > @@ -323,6 +326,13 @@ struct svc_deferred_req {
> > union svc_addr_u daddr; /* where reply must come from */
> > struct cache_deferred_req handle;
> > size_t xprt_hlen;
> > + /* callbacks to restore and release deferred request state
> > + * set in rq_save_state */
> > + void (*restore_state)(struct svc_rqst *, struct svc_deferred_req *);
> > + void (*release_state)(struct svc_deferred_req *);
> > + void *defer_data; /* defer state data */
> > + struct page *respages[RPCSVC_MAXPAGES];
> > + short resused;
> > int argslen;
> > __be32 args[0];
> > };
> > diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
> > index e46c825..531b325 100644
> > --- a/net/sunrpc/svc_xprt.c
> > +++ b/net/sunrpc/svc_xprt.c
> > @@ -876,6 +876,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> > struct svc_xprt *xprt = dr->xprt;
> >
> > if (too_many) {
> > + if (dr->release_state)
> > + dr->release_state(dr);
> > svc_xprt_put(xprt);
> > kfree(dr);
> > return;
> > @@ -902,10 +904,10 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> > static struct cache_deferred_req *svc_defer(struct cache_req *req)
> > {
> > struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
> > - struct svc_deferred_req *dr;
> > + struct svc_deferred_req *dr = NULL;
> >
> > if (rqstp->rq_arg.page_len)
> > - return NULL; /* if more than a page, give up FIXME */
> > + goto out; /* if more than a page, give up FIXME */
> > if (rqstp->rq_deferred) {
> > dr = rqstp->rq_deferred;
> > rqstp->rq_deferred = NULL;
> > @@ -914,9 +916,9 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> > size_t size;
> > /* FIXME maybe discard if size too large */
> > size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
> > - dr = kmalloc(size, GFP_KERNEL);
> > + dr = kzalloc(size, GFP_KERNEL);
> > if (dr == NULL)
> > - return NULL;
> > + goto out;
> >
> > dr->handle.owner = rqstp->rq_server;
> > dr->prot = rqstp->rq_prot;
> > @@ -935,7 +937,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> > dr->xprt = rqstp->rq_xprt;
> >
> > dr->handle.revisit = svc_revisit;
> > - return &dr->handle;
> > + if (rqstp->rq_save_state)
> > + rqstp->rq_save_state(rqstp, dr);
> > +out:
> > + rqstp->rq_defer_data = NULL;
> > + rqstp->rq_save_state = NULL;
> > +
> > + return dr ? &dr->handle: NULL;
>
> I may not be thinking this through clearly, but: I think the correct
> place to initialize rq_defer_data and rq_save_state is not here but at
> the very start of request processing (in svc_recv or at the top of
> svc_process). We shouldn't depend on svc_defer()--it may never get
> called.

In patch 3/3, nfsd4_proc_compound sets the rq_defer_data and
rq_save_state prior to operation processing, and then sets them back to
NULL after operation processing, so the initialization to NULL in
svc_defer is not needed.

We could indeed initialize these fields at the top of svc_process in
which case the setting to NULL at the end of operation processing in
nfsd4_proc_compound would not be needed. Your call.
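
That would look something like this (untested sketch):

	/* at the top of svc_process(), once per incoming request */
	rqstp->rq_defer_data = NULL;
	rqstp->rq_save_state = NULL;

nfsd4_proc_compound would then only need to set them before its op loop.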

-->Andy

> --b.
>
> > }
> >
> > /*
> > @@ -959,6 +967,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
> > rqstp->rq_xprt_hlen = dr->xprt_hlen;
> > rqstp->rq_daddr = dr->daddr;
> > rqstp->rq_respages = rqstp->rq_pages;
> > + if (dr->restore_state)
> > + dr->restore_state(rqstp, dr);
> > return (dr->argslen<<2) - dr->xprt_hlen;
> > }
> >
> > --
> > 1.5.4.3
> >

2008-10-17 18:45:53

by Andy Adamson

Subject: Re: [PATCH 0/3] NFSD EOS deferral


On Fri, 2008-10-17 at 13:44 -0400, J. Bruce Fields wrote:
> On Wed, Oct 15, 2008 at 05:00:23PM -0400, [email protected] wrote:
> > Here's a patch set for review - it compiles and seems to work, but I haven't
> > done stress testing, nor testing of all of the combinations of deferral cases.
> >
> > A deferral occurs when NFSD needs information from an rpc cache, and an upcall
> > is required. Instead of NFSD waiting for the cache to be filled by the upcall,
> > the RPC request is inserted back into the receive stream for processing at a
> > later time.
> >
> > Exactly once semantics require that NFSD compound RPC deferral processing
> > restart at the operation that caused the deferral, instead of reprocessing the
> > full compound RPC from the start possibly repeating operation processing.
> > These patches add three callbacks, a data pointer, and page pointer storage
> > to the sunrpc svc deferral architecture that NFSD uses to accomplish this goal.
> >
> > Deferrals that do not define the callbacks act as before. Care has been taken
> > to ensure that combinations of deferrals - those from the NFSv4 server with
> > the callbacks defined, and those from the RPC layer without the callbacks
> > defined work together correctly.
> >
> > Thoughts, comments and suggestions are really appreciated...
>
> Requests longer than a page are still not deferred, so large writes that
> trigger upcalls still get an ERR_DELAY. OK, probably no big deal.
>
> I don't think we can apply this until we have some way to track the
> number and size of deferred requests outstanding and fall back on
> ERR_DELAY if it's too much.
>
> I do sometimes wonder whether continuing with the current
> deferred-request approach is best, though:
>
> - If we're saving out large parts of the request anyway (the
> response pages), then maybe we should just keep rqstp's
> on the deferred request queue instead of copying to a separate
> deferred_request structure.
> - Then as long as we're saving all that request data, is there
> really significant savings from not keeping a thread around
> too?

True, especially if we also save large arg data, as in large writes that
trigger upcalls.
>
> So I wonder if it'd be better just to let threads sleep (and be more
> aggressive about starting up new threads if appropriate, and add some
> other heuristics to avoid a situation where the whole server stalls on a
> temporarily wedged userspace daemon).
>
> --b.

2008-10-17 18:59:09

by Marc Eshel

Subject: Re: [PATCH 0/3] NFSD EOS deferral

[email protected] wrote on 10/17/2008 10:44:54 AM:

> "J. Bruce Fields" <[email protected]>
>
> On Wed, Oct 15, 2008 at 05:00:23PM -0400, [email protected] wrote:
> > Here's a patch set for review - it compiles and seems to work, but I haven't
> > done stress testing, nor testing of all of the combinations of deferral cases.
> >
> > A deferral occurs when NFSD needs information from an rpc cache, and an upcall
> > is required. Instead of NFSD waiting for the cache to be filled by the upcall,
> > the RPC request is inserted back into the receive stream for processing at a
> > later time.
> >
> > Exactly once semantics require that NFSD compound RPC deferral processing
> > restart at the operation that caused the deferral, instead of reprocessing the
> > full compound RPC from the start, possibly repeating operation processing.
> > These patches add three callbacks, a data pointer, and page pointer storage
> > to the sunrpc svc deferral architecture that NFSD uses to accomplish this goal.
> >
> > Deferrals that do not define the callbacks act as before. Care has been taken
> > to ensure that combinations of deferrals - those from the NFSv4 server with
> > the callbacks defined, and those from the RPC layer without the callbacks
> > defined - work together correctly.
> >
> > Thoughts, comments and suggestions are really appreciated...
>
> Requests longer than a page are still not deferred, so large writes that
> trigger upcalls still get an ERR_DELAY. OK, probably no big deal.
>
> I don't think we can apply this until we have some way to track the
> number and size of deferred requests outstanding and fall back on
> ERR_DELAY if it's too much.

But I thought that the problem here is that the Linux NFS client doesn't
handle this return code properly.
Marc.

>
> I do sometimes wonder whether continuing with the current
> deferred-request approach is best, though:
>
> - If we're saving out large parts of the request anyway (the
> response pages), then maybe we should just keep rqstp's
> on the deferred request queue instead of copying to a separate
> deferred_request structure.
> - Then as long as we're saving all that request data, is there
> really significant savings from not keeping a thread around
> too?
>
> So I wonder if it'd be better just to let threads sleep (and be more
> aggressive about starting up new threads if appropriate, and add some
> other heuristics to avoid a situation where the whole server stalls on a
> temporarily wedged userspace daemon).
>
> --b.


2008-10-17 20:35:36

by Talpey, Thomas

Subject: Re: [PATCH 1/3] SUNRPC add deferral processing callbacks

At 05:00 PM 10/15/2008, [email protected] wrote:
>From: Andy Adamson <[email protected]>
>
>For EOS, NFSD compound RPC deferred processing should restart operation which
>caused the deferral.
>
>Add a callback and a defer_data pointer in svc_rqst to enable svc_defer to
>save partial result state in the deferral request.
>
>Add page pointer storage to svc_deferred_req to cache the pages holding the
>partially processed request.
>
>Add callbacks and a defer_data pointer in svc_deferred_request to enable
>svc_deferred_recv to restore and release the partial result state.
>
>Signed-off-by: Andy Adamson<[email protected]>
>---
> include/linux/sunrpc/svc.h | 10 ++++++++++
> net/sunrpc/svc_xprt.c | 20 +++++++++++++++-----
> 2 files changed, 25 insertions(+), 5 deletions(-)
>
>diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>index dc69068..46a4097 100644
>--- a/include/linux/sunrpc/svc.h
>+++ b/include/linux/sunrpc/svc.h
>@@ -215,6 +215,9 @@ struct svc_rqst {
> struct svc_cred rq_cred; /* auth info */
> void * rq_xprt_ctxt; /* transport specific context ptr */
> struct svc_deferred_req*rq_deferred; /* deferred request we are replaying */
>+ /* callback to save deferred request state */
>+ void (*rq_save_state)(struct svc_rqst *, struct svc_deferred_req *);
>+ void *rq_defer_data; /* defer state data to save */
>
> size_t rq_xprt_hlen; /* xprt header len */
> struct xdr_buf rq_arg;
>@@ -323,6 +326,13 @@ struct svc_deferred_req {
> union svc_addr_u daddr; /* where reply must come from */
> struct cache_deferred_req handle;
> size_t xprt_hlen;
>+ /* callbacks to restore and release deferred request state
>+ * set in rq_save_state */
>+ void (*restore_state)(struct svc_rqst *, struct svc_deferred_req *);
>+ void (*release_state)(struct svc_deferred_req *);
>+ void *defer_data; /* defer state data */
>+ struct page *respages[RPCSVC_MAXPAGES];

Is it worthwhile to make this a dynamically allocated pointer? If the max
page count is large (1MB means 256 pages), it's quite a large array for
something that may rarely be so full.
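
Untested sketch of what I mean:

	struct svc_deferred_req {
		...
		struct page **respages;	/* allocated only when state is saved */
		short resused;
		...
	};

	/* e.g. in the save_state callback: */
	dreq->respages = kcalloc(rqstp->rq_resused, sizeof(struct page *),
				 GFP_KERNEL);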

>+ short resused;

I find "resused" to be a little bit confusing. It looks a lot like "reused" and
in any case seems to be a pagecount. Maybe, respagecount?


> int argslen;
> __be32 args[0];
> };
>diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
>index e46c825..531b325 100644
>--- a/net/sunrpc/svc_xprt.c
>+++ b/net/sunrpc/svc_xprt.c
>@@ -876,6 +876,8 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> struct svc_xprt *xprt = dr->xprt;
>
> if (too_many) {
>+ if (dr->release_state)
>+ dr->release_state(dr);
> svc_xprt_put(xprt);
> kfree(dr);
> return;
>@@ -902,10 +904,10 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
> static struct cache_deferred_req *svc_defer(struct cache_req *req)
> {
> struct svc_rqst *rqstp = container_of(req, struct svc_rqst, rq_chandle);
>- struct svc_deferred_req *dr;
>+ struct svc_deferred_req *dr = NULL;
>
> if (rqstp->rq_arg.page_len)
>- return NULL; /* if more than a page, give up FIXME */
>+ goto out; /* if more than a page, give up FIXME */
> if (rqstp->rq_deferred) {
> dr = rqstp->rq_deferred;
> rqstp->rq_deferred = NULL;
>@@ -914,9 +916,9 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> size_t size;
> /* FIXME maybe discard if size too large */
> size = sizeof(struct svc_deferred_req) + rqstp->rq_arg.len;
>- dr = kmalloc(size, GFP_KERNEL);
>+ dr = kzalloc(size, GFP_KERNEL);

Does the entire buffer need to be zeroed? Again, if rq_arg.len is large,
this may be needless overhead.
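
If the only fields that need zeroing are the new callback pointers and the
respages array, something like this would avoid it (untested):

	dr = kmalloc(size, GFP_KERNEL);
	if (dr == NULL)
		goto out;
	/* zero only the fixed-size header; the args are copied in below */
	memset(dr, 0, sizeof(*dr));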

Tom.


> if (dr == NULL)
>- return NULL;
>+ goto out;
>
> dr->handle.owner = rqstp->rq_server;
> dr->prot = rqstp->rq_prot;
>@@ -935,7 +937,13 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
> dr->xprt = rqstp->rq_xprt;
>
> dr->handle.revisit = svc_revisit;
>- return &dr->handle;
>+ if (rqstp->rq_save_state)
>+ rqstp->rq_save_state(rqstp, dr);
>+out:
>+ rqstp->rq_defer_data = NULL;
>+ rqstp->rq_save_state = NULL;
>+
>+ return dr ? &dr->handle: NULL;
> }
>
> /*
>@@ -959,6 +967,8 @@ static int svc_deferred_recv(struct svc_rqst *rqstp)
> rqstp->rq_xprt_hlen = dr->xprt_hlen;
> rqstp->rq_daddr = dr->daddr;
> rqstp->rq_respages = rqstp->rq_pages;
>+ if (dr->restore_state)
>+ dr->restore_state(rqstp, dr);
> return (dr->argslen<<2) - dr->xprt_hlen;
> }
>
>--
>1.5.4.3
>


2008-10-17 20:46:59

by Talpey, Thomas

Subject: Re: [PATCH 3/3] NFSD deferral processing

At 05:00 PM 10/15/2008, [email protected] wrote:
>From: Andy Adamson <[email protected]>
>
>Use a slab cache for nfsd4_compound_state allocation
>
>Save the struct nfsd4_compound_state and set the save_state callback for
>each request for potential deferral handling.
>
>If an NFSv4 operation causes a deferral, the save_state callback is called
>by svc_defer which saves the defer_data with the deferral, and sets the
>restore_state deferral callback.
>
>fh_put is called so that the deferral is not referencing the file handles,
>allowing umount of the file system.

Can you explain the safety of this? If the COMPOUND starts off with an
operation that uses them, what's the implication of one going stale
partway through? Is fh_verify() when the deferral continues enough to
ensure the fh is protected until the end of the op? There's a comment
at the top of fh_verify(), for instance, that it is "only called at the
start of an nfsproc call", with some assumptions.

Tom

>
>Signed-off-by: Andy Adamson<[email protected]>
>---
> fs/nfsd/nfs4proc.c | 47 +++++++++++++++++++-------------------------
> fs/nfsd/nfs4state.c | 37 +++++++++++++++++++++++++++++++++++
> include/linux/nfsd/xdr4.h | 5 ++++
> 3 files changed, 62 insertions(+), 27 deletions(-)
>
>diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
>index 1c19ff9..e60b359 100644
>--- a/fs/nfsd/nfs4proc.c
>+++ b/fs/nfsd/nfs4proc.c
>@@ -813,37 +813,13 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
> nfsdstats.nfs4_opcount[opnum]++;
> }
>
>-static void cstate_free(struct nfsd4_compound_state *cstate)
>-{
>- if (cstate == NULL)
>- return;
>- fh_put(&cstate->current_fh);
>- fh_put(&cstate->save_fh);
>- BUG_ON(cstate->replay_owner);
>- kfree(cstate);
>-}
>-
>-static struct nfsd4_compound_state *cstate_alloc(void)
>-{
>- struct nfsd4_compound_state *cstate;
>-
>- cstate = kmalloc(sizeof(struct nfsd4_compound_state), GFP_KERNEL);
>- if (cstate == NULL)
>- return NULL;
>- fh_init(&cstate->current_fh, NFS4_FHSIZE);
>- fh_init(&cstate->save_fh, NFS4_FHSIZE);
>- cstate->replay_owner = NULL;
>- return cstate;
>-}
>-
> /*
> * RPC deferral callbacks
> */
> static void
> nfsd4_release_deferred_state(struct svc_deferred_req *dreq)
> {
>- nfsd4_clear_respages(dreq->respages, dreq->resused);
>- cstate_free(dreq->defer_data);
>+ nfsd4_cstate_free(dreq->defer_data, dreq);
> }
>
> static void
>@@ -926,12 +902,22 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
> goto out;
>
> status = nfserr_resource;
>- cstate = cstate_alloc();
>+ cstate = nfsd4_cstate_alloc(rqstp);
> if (cstate == NULL)
> goto out;
>
>+ if (rqstp->rq_deferred && rqstp->rq_deferred->defer_data) {
>+ resp->opcnt = cstate->last_op_cnt;
>+ resp->p = cstate->last_op_p;
>+ fh_verify(rqstp, &cstate->current_fh, 0, NFSD_MAY_NOP);
>+ fh_verify(rqstp, &cstate->save_fh, 0, NFSD_MAY_NOP);
>+ }
>+ rqstp->rq_defer_data = cstate;
>+ rqstp->rq_save_state = nfsd4_save_deferred_state;
>+
> status = nfs_ok;
> while (!status && resp->opcnt < args->opcnt) {
>+ cstate->last_op_p = resp->p;
> op = &args->ops[resp->opcnt++];
>
> dprintk("nfsv4 compound op #%d/%d: %d (%s)\n",
>@@ -996,8 +982,15 @@ encode_op:
>
> nfsd4_increment_op_stats(op->opnum);
> }
>+ rqstp->rq_defer_data = NULL;
>+ rqstp->rq_save_state = NULL;
>+
>+ if (status == nfserr_dropit) {
>+ cstate->last_op_cnt = resp->opcnt - 1;
>+ return status;
>+ }
>
>- cstate_free(cstate);
>+ nfsd4_cstate_free(cstate, rqstp->rq_deferred);
> out:
> nfsd4_release_compoundargs(args);
> dprintk("nfsv4 compound returned %d\n", ntohl(status));
>diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
>index f266e45..002a57c 100644
>--- a/fs/nfsd/nfs4state.c
>+++ b/fs/nfsd/nfs4state.c
>@@ -91,6 +91,7 @@ static struct kmem_cache *stateowner_slab = NULL;
> static struct kmem_cache *file_slab = NULL;
> static struct kmem_cache *stateid_slab = NULL;
> static struct kmem_cache *deleg_slab = NULL;
>+static struct kmem_cache *cstate_slab;
>
> void
> nfs4_lock_state(void)
>@@ -442,6 +443,37 @@ static struct nfs4_client *create_client(struct xdr_netobj name, char *recdir)
> return clp;
> }
>
>+void nfsd4_cstate_free(struct nfsd4_compound_state *cstate,
>+ struct svc_deferred_req *dreq)
>+{
>+ if (dreq && dreq->release_state)
>+ nfsd4_clear_respages(dreq->respages, dreq->resused);
>+ if (cstate == NULL)
>+ return;
>+ fh_put(&cstate->current_fh);
>+ fh_put(&cstate->save_fh);
>+ BUG_ON(cstate->replay_owner);
>+ kmem_cache_free(cstate_slab, cstate);
>+}
>+
>+struct nfsd4_compound_state *nfsd4_cstate_alloc(struct svc_rqst *rqstp)
>+{
>+ struct nfsd4_compound_state *cstate;
>+
>+ if (rqstp->rq_deferred && rqstp->rq_deferred->defer_data) {
>+ cstate = rqstp->rq_deferred->defer_data;
>+ goto out;
>+ }
>+ cstate = kmem_cache_alloc(cstate_slab, GFP_KERNEL);
>+ if (cstate == NULL)
>+ return NULL;
>+ fh_init(&cstate->current_fh, NFS4_FHSIZE);
>+ fh_init(&cstate->save_fh, NFS4_FHSIZE);
>+ cstate->replay_owner = NULL;
>+out:
>+ return cstate;
>+}
>+
> static void copy_verf(struct nfs4_client *target, nfs4_verifier *source)
> {
> memcpy(target->cl_verifier.data, source->data,
>@@ -986,6 +1018,7 @@ nfsd4_free_slabs(void)
> nfsd4_free_slab(&file_slab);
> nfsd4_free_slab(&stateid_slab);
> nfsd4_free_slab(&deleg_slab);
>+ nfsd4_free_slab(&cstate_slab);
> }
>
> static int
>@@ -1007,6 +1040,10 @@ nfsd4_init_slabs(void)
> sizeof(struct nfs4_delegation), 0, 0, NULL);
> if (deleg_slab == NULL)
> goto out_nomem;
>+ cstate_slab = kmem_cache_create("nfsd4_compound_states",
>+ sizeof(struct nfsd4_compound_state), 0, 0, NULL);
>+ if (cstate_slab == NULL)
>+ goto out_nomem;
> return 0;
> out_nomem:
> nfsd4_free_slabs();
>diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
>index f3495ae..3407f4b 100644
>--- a/include/linux/nfsd/xdr4.h
>+++ b/include/linux/nfsd/xdr4.h
>@@ -48,6 +48,8 @@ struct nfsd4_compound_state {
> struct svc_fh current_fh;
> struct svc_fh save_fh;
> struct nfs4_stateowner *replay_owner;
>+ __be32 *last_op_p;
>+ u32 last_op_cnt;
> };
>
> struct nfsd4_change_info {
>@@ -447,6 +449,9 @@ extern void nfsd4_cache_rqst_pages(struct svc_rqst *rqstp,
> struct page **respages, short *resused);
> extern void nfsd4_restore_rqst_pages(struct svc_rqst *rqstp,
> struct page **respages, short resused);
>+extern struct nfsd4_compound_state *nfsd4_cstate_alloc(struct svc_rqst *rqstp);
>+extern void nfsd4_cstate_free(struct nfsd4_compound_state *cstate,
>+ struct svc_deferred_req *dreq);
> extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
> struct nfsd4_compound_state *,
> struct nfsd4_setclientid *setclid);
>--
>1.5.4.3
>