2011-06-30 08:16:52

by Mi Jinlong

[permalink] [raw]
Subject: [RFC][PATCH 2/2] nfsd41: try to check reply size before operation

Implementing each operation's enc_size, and try to check reply size
before operation.

Signed-off-by: Mi Jinlong <[email protected]>
---
fs/nfsd/nfs4proc.c | 2 +-
fs/nfsd/nfs4xdr.c | 210 +++++++++++++++++++++++++++++++++++++++++++++++-----
2 files changed, 193 insertions(+), 19 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 3a6dbd7..06ca11c 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1165,7 +1165,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}

- if (opdesc->op_func)
+ if (op->status == 0 && opdesc->op_func)
op->status = opdesc->op_func(rqstp, cstate, &op->u);
else
BUG_ON(op->status == nfs_ok);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1273661..ae35f7e 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3153,6 +3153,89 @@ struct nfsd4_enc_op {
u32 enc_size;
};

+#define op_encode_hdr_size (2)
+
+#define encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+
+#define nfsd4_enc_access_sz (op_encode_hdr_size + 2)
+#define nfsd4_enc_close_sz (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_commit_sz (op_encode_hdr_size + encode_verifier_maxsz)
+#define nfsd4_enc_create_sz (op_encode_hdr_size + encode_stateid_maxsz)
+
+#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_fattr_bitmap_maxsz (3)
+#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+ 3 + 3 + 3 + nfs4_owner_maxsz + \
+ nfs4_group_maxsz))
+#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
+ nfs4_fattr_value_maxsz)
+#define nfsd4_enc_getattr_sz (op_encode_hdr_size + nfs4_fattr_maxsz)
+#define nfsd4_enc_getfh_sz (op_encode_hdr_size + 1 + \
+ ((3+NFS4_FHSIZE) >> 2))
+
+#define encode_change_info_maxsz (5)
+#define nfsd4_enc_link_sz (op_encode_hdr_size + encode_change_info_maxsz)
+
+#define encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define encode_lock_denied_maxsz \
+ (8 + encode_lockowner_maxsz)
+#define nfsd4_enc_lock_sz (op_encode_hdr_size + encode_lock_denied_maxsz)
+#define nfsd4_enc_lockt_sz (op_encode_hdr_size + encode_lock_denied_maxsz)
+#define nfsd4_enc_locku_sz (op_encode_hdr_size + encode_stateid_maxsz)
+
+#define encode_ace_maxsz (3 + nfs4_owner_maxsz)
+#define encode_delegation_maxsz (1 + encode_stateid_maxsz + 1 + \
+ encode_ace_maxsz)
+#define nfsd4_enc_open_sz (op_encode_hdr_size + encode_stateid_maxsz + \
+ encode_change_info_maxsz + 1 + \
+ nfs4_fattr_bitmap_maxsz + \
+ encode_delegation_maxsz)
+#define nfsd4_enc_open_confirm_sz \
+ (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_open_downgrade_sz \
+ (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_read_sz (op_encode_hdr_size + 2)
+#define nfsd4_enc_readdir_sz (op_encode_hdr_size + encode_verifier_maxsz)
+#define nfsd4_enc_readlink_sz (op_encode_hdr_size + 1)
+#define nfsd4_enc_remove_sz (op_encode_hdr_size + encode_change_info_maxsz)
+#define nfsd4_enc_rename_sz (op_encode_hdr_size + \
+ encode_change_info_maxsz + \
+ encode_change_info_maxsz)
+
+#define NFS_MAX_SECFLAVORS (12)
+#define nfsd4_enc_secinfo_sz (op_encode_hdr_size + 4 + \
+ (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
+
+#define nfsd4_enc_setattr_sz (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz)
+#define nfsd4_enc_setclientid_sz \
+ (op_encode_hdr_size + 2 + 1024)
+#define nfsd4_enc_write_sz (op_encode_hdr_size + 2 + encode_verifier_maxsz)
+
+/* For NFSv4.1*/
+#define nfsd4_enc_bind_conn_to_session_sz \
+ (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 2)
+#define nfsd4_enc_exchange_id_sz \
+ (op_encode_hdr_size + 2 + 1 + 1 + 1 + 0 + \
+ 2 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+ 1 + 0)
+
+#define encode_channel_attrs_maxsz (6 + 1 + 1)
+#define nfsd4_enc_create_session_sz \
+ (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 + 1 + encode_channel_attrs_maxsz + \
+ encode_channel_attrs_maxsz)
+
+#define nfsd4_enc_destroy_session_sz \
+ (op_encode_hdr_size)
+#define nfsd4_enc_secinfo_no_name_sz nfsd4_enc_secinfo_sz
+#define nfsd4_enc_sequence_sz (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+
/*
* Note: nfsd4_enc_ops vector is shared for v4.0 and v4.1
* since we don't need to filter out obsolete ops as this is
@@ -3161,222 +3244,313 @@ struct nfsd4_enc_op {
static struct nfsd4_enc_op nfsd4_enc_ops[] = {
[OP_ACCESS] = {
.enc_func = (nfsd4_enc)nfsd4_encode_access,
+ .enc_size = nfsd4_enc_access_sz,
},
[OP_CLOSE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_close,
+ .enc_size = nfsd4_enc_close_sz,
},
[OP_COMMIT] = {
.enc_func = (nfsd4_enc)nfsd4_encode_commit,
+ .enc_size = nfsd4_enc_commit_sz,
},
[OP_CREATE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_create,
+ .enc_size = nfsd4_enc_create_sz,
},
[OP_DELEGPURGE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_DELEGRETURN] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_GETATTR] = {
.enc_func = (nfsd4_enc)nfsd4_encode_getattr,
+ .enc_size = nfsd4_enc_getattr_sz,
},
[OP_GETFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_getfh,
+ .enc_size = nfsd4_enc_getfh_sz,
},
[OP_LINK] = {
.enc_func = (nfsd4_enc)nfsd4_encode_link,
+ .enc_size = nfsd4_enc_link_sz,
},
[OP_LOCK] = {
.enc_func = (nfsd4_enc)nfsd4_encode_lock,
+ .enc_size = nfsd4_enc_lock_sz,
},
[OP_LOCKT] = {
.enc_func = (nfsd4_enc)nfsd4_encode_lockt,
+ .enc_size = nfsd4_enc_lockt_sz,
},
[OP_LOCKU] = {
.enc_func = (nfsd4_enc)nfsd4_encode_locku,
+ .enc_size = nfsd4_enc_locku_sz,
},
[OP_LOOKUP] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_LOOKUPP] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_NVERIFY] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_OPEN] = {
.enc_func = (nfsd4_enc)nfsd4_encode_open,
+ .enc_size = nfsd4_enc_open_sz,
},
[OP_OPENATTR] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_OPEN_CONFIRM] = {
.enc_func = (nfsd4_enc)nfsd4_encode_open_confirm,
+ .enc_size = nfsd4_enc_open_confirm_sz,
},
[OP_OPEN_DOWNGRADE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_open_downgrade,
+ .enc_size = nfsd4_enc_open_downgrade_sz,
},
[OP_PUTFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_PUTPUBFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_PUTROOTFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_READ] = {
.enc_func = (nfsd4_enc)nfsd4_encode_read,
+ .enc_size = nfsd4_enc_read_sz,
},
[OP_READDIR] = {
.enc_func = (nfsd4_enc)nfsd4_encode_readdir,
+ .enc_size = nfsd4_enc_readdir_sz,
},
[OP_READLINK] = {
.enc_func = (nfsd4_enc)nfsd4_encode_readlink,
+ .enc_size = nfsd4_enc_readlink_sz,
},
[OP_REMOVE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_remove,
+ .enc_size = nfsd4_enc_remove_sz,
},
[OP_RENAME] = {
.enc_func = (nfsd4_enc)nfsd4_encode_rename,
+ .enc_size = nfsd4_enc_rename_sz,
},
[OP_RENEW] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_RESTOREFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_SAVEFH] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_SECINFO] = {
.enc_func = (nfsd4_enc)nfsd4_encode_secinfo,
+ .enc_size = nfsd4_enc_secinfo_sz,
},
[OP_SETATTR] = {
.enc_func = (nfsd4_enc)nfsd4_encode_setattr,
+ .enc_size = nfsd4_enc_setattr_sz,
},
[OP_SETCLIENTID] = {
.enc_func = (nfsd4_enc)nfsd4_encode_setclientid,
+ .enc_size = nfsd4_enc_setclientid_sz,
},
[OP_SETCLIENTID_CONFIRM] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_VERIFY] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_WRITE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_write,
+ .enc_size = nfsd4_enc_write_sz,
},
[OP_RELEASE_LOCKOWNER] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},

/* NFSv4.1 operations */
[OP_BACKCHANNEL_CTL] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_BIND_CONN_TO_SESSION] = {
.enc_func = (nfsd4_enc)nfsd4_encode_bind_conn_to_session,
+ .enc_size = nfsd4_enc_bind_conn_to_session_sz,
},
[OP_EXCHANGE_ID] = {
.enc_func = (nfsd4_enc)nfsd4_encode_exchange_id,
+ .enc_size = nfsd4_enc_exchange_id_sz,
},
[OP_CREATE_SESSION] = {
.enc_func = (nfsd4_enc)nfsd4_encode_create_session,
+ .enc_size = nfsd4_enc_create_session_sz,
},
[OP_DESTROY_SESSION] = {
.enc_func = (nfsd4_enc)nfsd4_encode_destroy_session,
+ .enc_size = nfsd4_enc_destroy_session_sz,
},
[OP_FREE_STATEID] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_GET_DIR_DELEGATION] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_GETDEVICEINFO] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_GETDEVICELIST] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_LAYOUTCOMMIT] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_LAYOUTGET] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_LAYOUTRETURN] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_SECINFO_NO_NAME] = {
.enc_func = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
+ .enc_size = nfsd4_enc_secinfo_no_name_sz,
},
[OP_SEQUENCE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_sequence,
+ .enc_size = nfsd4_enc_sequence_sz,
},
[OP_SET_SSV] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_TEST_STATEID] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_WANT_DELEGATION] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_DESTROY_CLIENTID] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
[OP_RECLAIM_COMPLETE] = {
.enc_func = (nfsd4_enc)nfsd4_encode_noop,
+ .enc_size = op_encode_hdr_size,
},
};

+static u32 get_ops_max_rsz(struct nfsd4_compoundres *resp)
+{
+ struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
+ struct nfsd4_op op = args->ops[resp->opcnt];
+ int length = 0, maxcount = 0;
+
+ switch (op.opnum) {
+ case OP_READ:
+ maxcount = svc_max_payload(resp->rqstp);
+ if (maxcount > op.u.read.rd_length)
+ length = op.u.read.rd_length;
+ else
+ length = maxcount;
+ break;
+
+ case OP_READDIR:
+ if (op.u.readdir.rd_maxcount < PAGE_SIZE)
+ length = op.u.readdir.rd_maxcount;
+ else
+ length = PAGE_SIZE;
+ break;
+ case OP_READLINK:
+ length = PATH_MAX;
+ break;
+ }
+
+ length += nfsd4_enc_ops[op.opnum].enc_size * 4;
+ dprintk("%s opnum %u max reply %u\n", __func__, op.opnum, length);
+
+ return length;
+}
+
/*
* Calculate the total amount of memory that the compound response has taken
* after encoding the current operation.
*
- * pad: add on 8 bytes for the next operation's op_code and status so that
- * there is room to cache a failure on the next operation.
- *
- * Compare this length to the session se_fmaxresp_cached.
+ * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
*
* Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
* will be at least a page and will therefore hold the xdr_buf head.
*/
-static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
+static int nfsd4_check_resp_limit(struct nfsd4_compoundres *resp)
{
int status = 0;
struct xdr_buf *xb = &resp->rqstp->rq_res;
struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
struct nfsd4_session *session = NULL;
struct nfsd4_slot *slot = resp->cstate.slot;
- u32 length, tlen = 0, pad = 8;
+ u32 length = 0, tlen = 0, nlen = 0;

if (!nfsd4_has_session(&resp->cstate))
return status;

session = resp->cstate.session;
- if (session == NULL || slot->sl_cachethis == 0)
+ if (session == NULL)
return status;

- if (resp->opcnt >= args->opcnt)
- pad = 0; /* this is the last operation */
+ if (resp->opcnt < args->opcnt)
+ nlen = get_ops_max_rsz(resp);
+ else
+ return status;

if (xb->page_len == 0) {
- length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
+ length = (char *)resp->p - (char *)xb->head[0].iov_base + nlen;
} else {
if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;

- length = xb->head[0].iov_len + xb->page_len + tlen + pad;
+ length = xb->head[0].iov_len + xb->page_len + tlen + nlen;
}
- dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
- length, xb->page_len, tlen, pad);
+ dprintk("%s length %u, xb->page_len %u tlen %u nlen %u\n", __func__,
+ length, xb->page_len, tlen, nlen);

- if (length <= session->se_fchannel.maxresp_cached)
- return status;
- else
- return nfserr_rep_too_big_to_cache;
+ if (length > session->se_fchannel.maxresp_sz)
+ args->ops[resp->opcnt].status = nfserr_rep_too_big;
+
+ if (slot->sl_cachethis == 1 &&
+ length > session->se_fchannel.maxresp_cached)
+ args->ops[resp->opcnt].status = nfserr_rep_too_big_to_cache;
+
+ return 0;
}

void
@@ -3396,7 +3570,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
!nfsd4_enc_ops[op->opnum].enc_func);
op->status = nfsd4_enc_ops[op->opnum].enc_func(resp, op->status, &op->u);
/* nfsd4_check_drc_limit guarantees enough room for error status */
- if (!op->status && nfsd4_check_drc_limit(resp))
+ if (!op->status && nfsd4_check_resp_limit(resp))
op->status = nfserr_rep_too_big_to_cache;
status:
/*
--
1.7.5.4





2011-07-06 20:19:21

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [RFC][PATCH 2/2] nfsd41: try to check reply size before operation

On Thu, Jun 30, 2011 at 04:20:05PM +0800, Mi Jinlong wrote:
> Implementing each operation's enc_size, and try to check reply size
> before operation.

This is one of our remaining server 4.1 todo's, and it's a lot of
not-especially-fun work--thanks for taking it on.

> @@ -3161,222 +3244,313 @@ struct nfsd4_enc_op {
> static struct nfsd4_enc_op nfsd4_enc_ops[] = {
> [OP_ACCESS] = {
> .enc_func = (nfsd4_enc)nfsd4_encode_access,
> + .enc_size = nfsd4_enc_access_sz,

Perhaps the new enc_size field should go into the nfsd4_operation array
instead of here in the nfsd4_enc_op array.

> +static u32 get_ops_max_rsz(struct nfsd4_compoundres *resp)
> +{
> + struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
> + struct nfsd4_op op = args->ops[resp->opcnt];
> + int length = 0, maxcount = 0;
> +
> + switch (op.opnum) {
> + case OP_READ:
> + maxcount = svc_max_payload(resp->rqstp);
> + if (maxcount > op.u.read.rd_length)
> + length = op.u.read.rd_length;
> + else
> + length = maxcount;
> + break;
> +
> + case OP_READDIR:
> + if (op.u.readdir.rd_maxcount < PAGE_SIZE)
> + length = op.u.readdir.rd_maxcount;
> + else
> + length = PAGE_SIZE;
> + break;
> + case OP_READLINK:
> + length = PATH_MAX;
> + break;

OP_GETATTR also needs special handling, as it may include
arbitrary-length acls and file owners.

Maybe we can enforce a small limit on file owners and not have to
calculate that on the fly. ACLs, though, can definitely be very large.

Also, to prevent this switch from getting too long, I think it would be
better to add something like a get_max_resp_size() callback as another
field of the nfsd4_operation structure, and allow it to be NULL for most
operations (in which case enc_size will be used instead).

> void
> @@ -3396,7 +3570,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> !nfsd4_enc_ops[op->opnum].enc_func);
> op->status = nfsd4_enc_ops[op->opnum].enc_func(resp, op->status, &op->u);
> /* nfsd4_check_drc_limit guarantees enough room for error status */
> - if (!op->status && nfsd4_check_drc_limit(resp))
> + if (!op->status && nfsd4_check_resp_limit(resp))
> op->status = nfserr_rep_too_big_to_cache;
> status:

By the time we get here it's too late--we've already performed the
operation.

For getattr, readlink, readdir, etc., I think that's OK, as those
operations don't change anything.

But if it's a CREATE that crosses the limit, for example, then this
would be a problem: we'd create the file, and *then* return
rep_too_big_to_cache.

So I think this check should go in nfsd4_proc_compound(), before the
call to op_func().

--b.

2011-08-28 10:15:06

by Mi Jinlong

[permalink] [raw]
Subject: [PATCH v3] nfsd41: try to check reply size before operation

For checking the size of reply before calling a operation,
we need try to get maxsize of the operation's reply.

v3: using new method as Bruce said,

"we could handle operations in two different ways:

- For operations that actually change something (write, rename,
open, close, ...), do it the way we're doing it now: be
very careful to estimate the size of the response before even
processing the operation.
- For operations that don't change anything (read, getattr, ...)
just go ahead and do the operation. If you realize after the
fact that the response is too large, then return the error at
that point.

So we'd add another flag to op_flags: say, OP_MODIFIES_SOMETHING. And for
operations with OP_MODIFIES_SOMETHING set, we'd do the first thing. For
operations without it set, we'd do the second."

Signed-off-by: Mi Jinlong <[email protected]>
---
fs/nfsd/nfs4proc.c | 250 ++++++++++++++++++++++++++++++++++++++++++++++++----
fs/nfsd/nfs4xdr.c | 37 ++++----
fs/nfsd/xdr4.h | 1 +
3 files changed, 251 insertions(+), 37 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e807776..9551e9a 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -35,6 +35,7 @@
#include <linux/file.h>
#include <linux/slab.h>

+#include "idmap.h"
#include "cache.h"
#include "xdr4.h"
#include "vfs.h"
@@ -994,6 +995,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)

typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
void *);
+typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
+
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
@@ -1001,6 +1004,7 @@ enum nfsd4_op_flags {
/* For rfc 5661 section 2.6.3.1.1: */
OP_HANDLES_WRONGSEC = 1 << 3,
OP_IS_PUTFH_LIKE = 1 << 4,
+ OP_MODIFIES_SOMETHING = 1 << 5, /* ops is non-idempotent */
};

struct nfsd4_operation {
@@ -1016,6 +1020,8 @@ struct nfsd4_operation {
* the v4.0 case).
*/
bool op_cacheresult;
+ /* Try to get response size before operation */
+ nfsd4op_rsize op_rsize_bop;
};

static struct nfsd4_operation nfsd4_ops[];
@@ -1110,6 +1116,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
struct nfsd4_operation *opdesc;
struct nfsd4_compound_state *cstate = &resp->cstate;
int slack_bytes;
+ u32 plen = 0;
__be32 status;

resp->xbuf = &rqstp->rq_res;
@@ -1188,6 +1195,18 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}

+ /* If ops is non-idempotent */
+ if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
+ if (opdesc->op_rsize_bop) {
+ plen = opdesc->op_rsize_bop(rqstp, op);
+ op->status = nfsd4_check_resp_size(resp, plen);
+ } else
+ op->status = nfserr_serverfault;
+ }
+
+ if (op->status)
+ goto encode_op;
+
if (opdesc->op_func)
op->status = opdesc->op_func(rqstp, cstate, &op->u);
else
@@ -1238,6 +1257,144 @@ out:
return status;
}

+#define op_encode_hdr_size (2)
+#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+#define op_encode_change_info_maxsz (5)
+#define nfs4_fattr_bitmap_maxsz (4)
+
+#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
+
+#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+
+#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz)
+#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \
+ op_encode_ace_maxsz)
+
+#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
+
+static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
+}
+
+static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_stateid_maxsz
+ + op_encode_change_info_maxsz + 1
+ + nfs4_fattr_bitmap_maxsz
+ + op_encode_delegation_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 maxcount = 0, rlen = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rlen = op->u.read.rd_length;
+
+ if (rlen > maxcount)
+ rlen = maxcount;
+
+ return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
+}
+
+static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ u32 rlen = op->u.readdir.rd_maxcount;
+
+ if (rlen > PAGE_SIZE)
+ rlen = PAGE_SIZE;
+
+ return (op_encode_hdr_size + op_encode_verifier_maxsz)
+ * sizeof(__be32) + rlen;
+}
+
+static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz)
+ * sizeof(__be32);
+}
+
+static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_change_info_maxsz
+ + op_encode_change_info_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
+ 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
+ 2 + /*eir_server_owner.so_minor_id */\
+ /* eir_server_owner.so_major_id<> */\
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
+ /* eir_server_scope<> */\
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
+ 1 + /* eir_server_impl_id array length */\
+ 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
+ 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
+}
+
+static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ return (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
+ 2 + /* csr_sequence, csr_flags */\
+ op_encode_channel_attrs_maxsz + \
+ op_encode_channel_attrs_maxsz) * sizeof(__be32);
+}
+
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
@@ -1245,20 +1402,28 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CLOSE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_COMMIT",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_CREATE",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
},
[OP_DELEGRETURN] = {
.op_func = (nfsd4op_func)nfsd4_delegreturn,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_DELEGRETURN",
+ .op_rsize_bop = nfsd4_only_status_rsize,
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
@@ -1271,12 +1436,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
+ .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING,
.op_name = "OP_LINK",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
},
[OP_LOCK] = {
.op_func = (nfsd4op_func)nfsd4_lock,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LOCK",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
@@ -1284,7 +1453,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LOCKU",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
@@ -1302,42 +1473,54 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
- .op_flags = OP_HANDLES_WRONGSEC,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
.op_name = "OP_OPEN",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_OPEN_CONFIRM",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
},
[OP_OPEN_DOWNGRADE] = {
.op_func = (nfsd4op_func)nfsd4_open_downgrade,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_OPEN_DOWNGRADE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
.op_name = "OP_PUTFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
.op_name = "OP_PUTPUBFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
.op_name = "OP_PUTROOTFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READ",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READDIR",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
},
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
@@ -1345,29 +1528,38 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_REMOVE",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
},
[OP_RENAME] = {
- .op_name = "OP_RENAME",
.op_func = (nfsd4op_func)nfsd4_rename,
+ .op_flags = OP_MODIFIES_SOMETHING,
+ .op_name = "OP_RENAME",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
},
[OP_RENEW] = {
.op_func = (nfsd4op_func)nfsd4_renew,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_RENEW",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
+
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE,
+ | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
.op_name = "OP_RESTOREFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
- .op_flags = OP_HANDLES_WRONGSEC,
+ .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
.op_name = "OP_SAVEFH",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
@@ -1377,19 +1569,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
.op_name = "OP_SETATTR",
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_SETCLIENTID",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
},
[OP_SETCLIENTID_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_SETCLIENTID_CONFIRM",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
@@ -1397,35 +1595,47 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
+ .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_WRITE",
.op_cacheresult = true,
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_RELEASE_LOCKOWNER",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},

/* NFSv4.1 operations */
[OP_EXCHANGE_ID] = {
.op_func = (nfsd4op_func)nfsd4_exchange_id,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_EXCHANGE_ID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
},
[OP_BIND_CONN_TO_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_BIND_CONN_TO_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
},
[OP_CREATE_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_create_session,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_CREATE_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
},
[OP_DESTROY_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_destroy_session,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_DESTROY_SESSION",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_SEQUENCE] = {
.op_func = (nfsd4op_func)nfsd4_sequence,
@@ -1434,13 +1644,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_DESTROY_CLIENTID] = {
.op_func = NULL,
- .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
+ .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
+ | OP_MODIFIES_SOMETHING,
.op_name = "OP_DESTROY_CLIENTID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_RECLAIM_COMPLETE] = {
.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
- .op_flags = ALLOWED_WITHOUT_FH,
+ .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
.op_name = "OP_RECLAIM_COMPLETE",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_SECINFO_NO_NAME] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
@@ -1454,8 +1667,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
- .op_flags = ALLOWED_WITHOUT_FH,
+ .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
.op_name = "OP_FREE_STATEID",
+ .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
};

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c8bf405..b7a3c69 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3334,34 +3334,29 @@ static nfsd4_enc nfsd4_enc_ops[] = {

/*
* Calculate the total amount of memory that the compound response has taken
- * after encoding the current operation.
+ * after encoding the current operation with pad.
*
- * pad: add on 8 bytes for the next operation's op_code and status so that
- * there is room to cache a failure on the next operation.
+ * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop()
+ * which was specified at nfsd4_operation, else pad is zero.
*
- * Compare this length to the session se_fmaxresp_cached.
+ * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
*
* Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
* will be at least a page and will therefore hold the xdr_buf head.
*/
-static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
+int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
{
- int status = 0;
struct xdr_buf *xb = &resp->rqstp->rq_res;
- struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
struct nfsd4_session *session = NULL;
struct nfsd4_slot *slot = resp->cstate.slot;
- u32 length, tlen = 0, pad = 8;
+ u32 length, tlen = 0;

if (!nfsd4_has_session(&resp->cstate))
- return status;
+ return 0;

session = resp->cstate.session;
- if (session == NULL || slot->sl_cachethis == 0)
- return status;
-
- if (resp->opcnt >= args->opcnt)
- pad = 0; /* this is the last operation */
+ if (session == NULL)
+ return 0;

if (xb->page_len == 0) {
length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3374,10 +3369,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
length, xb->page_len, tlen, pad);

- if (length <= session->se_fchannel.maxresp_cached)
- return status;
- else
+ if (length > session->se_fchannel.maxresp_sz)
+ return nfserr_rep_too_big;
+
+ if (slot->sl_cachethis == 1 &&
+ length > session->se_fchannel.maxresp_cached)
return nfserr_rep_too_big_to_cache;
+
+ return 0;
}

void
@@ -3397,8 +3396,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
!nfsd4_enc_ops[op->opnum]);
op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
/* nfsd4_check_drc_limit guarantees enough room for error status */
- if (!op->status && nfsd4_check_drc_limit(resp))
- op->status = nfserr_rep_too_big_to_cache;
+ if (!op->status)
+ op->status = nfsd4_check_resp_size(resp, 0);
status:
/*
* Note: We write the status directly, instead of using WRITE32(),
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d2a8d044..6b5496b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -532,6 +532,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
struct nfsd4_compoundargs *);
int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
struct nfsd4_compoundres *);
+int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
--
1.7.6




2011-08-20 10:07:21

by Mi Jinlong

[permalink] [raw]
Subject: [RFC][PATCH v2] nfsd41: try to check reply size before operation

For checking the size of reply before calling a operation,
we need try to get maxsize of the operation's reply.

v1->v2:
move op_enc_size from nfsd4_enc_ops to nfsd4_operation;
add helper function to get payload len which is need as READ, READDIR ...

Signed-off-by: Mi Jinlong <[email protected]>
---
fs/nfsd/nfs4proc.c | 345 +++++++++++++++++++++++++++++++++++++++++++++++++++-
fs/nfsd/nfs4xdr.c | 31 +++--
fs/nfsd/xdr4.h | 1 +
3 files changed, 361 insertions(+), 16 deletions(-)

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e807776..3290bc0 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -34,7 +34,9 @@
*/
#include <linux/file.h>
#include <linux/slab.h>
+#include <linux/sunrpc/svcauth_gss.h>

+#include "idmap.h"
#include "cache.h"
#include "xdr4.h"
#include "vfs.h"
@@ -994,6 +996,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)

typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
void *);
+typedef u32(*nfsd4op_payload)(struct svc_rqst *);
+
enum nfsd4_op_flags {
ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
@@ -1016,6 +1020,10 @@ struct nfsd4_operation {
* the v4.0 case).
*/
bool op_cacheresult;
+ /* size except dynamic payload */
+ u32 op_enc_size;
+ /* try to get dynamic payload as READ, READDIR, READLINK, GETATTR */
+ nfsd4op_payload op_payload;
};

static struct nfsd4_operation nfsd4_ops[];
@@ -1188,7 +1196,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}

- if (opdesc->op_func)
+ if (op->status == 0 && opdesc->op_func)
op->status = opdesc->op_func(rqstp, cstate, &op->u);
else
BUG_ON(op->status == nfs_ok);
@@ -1238,172 +1246,478 @@ out:
return status;
}

+#define op_encode_hdr_size (2)
+
+#define encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
+#define encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
+
+#define nfsd4_enc_access_sz (op_encode_hdr_size + 2)
+#define nfsd4_enc_close_sz (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_commit_sz (op_encode_hdr_size + encode_verifier_maxsz)
+#define nfsd4_enc_create_sz (op_encode_hdr_size + encode_stateid_maxsz)
+
+#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_group_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define nfs4_fattr_bitmap_maxsz (3)
+#define nfs4_fattr_value_maxsz (1 + (1 + 2 + 2 + 4 + 2 + 1 + 1 + 2 + 2 + \
+ 3 + 3 + 3 + nfs4_owner_maxsz + \
+ nfs4_group_maxsz))
+#define nfs4_fattr_maxsz (nfs4_fattr_bitmap_maxsz + \
+ nfs4_fattr_value_maxsz)
+#define nfsd4_enc_getattr_sz (op_encode_hdr_size + nfs4_fattr_maxsz)
+#define nfsd4_enc_getfh_sz (op_encode_hdr_size + 1 + \
+ ((3+NFS4_FHSIZE) >> 2))
+
+#define encode_change_info_maxsz (5)
+#define nfsd4_enc_link_sz (op_encode_hdr_size + encode_change_info_maxsz)
+
+#define encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+#define encode_lock_denied_maxsz \
+ (8 + encode_lockowner_maxsz)
+#define nfsd4_enc_lock_sz (op_encode_hdr_size + encode_lock_denied_maxsz)
+#define nfsd4_enc_lockt_sz (op_encode_hdr_size + encode_lock_denied_maxsz)
+#define nfsd4_enc_locku_sz (op_encode_hdr_size + encode_stateid_maxsz)
+
+#define encode_ace_maxsz (3 + nfs4_owner_maxsz)
+#define encode_delegation_maxsz (1 + encode_stateid_maxsz + 1 + \
+ encode_ace_maxsz)
+#define nfsd4_enc_open_sz (op_encode_hdr_size + encode_stateid_maxsz + \
+ encode_change_info_maxsz + 1 + \
+ nfs4_fattr_bitmap_maxsz + \
+ encode_delegation_maxsz)
+#define nfsd4_enc_open_confirm_sz \
+ (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_open_downgrade_sz \
+ (op_encode_hdr_size + encode_stateid_maxsz)
+#define nfsd4_enc_read_sz (op_encode_hdr_size + 2)
+#define nfsd4_enc_readdir_sz (op_encode_hdr_size + encode_verifier_maxsz)
+#define nfsd4_enc_readlink_sz (op_encode_hdr_size + 1)
+#define nfsd4_enc_remove_sz (op_encode_hdr_size + encode_change_info_maxsz)
+#define nfsd4_enc_rename_sz (op_encode_hdr_size + \
+ encode_change_info_maxsz + \
+ encode_change_info_maxsz)
+
+#define NFS_MAX_SECFLAVORS (12)
+#define nfsd4_enc_secinfo_sz (op_encode_hdr_size + 4 + \
+ (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
+
+#define nfsd4_enc_setattr_sz (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz)
+#define nfsd4_enc_setclientid_sz \
+ (op_encode_hdr_size + 2 + 1024)
+#define nfsd4_enc_write_sz (op_encode_hdr_size + 2 + encode_verifier_maxsz)
+
+/* For NFSv4.1*/
+#define nfsd4_enc_bind_conn_to_session_sz \
+ (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 2)
+#define nfsd4_enc_exchange_id_sz \
+ (op_encode_hdr_size + 2 + 1 + 1 + 1 + 0 + \
+ 2 + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+ XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 + \
+ 1 + 0)
+
+#define encode_channel_attrs_maxsz (6 + 1 + 1)
+#define nfsd4_enc_create_session_sz \
+ (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + \
+ 1 + 1 + encode_channel_attrs_maxsz + \
+ encode_channel_attrs_maxsz)
+
+#define nfsd4_enc_destroy_session_sz op_encode_hdr_size
+#define nfsd4_enc_secinfo_no_name_sz nfsd4_enc_secinfo_sz
+#define nfsd4_enc_sequence_sz (op_encode_hdr_size + \
+ XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5)
+#define nfsd4_enc_test_stateid_sz op_encode_hdr_size
+#define nfsd4_enc_free_stateid_sz op_encode_hdr_size
+
+static u32 nfsd4_enc_read_playload(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_op op = args->ops[resp->opcnt];
+ u32 maxcount = 0, rd_length = 0;
+
+ maxcount = svc_max_payload(rqstp);
+ rd_length = op.u.read.rd_length;
+
+ if (maxcount > rd_length)
+ return rd_length;
+ else
+ return maxcount;
+}
+
+static u32 nfsd4_enc_readdir_playload(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_op op = args->ops[resp->opcnt];
+ u32 rd_maxcount = 0;
+
+ rd_maxcount = op.u.readdir.rd_maxcount;
+
+ if (rd_maxcount > PAGE_SIZE)
+ return PAGE_SIZE;
+ else
+ return rd_maxcount;
+}
+
+static u32 nfsd4_enc_readlink_playload(struct svc_rqst *rqstp)
+{
+ return PATH_MAX;
+}
+
+/*
+ * Response len count as nfsd4_encode_fattr
+ */
+static u32 nfsd4_enc_getattr_playload(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_op op = args->ops[resp->opcnt];
+ u32 *bmval = op.u.getattr.ga_bmval;
+ u32 bmval0 = bmval[0];
+ u32 bmval1 = bmval[1];
+ u32 bmval2 = bmval[2];
+ u32 mlen = 0, lc = 0;
+
+ if (bmval2)
+ mlen += 16;
+ else if (bmval1)
+ mlen += 12;
+ else
+ mlen += 8;
+
+ if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
+ if (!nfsd_suppattrs2(resp->cstate.minorversion))
+ mlen += 12;
+ else
+ mlen += 16;
+ }
+
+ if (bmval0 & FATTR4_WORD0_TYPE)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_CHANGE)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_SIZE)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_LINK_SUPPORT)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_NAMED_ATTR)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_FSID)
+ mlen += 16;
+ if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_LEASE_TIME)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_RDATTR_ERROR)
+ mlen += 4;
+
+ /* For simple, Suppose ACL is supported */
+ if (bmval0 & FATTR4_WORD0_ACL)
+ mlen += 4 + 12;
+ if (bmval0 & FATTR4_WORD0_ACLSUPPORT)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_CANSETTIME)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_CASE_PRESERVING)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED)
+ mlen += 4;
+
+ /* Using NFS4_FHSIZE for fhp->fh_handle.fh_size */
+ if (bmval0 & FATTR4_WORD0_FILEHANDLE)
+ mlen += (XDR_QUADLEN(NFS4_FHSIZE) << 2) + 4;
+ if (bmval0 & FATTR4_WORD0_FILEID)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_FILES_AVAIL)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_FILES_FREE)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_FILES_TOTAL)
+ mlen += 8;
+
+
+ /* fs locations */
+ lc = resp->cstate.current_fh.fh_export->ex_fslocs.locations_count;
+ if (bmval0 & FATTR4_WORD0_FS_LOCATIONS)
+ mlen += 4 + /* count */ \
+ ((XDR_QUADLEN(PATH_MAX) << 2) + 4) + /* components */ \
+ 4 + /* locations_count */ \
+ lc * 2 * ((XDR_QUADLEN(PATH_MAX) << 2) + 4);
+
+ if (bmval0 & FATTR4_WORD0_HOMOGENEOUS)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_MAXFILESIZE)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_MAXLINK)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_MAXNAME)
+ mlen += 4;
+ if (bmval0 & FATTR4_WORD0_MAXREAD)
+ mlen += 8;
+ if (bmval0 & FATTR4_WORD0_MAXWRITE)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_MODE)
+ mlen += 4;
+ if (bmval1 & FATTR4_WORD1_NO_TRUNC)
+ mlen += 4;
+ if (bmval1 & FATTR4_WORD1_NUMLINKS)
+ mlen += 4;
+
+ /* owner : for simple using IDMAP_NAMESZ */
+ if (bmval1 & FATTR4_WORD1_OWNER)
+ mlen += (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4;
+ /* owner group : for simple using IDMAP_NAMESZ */
+ if (bmval1 & FATTR4_WORD1_OWNER_GROUP)
+ mlen += (XDR_QUADLEN(IDMAP_NAMESZ) << 2) + 4;
+
+ if (bmval1 & FATTR4_WORD1_RAWDEV)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_SPACE_AVAIL)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_SPACE_FREE)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_SPACE_TOTAL)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_SPACE_USED)
+ mlen += 8;
+ if (bmval1 & FATTR4_WORD1_TIME_ACCESS)
+ mlen += 12;
+ if (bmval1 & FATTR4_WORD1_TIME_DELTA)
+ mlen += 12;
+ if (bmval1 & FATTR4_WORD1_TIME_METADATA)
+ mlen += 12;
+ if (bmval1 & FATTR4_WORD1_TIME_MODIFY)
+ mlen += 12;
+ if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID)
+ mlen += 8;
+ if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT)
+ mlen += 16;
+
+ return mlen;
+}
+
+static u32 nfsd4_enc_test_stateid_playload(struct svc_rqst *rqstp)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct nfsd4_op op = args->ops[resp->opcnt];
+
+ return op.u.test_stateid.ts_num_ids * sizeof(__be32);
+}
+
static struct nfsd4_operation nfsd4_ops[] = {
[OP_ACCESS] = {
.op_func = (nfsd4op_func)nfsd4_access,
.op_name = "OP_ACCESS",
+ .op_enc_size = nfsd4_enc_access_sz,
},
[OP_CLOSE] = {
.op_func = (nfsd4op_func)nfsd4_close,
.op_name = "OP_CLOSE",
+ .op_enc_size = nfsd4_enc_close_sz,
},
[OP_COMMIT] = {
.op_func = (nfsd4op_func)nfsd4_commit,
.op_name = "OP_COMMIT",
+ .op_enc_size = nfsd4_enc_commit_sz,
},
[OP_CREATE] = {
.op_func = (nfsd4op_func)nfsd4_create,
.op_name = "OP_CREATE",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_create_sz,
},
[OP_DELEGRETURN] = {
.op_func = (nfsd4op_func)nfsd4_delegreturn,
.op_name = "OP_DELEGRETURN",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS,
.op_name = "OP_GETATTR",
+ .op_enc_size = nfsd4_enc_getattr_sz,
+ .op_payload = nfsd4_enc_getattr_playload,
},
[OP_GETFH] = {
.op_func = (nfsd4op_func)nfsd4_getfh,
.op_name = "OP_GETFH",
+ .op_enc_size = nfsd4_enc_getfh_sz,
},
[OP_LINK] = {
.op_func = (nfsd4op_func)nfsd4_link,
.op_name = "OP_LINK",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_link_sz,
},
[OP_LOCK] = {
.op_func = (nfsd4op_func)nfsd4_lock,
.op_name = "OP_LOCK",
+ .op_enc_size = nfsd4_enc_lock_sz,
},
[OP_LOCKT] = {
.op_func = (nfsd4op_func)nfsd4_lockt,
.op_name = "OP_LOCKT",
+ .op_enc_size = nfsd4_enc_lockt_sz,
},
[OP_LOCKU] = {
.op_func = (nfsd4op_func)nfsd4_locku,
.op_name = "OP_LOCKU",
+ .op_enc_size = nfsd4_enc_locku_sz,
},
[OP_LOOKUP] = {
.op_func = (nfsd4op_func)nfsd4_lookup,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_LOOKUP",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_LOOKUPP] = {
.op_func = (nfsd4op_func)nfsd4_lookupp,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_LOOKUPP",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_NVERIFY] = {
.op_func = (nfsd4op_func)nfsd4_nverify,
.op_name = "OP_NVERIFY",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_OPEN] = {
.op_func = (nfsd4op_func)nfsd4_open,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_OPEN",
+ .op_enc_size = nfsd4_enc_open_sz,
},
[OP_OPEN_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_open_confirm,
.op_name = "OP_OPEN_CONFIRM",
+ .op_enc_size = nfsd4_enc_open_confirm_sz,
},
[OP_OPEN_DOWNGRADE] = {
.op_func = (nfsd4op_func)nfsd4_open_downgrade,
.op_name = "OP_OPEN_DOWNGRADE",
+ .op_enc_size = nfsd4_enc_open_downgrade_sz,
},
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTFH",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTPUBFH",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_PUTROOTFH",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
.op_name = "OP_READ",
+ .op_enc_size = nfsd4_enc_read_sz,
+ .op_payload = nfsd4_enc_read_playload,
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
.op_name = "OP_READDIR",
+ .op_enc_size = nfsd4_enc_readdir_sz,
+ .op_payload = nfsd4_enc_readdir_playload,
},
[OP_READLINK] = {
.op_func = (nfsd4op_func)nfsd4_readlink,
.op_name = "OP_READLINK",
+ .op_enc_size = nfsd4_enc_readlink_sz,
+ .op_payload = nfsd4_enc_readlink_playload,
},
[OP_REMOVE] = {
.op_func = (nfsd4op_func)nfsd4_remove,
.op_name = "OP_REMOVE",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_remove_sz,
},
[OP_RENAME] = {
- .op_name = "OP_RENAME",
.op_func = (nfsd4op_func)nfsd4_rename,
+ .op_name = "OP_RENAME",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_rename_sz,
},
[OP_RENEW] = {
.op_func = (nfsd4op_func)nfsd4_renew,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RENEW",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_RESTOREFH] = {
.op_func = (nfsd4op_func)nfsd4_restorefh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
| OP_IS_PUTFH_LIKE,
.op_name = "OP_RESTOREFH",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_SAVEFH] = {
.op_func = (nfsd4op_func)nfsd4_savefh,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SAVEFH",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_SECINFO] = {
.op_func = (nfsd4op_func)nfsd4_secinfo,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
+ .op_enc_size = nfsd4_enc_secinfo_sz,
},
[OP_SETATTR] = {
.op_func = (nfsd4op_func)nfsd4_setattr,
.op_name = "OP_SETATTR",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_setattr_sz,
},
[OP_SETCLIENTID] = {
.op_func = (nfsd4op_func)nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_setclientid_sz,
},
[OP_SETCLIENTID_CONFIRM] = {
.op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_SETCLIENTID_CONFIRM",
.op_cacheresult = true,
+ .op_enc_size = op_encode_hdr_size,
},
[OP_VERIFY] = {
.op_func = (nfsd4op_func)nfsd4_verify,
.op_name = "OP_VERIFY",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_WRITE] = {
.op_func = (nfsd4op_func)nfsd4_write,
.op_name = "OP_WRITE",
.op_cacheresult = true,
+ .op_enc_size = nfsd4_enc_write_sz,
},
[OP_RELEASE_LOCKOWNER] = {
.op_func = (nfsd4op_func)nfsd4_release_lockowner,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
.op_name = "OP_RELEASE_LOCKOWNER",
+ .op_enc_size = op_encode_hdr_size,
},

/* NFSv4.1 operations */
@@ -1411,51 +1725,62 @@ static struct nfsd4_operation nfsd4_ops[] = {
.op_func = (nfsd4op_func)nfsd4_exchange_id,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_EXCHANGE_ID",
+ .op_enc_size = nfsd4_enc_exchange_id_sz,
},
[OP_BIND_CONN_TO_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_BIND_CONN_TO_SESSION",
+ .op_enc_size = nfsd4_enc_bind_conn_to_session_sz,
},
[OP_CREATE_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_create_session,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_CREATE_SESSION",
+ .op_enc_size = nfsd4_enc_create_session_sz,
},
[OP_DESTROY_SESSION] = {
.op_func = (nfsd4op_func)nfsd4_destroy_session,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_DESTROY_SESSION",
+ .op_enc_size = nfsd4_enc_destroy_session_sz,
},
[OP_SEQUENCE] = {
.op_func = (nfsd4op_func)nfsd4_sequence,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_SEQUENCE",
+ .op_enc_size = nfsd4_enc_sequence_sz,
},
[OP_DESTROY_CLIENTID] = {
.op_func = NULL,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
.op_name = "OP_DESTROY_CLIENTID",
+ .op_enc_size = nfsd4_enc_destroy_session_sz,
},
[OP_RECLAIM_COMPLETE] = {
.op_func = (nfsd4op_func)nfsd4_reclaim_complete,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_RECLAIM_COMPLETE",
+ .op_enc_size = op_encode_hdr_size,
},
[OP_SECINFO_NO_NAME] = {
.op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
+ .op_enc_size = nfsd4_enc_secinfo_no_name_sz,
},
[OP_TEST_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_test_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_TEST_STATEID",
+ .op_enc_size = nfsd4_enc_test_stateid_sz,
+ .op_payload = nfsd4_enc_test_stateid_playload,
},
[OP_FREE_STATEID] = {
.op_func = (nfsd4op_func)nfsd4_free_stateid,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_FREE_STATEID",
+ .op_enc_size = nfsd4_enc_free_stateid_sz,
},
};

@@ -1466,6 +1791,22 @@ static const char *nfsd4_op_name(unsigned opnum)
return "unknown_operation";
}

+u32 get_ops_max_respz(struct svc_rqst * rqstp)
+{
+ struct nfsd4_compoundargs *args = rqstp->rq_argp;
+ struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ __be32 opnum = args->ops[resp->opcnt].opnum;
+ __be32 length = 0;
+
+ length = nfsd4_ops[opnum].op_enc_size * 4;
+ if (nfsd4_ops[opnum].op_payload)
+ length += nfsd4_ops[opnum].op_payload(rqstp);
+
+ dprintk("%s opnum %u max reply %u\n", __func__, opnum, length);
+
+ return length;
+}
+
#define nfsd4_voidres nfsd4_voidargs
struct nfsd4_voidargs { int dummy; };

diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index c8bf405..5aa825f 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3336,32 +3336,31 @@ static nfsd4_enc nfsd4_enc_ops[] = {
* Calculate the total amount of memory that the compound response has taken
* after encoding the current operation.
*
- * pad: add on 8 bytes for the next operation's op_code and status so that
- * there is room to cache a failure on the next operation.
- *
- * Compare this length to the session se_fmaxresp_cached.
+ * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
*
* Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
* will be at least a page and will therefore hold the xdr_buf head.
*/
-static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
+static int nfsd4_check_resp_limit(struct nfsd4_compoundres *resp)
{
int status = 0;
struct xdr_buf *xb = &resp->rqstp->rq_res;
struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
struct nfsd4_session *session = NULL;
struct nfsd4_slot *slot = resp->cstate.slot;
- u32 length, tlen = 0, pad = 8;
+ u32 length, tlen = 0, pad = 0;

if (!nfsd4_has_session(&resp->cstate))
return status;

session = resp->cstate.session;
- if (session == NULL || slot->sl_cachethis == 0)
+ if (session == NULL)
return status;

if (resp->opcnt >= args->opcnt)
- pad = 0; /* this is the last operation */
+ return status;
+
+ pad = get_ops_max_respz(resp->rqstp);

if (xb->page_len == 0) {
length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
@@ -3374,10 +3373,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
length, xb->page_len, tlen, pad);

- if (length <= session->se_fchannel.maxresp_cached)
- return status;
- else
- return nfserr_rep_too_big_to_cache;
+ if (length > session->se_fchannel.maxresp_sz)
+ args->ops[resp->opcnt].status = nfserr_rep_too_big;
+
+ if (slot->sl_cachethis == 1 &&
+ length > session->se_fchannel.maxresp_cached)
+ args->ops[resp->opcnt].status = nfserr_rep_too_big_to_cache;
+
+ return 0;
}

void
@@ -3397,8 +3400,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
!nfsd4_enc_ops[op->opnum]);
op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
/* nfsd4_check_drc_limit guarantees enough room for error status */
- if (!op->status && nfsd4_check_drc_limit(resp))
- op->status = nfserr_rep_too_big_to_cache;
+ if (!op->status)
+ nfsd4_check_resp_limit(resp);
status:
/*
* Note: We write the status directly, instead of using WRITE32(),
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index d2a8d044..85596a2 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -588,6 +588,7 @@ extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_delegreturn *dr);
extern __be32 nfsd4_renew(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, clientid_t *clid);
+extern __be32 get_ops_max_respz(struct svc_rqst *);
extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid);
extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
--
1.7.6



2011-08-23 21:39:17

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [RFC][PATCH v2] nfsd41: try to check reply size before operation

On Sat, Aug 20, 2011 at 06:11:31PM +0800, Mi Jinlong wrote:
> For checking the size of reply before calling a operation,
> we need try to get maxsize of the operation's reply.
>
> v1->v2:
> move op_enc_size from nfsd4_enc_ops to nfsd4_operation;
> add helper function to get payload len which is need as READ, READDIR ...

I just want to make sure I understand the logic here. So while
encoding this operation, we estimate the size of the *next* operation:

> @@ -1466,6 +1791,22 @@ static const char *nfsd4_op_name(unsigned opnum)
> return "unknown_operation";
> }
>
> +u32 get_ops_max_respz(struct svc_rqst * rqstp)
> +{
> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> + struct nfsd4_compoundres *resp = rqstp->rq_resp;
> + __be32 opnum = args->ops[resp->opcnt].opnum;
> + __be32 length = 0;
> +
> + length = nfsd4_ops[opnum].op_enc_size * 4;
> + if (nfsd4_ops[opnum].op_payload)
> + length += nfsd4_ops[opnum].op_payload(rqstp);
> +
> + dprintk("%s opnum %u max reply %u\n", __func__, opnum, length);
> +
> + return length;
> +}
> +
> #define nfsd4_voidres nfsd4_voidargs
> struct nfsd4_voidargs { int dummy; };

and we stick the result into the next op's status field:

> @@ -3374,10 +3373,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
> dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
> length, xb->page_len, tlen, pad);
>
> - if (length <= session->se_fchannel.maxresp_cached)
> - return status;
> - else
> - return nfserr_rep_too_big_to_cache;
> + if (length > session->se_fchannel.maxresp_sz)
> + args->ops[resp->opcnt].status = nfserr_rep_too_big;
> +
> + if (slot->sl_cachethis == 1 &&
> + length > session->se_fchannel.maxresp_cached)
> + args->ops[resp->opcnt].status = nfserr_rep_too_big_to_cache;
> +
> + return 0;
> }

and then we check that status and use it when we process the next
compound:

> @@ -1188,7 +1196,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
> goto encode_op;
> }
>
> - if (opdesc->op_func)
> + if (op->status == 0 && opdesc->op_func)
> op->status = opdesc->op_func(rqstp, cstate, &op->u);
> else
> BUG_ON(op->status == nfs_ok);

Do I have that right?

Is there some reason we need to do it that way? Why not instead do
something like:

}

+ op->status == nfsd4_check_resp_size(resp)
+ if (op->status)
+ goto encode_op;
if (opdesc->op_func)
op->status = opdesc->op_func(rqstp-, cstate, &op->u);

in nfsd4_proc_compound.

A minor nitpick (already in the existing code):

> @@ -3336,32 +3336,31 @@ static nfsd4_enc nfsd4_enc_ops[] = {
> * Calculate the total amount of memory that the compound response has taken
> * after encoding the current operation.
> *
> - * pad: add on 8 bytes for the next operation's op_code and status so that
> - * there is room to cache a failure on the next operation.
> - *
> - * Compare this length to the session se_fmaxresp_cached.
> + * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
> *
> * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
> * will be at least a page and will therefore hold the xdr_buf head.
> */
> -static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
> +static int nfsd4_check_resp_limit(struct nfsd4_compoundres *resp)
> {
> int status = 0;

Status is *always* 0 in this function. Let's just get rid of it.

Oh boy:

> +static u32 nfsd4_enc_getattr_playload(struct svc_rqst *rqstp)
> +{
> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
> + struct nfsd4_compoundres *resp = rqstp->rq_resp;
> + struct nfsd4_op op = args->ops[resp->opcnt];
> + u32 *bmval = op.u.getattr.ga_bmval;
> + u32 bmval0 = bmval[0];
> + u32 bmval1 = bmval[1];
> + u32 bmval2 = bmval[2];
> + u32 mlen = 0, lc = 0;
> +
> + if (bmval2)
> + mlen += 16;
> + else if (bmval1)
> + mlen += 12;
> + else
> + mlen += 8;
> +
> + if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
> + if (!nfsd_suppattrs2(resp->cstate.minorversion))
> + mlen += 12;
> + else
> + mlen += 16;
> + }
> +
> + if (bmval0 & FATTR4_WORD0_TYPE)
> + mlen += 4;
> + if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE)
> + mlen += 4;


This is getting complicated....

The thing is, some of the most complicated ops (read, readdir, getattr)
don't *really* matter that much, because they don't change anything on
the filesystem, and don't change the server state in any way.

So maybe we could handle operations in two different ways:

- For operations that actually change something (write, rename,
open, close, ...), do it the way we're doing it now: be
very careful to estimate the size of the response before even
processing the operation.
- For operations that don't change anything (read, getattr, ...)
just go ahead and do the operation. If you realize after the
fact that the response is too large, then return the error at
that point.

So we'd add another flag to op_flags: say, OP_MODIFIES_SOMETHING. And for
operations with OP_MODIFIES_SOMETHING set, we'd do the first thing. For
operations without it set, we'd do the second.

Would there be any problem with doing it that way?

--b.

2011-08-24 09:03:56

by Mi Jinlong

[permalink] [raw]
Subject: Re: [RFC][PATCH v2] nfsd41: try to check reply size before operation



J. Bruce Fields :
> On Sat, Aug 20, 2011 at 06:11:31PM +0800, Mi Jinlong wrote:
>> For checking the size of reply before calling a operation,
>> we need try to get maxsize of the operation's reply.
>>
>> v1->v2:
>> move op_enc_size from nfsd4_enc_ops to nfsd4_operation;
>> add helper function to get payload len which is need as READ, READDIR ...
>
> I just want to make sure I understand the logic here. So while
> encoding this operation, we estimate the size of the *next* operation:

Yes,

>
>> @@ -1466,6 +1791,22 @@ static const char *nfsd4_op_name(unsigned opnum)
>> return "unknown_operation";
>> }
>>
>> +u32 get_ops_max_respz(struct svc_rqst * rqstp)
>> +{
>> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
>> + struct nfsd4_compoundres *resp = rqstp->rq_resp;
>> + __be32 opnum = args->ops[resp->opcnt].opnum;
>> + __be32 length = 0;
>> +
>> + length = nfsd4_ops[opnum].op_enc_size * 4;
>> + if (nfsd4_ops[opnum].op_payload)
>> + length += nfsd4_ops[opnum].op_payload(rqstp);
>> +
>> + dprintk("%s opnum %u max reply %u\n", __func__, opnum, length);
>> +
>> + return length;
>> +}
>> +
>> #define nfsd4_voidres nfsd4_voidargs
>> struct nfsd4_voidargs { int dummy; };
>
> and we stick the result into the next op's status field:

Yes,

>
>> @@ -3374,10 +3373,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
>> dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
>> length, xb->page_len, tlen, pad);
>>
>> - if (length <= session->se_fchannel.maxresp_cached)
>> - return status;
>> - else
>> - return nfserr_rep_too_big_to_cache;
>> + if (length > session->se_fchannel.maxresp_sz)
>> + args->ops[resp->opcnt].status = nfserr_rep_too_big;
>> +
>> + if (slot->sl_cachethis == 1 &&
>> + length > session->se_fchannel.maxresp_cached)
>> + args->ops[resp->opcnt].status = nfserr_rep_too_big_to_cache;
>> +
>> + return 0;
>> }
>
> and then we check that status and use it when we process the next
> compound:

Yes,

>
>> @@ -1188,7 +1196,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
>> goto encode_op;
>> }
>>
>> - if (opdesc->op_func)
>> + if (op->status == 0 && opdesc->op_func)
>> op->status = opdesc->op_func(rqstp, cstate, &op->u);
>> else
>> BUG_ON(op->status == nfs_ok);
>
> Do I have that right?

Yes, you are right.

>
> Is there some reason we need to do it that way? Why not instead do
> something like:

It sounds great!
I will have a try.

>
> }
>
> + op->status == nfsd4_check_resp_size(resp)
> + if (op->status)
> + goto encode_op;
> if (opdesc->op_func)
> op->status = opdesc->op_func(rqstp-, cstate, &op->u);
>
> in nfsd4_proc_compound.
>
> A minor nitpick (already in the existing code):
>
>> @@ -3336,32 +3336,31 @@ static nfsd4_enc nfsd4_enc_ops[] = {
>> * Calculate the total amount of memory that the compound response has taken
>> * after encoding the current operation.
>> *
>> - * pad: add on 8 bytes for the next operation's op_code and status so that
>> - * there is room to cache a failure on the next operation.
>> - *
>> - * Compare this length to the session se_fmaxresp_cached.
>> + * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
>> *
>> * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
>> * will be at least a page and will therefore hold the xdr_buf head.
>> */
>> -static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
>> +static int nfsd4_check_resp_limit(struct nfsd4_compoundres *resp)
>> {
>> int status = 0;
>
> Status is *always* 0 in this function. Let's just get rid of it.

OK.

>
> Oh boy:
>
>> +static u32 nfsd4_enc_getattr_playload(struct svc_rqst *rqstp)
>> +{
>> + struct nfsd4_compoundargs *args = rqstp->rq_argp;
>> + struct nfsd4_compoundres *resp = rqstp->rq_resp;
>> + struct nfsd4_op op = args->ops[resp->opcnt];
>> + u32 *bmval = op.u.getattr.ga_bmval;
>> + u32 bmval0 = bmval[0];
>> + u32 bmval1 = bmval[1];
>> + u32 bmval2 = bmval[2];
>> + u32 mlen = 0, lc = 0;
>> +
>> + if (bmval2)
>> + mlen += 16;
>> + else if (bmval1)
>> + mlen += 12;
>> + else
>> + mlen += 8;
>> +
>> + if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
>> + if (!nfsd_suppattrs2(resp->cstate.minorversion))
>> + mlen += 12;
>> + else
>> + mlen += 16;
>> + }
>> +
>> + if (bmval0 & FATTR4_WORD0_TYPE)
>> + mlen += 4;
>> + if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE)
>> + mlen += 4;
>
>
> This is getting complicated....

Agree,

>
> The thing is, some of the most complicated ops (read, readdir, getattr)
> don't *really* matter that much, because they don't change anything on
> the filesystem, and don't change the server state in any way.
>
> So maybe we could handle operations in two different ways:
>
> - For operations that actually change something (write, rename,
> open, close, ...), do it the way we're doing it now: be
> very careful to estimate the size of the response before even
> processing the operation.
> - For operations that don't change anything (read, getattr, ...)
> just go ahead and do the operation. If you realize after the
> fact that the response is too large, then return the error at
> that point.
>
> So we'd add another flag to op_flags: say, OP_MODIFIES_SOMETHING. And for
> operations with OP_MODIFIES_SOMETHING set, we'd do the first thing. For
> operations without it set, we'd do the second.
>
> Would there be any problem with doing it that way?

No, I will try to do it as that.

Thanks for comment!


--
----
thanks
Mi Jinlong


2011-09-16 14:30:36

by J. Bruce Fields

[permalink] [raw]
Subject: Re: [PATCH v3] nfsd41: try to check reply size before operation

On Sun, Aug 28, 2011 at 06:18:56PM +0800, Mi Jinlong wrote:
> For checking the size of reply before calling a operation,
> we need try to get maxsize of the operation's reply.
>
> v3: using new method as Bruce said,
>
> "we could handle operations in two different ways:
>
> - For operations that actually change something (write, rename,
> open, close, ...), do it the way we're doing it now: be
> very careful to estimate the size of the response before even
> processing the operation.
> - For operations that don't change anything (read, getattr, ...)
> just go ahead and do the operation. If you realize after the
> fact that the response is too large, then return the error at
> that point.
>
> So we'd add another flag to op_flags: say, OP_MODIFIES_SOMETHING. And for
> operations with OP_MODIFIES_SOMETHING set, we'd do the first thing. For
> operations without it set, we'd do the second."
>

Thanks, Mi Jinlong, and apologies for the delay. This looks good to me.
I made a small change:

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 531d1a5..752a367 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1204,13 +1204,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}

- /* If ops is non-idempotent */
+ /* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
- if (opdesc->op_rsize_bop) {
- plen = opdesc->op_rsize_bop(rqstp, op);
- op->status = nfsd4_check_resp_size(resp, plen);
- } else
- op->status = nfserr_serverfault;
+ plen = opdesc->op_rsize_bop(rqstp, op);
+ op->status = nfsd4_check_resp_size(resp, plen);
}

if (op->status)

If we flag an operation with OP_MODIFIES_SOMETHING but fail to add an
op_rsize_bop, that's just a bug in our code, and a bug that we'll find
the very first time we try to use the operation. So we don't need to
handle that bug; just let the kernel crash with a null dereference
instead.

Applied, may be a few days till its pushed out to my public repo.

I've also updated

http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues

--b.

> Signed-off-by: Mi Jinlong <[email protected]>
> ---
> fs/nfsd/nfs4proc.c | 250 ++++++++++++++++++++++++++++++++++++++++++++++++----
> fs/nfsd/nfs4xdr.c | 37 ++++----
> fs/nfsd/xdr4.h | 1 +
> 3 files changed, 251 insertions(+), 37 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index e807776..9551e9a 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -35,6 +35,7 @@
> #include <linux/file.h>
> #include <linux/slab.h>
>
> +#include "idmap.h"
> #include "cache.h"
> #include "xdr4.h"
> #include "vfs.h"
> @@ -994,6 +995,8 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
>
> typedef __be32(*nfsd4op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
> void *);
> +typedef u32(*nfsd4op_rsize)(struct svc_rqst *, struct nfsd4_op *op);
> +
> enum nfsd4_op_flags {
> ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
> ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
> @@ -1001,6 +1004,7 @@ enum nfsd4_op_flags {
> /* For rfc 5661 section 2.6.3.1.1: */
> OP_HANDLES_WRONGSEC = 1 << 3,
> OP_IS_PUTFH_LIKE = 1 << 4,
> + OP_MODIFIES_SOMETHING = 1 << 5, /* ops is non-idempotent */
> };
>
> struct nfsd4_operation {
> @@ -1016,6 +1020,8 @@ struct nfsd4_operation {
> * the v4.0 case).
> */
> bool op_cacheresult;
> + /* Try to get response size before operation */
> + nfsd4op_rsize op_rsize_bop;
> };
>
> static struct nfsd4_operation nfsd4_ops[];
> @@ -1110,6 +1116,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
> struct nfsd4_operation *opdesc;
> struct nfsd4_compound_state *cstate = &resp->cstate;
> int slack_bytes;
> + u32 plen = 0;
> __be32 status;
>
> resp->xbuf = &rqstp->rq_res;
> @@ -1188,6 +1195,18 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
> goto encode_op;
> }
>
> + /* If ops is non-idempotent */
> + if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
> + if (opdesc->op_rsize_bop) {
> + plen = opdesc->op_rsize_bop(rqstp, op);
> + op->status = nfsd4_check_resp_size(resp, plen);
> + } else
> + op->status = nfserr_serverfault;
> + }
> +
> + if (op->status)
> + goto encode_op;
> +
> if (opdesc->op_func)
> op->status = opdesc->op_func(rqstp, cstate, &op->u);
> else
> @@ -1238,6 +1257,144 @@ out:
> return status;
> }
>
> +#define op_encode_hdr_size (2)
> +#define op_encode_stateid_maxsz (XDR_QUADLEN(NFS4_STATEID_SIZE))
> +#define op_encode_verifier_maxsz (XDR_QUADLEN(NFS4_VERIFIER_SIZE))
> +#define op_encode_change_info_maxsz (5)
> +#define nfs4_fattr_bitmap_maxsz (4)
> +
> +#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
> +#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
> +
> +#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
> +
> +#define op_encode_ace_maxsz (3 + nfs4_owner_maxsz)
> +#define op_encode_delegation_maxsz (1 + op_encode_stateid_maxsz + 1 + \
> + op_encode_ace_maxsz)
> +
> +#define op_encode_channel_attrs_maxsz (6 + 1 + 1)
> +
> +static inline u32 nfsd4_only_status_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_status_stateid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_stateid_maxsz)* sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_commit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_change_info_maxsz
> + + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_change_info_maxsz)
> + * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_lock_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_lock_denied_maxsz)
> + * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_open_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_stateid_maxsz
> + + op_encode_change_info_maxsz + 1
> + + nfs4_fattr_bitmap_maxsz
> + + op_encode_delegation_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + u32 maxcount = 0, rlen = 0;
> +
> + maxcount = svc_max_payload(rqstp);
> + rlen = op->u.read.rd_length;
> +
> + if (rlen > maxcount)
> + rlen = maxcount;
> +
> + return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
> +}
> +
> +static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + u32 rlen = op->u.readdir.rd_maxcount;
> +
> + if (rlen > PAGE_SIZE)
> + rlen = PAGE_SIZE;
> +
> + return (op_encode_hdr_size + op_encode_verifier_maxsz)
> + * sizeof(__be32) + rlen;
> +}
> +
> +static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_change_info_maxsz)
> + * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_change_info_maxsz
> + + op_encode_change_info_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + 2 + 1024) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + 2 + 1 + /* eir_clientid, eir_sequenceid */\
> + 1 + 1 + 0 + /* eir_flags, spr_how, SP4_NONE (for now) */\
> + 2 + /*eir_server_owner.so_minor_id */\
> + /* eir_server_owner.so_major_id<> */\
> + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
> + /* eir_server_scope<> */\
> + XDR_QUADLEN(NFS4_OPAQUE_LIMIT) + 1 +\
> + 1 + /* eir_server_impl_id array length */\
> + 0 /* ignored eir_server_impl_id contents */) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_bind_conn_to_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + \
> + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* bctsr_sessid */\
> + 2 /* bctsr_dir, use_conn_in_rdma_mode */) * sizeof(__be32);
> +}
> +
> +static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
> +{
> + return (op_encode_hdr_size + \
> + XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + /* sessionid */\
> + 2 + /* csr_sequence, csr_flags */\
> + op_encode_channel_attrs_maxsz + \
> + op_encode_channel_attrs_maxsz) * sizeof(__be32);
> +}
> +
> static struct nfsd4_operation nfsd4_ops[] = {
> [OP_ACCESS] = {
> .op_func = (nfsd4op_func)nfsd4_access,
> @@ -1245,20 +1402,28 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_CLOSE] = {
> .op_func = (nfsd4op_func)nfsd4_close,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_CLOSE",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
> },
> [OP_COMMIT] = {
> .op_func = (nfsd4op_func)nfsd4_commit,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_COMMIT",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_commit_rsize,
> },
> [OP_CREATE] = {
> .op_func = (nfsd4op_func)nfsd4_create,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_CREATE",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_rsize,
> },
> [OP_DELEGRETURN] = {
> .op_func = (nfsd4op_func)nfsd4_delegreturn,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_DELEGRETURN",
> + .op_rsize_bop = nfsd4_only_status_rsize,
> },
> [OP_GETATTR] = {
> .op_func = (nfsd4op_func)nfsd4_getattr,
> @@ -1271,12 +1436,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_LINK] = {
> .op_func = (nfsd4op_func)nfsd4_link,
> + .op_flags = ALLOWED_ON_ABSENT_FS | OP_MODIFIES_SOMETHING,
> .op_name = "OP_LINK",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_link_rsize,
> },
> [OP_LOCK] = {
> .op_func = (nfsd4op_func)nfsd4_lock,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_LOCK",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_lock_rsize,
> },
> [OP_LOCKT] = {
> .op_func = (nfsd4op_func)nfsd4_lockt,
> @@ -1284,7 +1453,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_LOCKU] = {
> .op_func = (nfsd4op_func)nfsd4_locku,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_LOCKU",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
> },
> [OP_LOOKUP] = {
> .op_func = (nfsd4op_func)nfsd4_lookup,
> @@ -1302,42 +1473,54 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_OPEN] = {
> .op_func = (nfsd4op_func)nfsd4_open,
> - .op_flags = OP_HANDLES_WRONGSEC,
> + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
> .op_name = "OP_OPEN",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_open_rsize,
> },
> [OP_OPEN_CONFIRM] = {
> .op_func = (nfsd4op_func)nfsd4_open_confirm,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_OPEN_CONFIRM",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
> },
> [OP_OPEN_DOWNGRADE] = {
> .op_func = (nfsd4op_func)nfsd4_open_downgrade,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_OPEN_DOWNGRADE",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_status_stateid_rsize,
> },
> [OP_PUTFH] = {
> .op_func = (nfsd4op_func)nfsd4_putfh,
> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> - | OP_IS_PUTFH_LIKE,
> + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
> .op_name = "OP_PUTFH",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_PUTPUBFH] = {
> .op_func = (nfsd4op_func)nfsd4_putrootfh,
> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> - | OP_IS_PUTFH_LIKE,
> + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
> .op_name = "OP_PUTPUBFH",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_PUTROOTFH] = {
> .op_func = (nfsd4op_func)nfsd4_putrootfh,
> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> - | OP_IS_PUTFH_LIKE,
> + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
> .op_name = "OP_PUTROOTFH",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_READ] = {
> .op_func = (nfsd4op_func)nfsd4_read,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_READ",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
> },
> [OP_READDIR] = {
> .op_func = (nfsd4op_func)nfsd4_readdir,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_READDIR",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
> },
> [OP_READLINK] = {
> .op_func = (nfsd4op_func)nfsd4_readlink,
> @@ -1345,29 +1528,38 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_REMOVE] = {
> .op_func = (nfsd4op_func)nfsd4_remove,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_REMOVE",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_remove_rsize,
> },
> [OP_RENAME] = {
> - .op_name = "OP_RENAME",
> .op_func = (nfsd4op_func)nfsd4_rename,
> + .op_flags = OP_MODIFIES_SOMETHING,
> + .op_name = "OP_RENAME",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_rename_rsize,
> },
> [OP_RENEW] = {
> .op_func = (nfsd4op_func)nfsd4_renew,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_RENEW",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> +
> },
> [OP_RESTOREFH] = {
> .op_func = (nfsd4op_func)nfsd4_restorefh,
> .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> - | OP_IS_PUTFH_LIKE,
> + | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING,
> .op_name = "OP_RESTOREFH",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_SAVEFH] = {
> .op_func = (nfsd4op_func)nfsd4_savefh,
> - .op_flags = OP_HANDLES_WRONGSEC,
> + .op_flags = OP_HANDLES_WRONGSEC | OP_MODIFIES_SOMETHING,
> .op_name = "OP_SAVEFH",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_SECINFO] = {
> .op_func = (nfsd4op_func)nfsd4_secinfo,
> @@ -1377,19 +1569,25 @@ static struct nfsd4_operation nfsd4_ops[] = {
> [OP_SETATTR] = {
> .op_func = (nfsd4op_func)nfsd4_setattr,
> .op_name = "OP_SETATTR",
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setattr_rsize,
> },
> [OP_SETCLIENTID] = {
> .op_func = (nfsd4op_func)nfsd4_setclientid,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_SETCLIENTID",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_setclientid_rsize,
> },
> [OP_SETCLIENTID_CONFIRM] = {
> .op_func = (nfsd4op_func)nfsd4_setclientid_confirm,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_SETCLIENTID_CONFIRM",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_VERIFY] = {
> .op_func = (nfsd4op_func)nfsd4_verify,
> @@ -1397,35 +1595,47 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_WRITE] = {
> .op_func = (nfsd4op_func)nfsd4_write,
> + .op_flags = OP_MODIFIES_SOMETHING,
> .op_name = "OP_WRITE",
> .op_cacheresult = true,
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_write_rsize,
> },
> [OP_RELEASE_LOCKOWNER] = {
> .op_func = (nfsd4op_func)nfsd4_release_lockowner,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_RELEASE_LOCKOWNER",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
>
> /* NFSv4.1 operations */
> [OP_EXCHANGE_ID] = {
> .op_func = (nfsd4op_func)nfsd4_exchange_id,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_EXCHANGE_ID",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_exchange_id_rsize,
> },
> [OP_BIND_CONN_TO_SESSION] = {
> .op_func = (nfsd4op_func)nfsd4_bind_conn_to_session,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_BIND_CONN_TO_SESSION",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_bind_conn_to_session_rsize,
> },
> [OP_CREATE_SESSION] = {
> .op_func = (nfsd4op_func)nfsd4_create_session,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_CREATE_SESSION",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_create_session_rsize,
> },
> [OP_DESTROY_SESSION] = {
> .op_func = (nfsd4op_func)nfsd4_destroy_session,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_DESTROY_SESSION",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_SEQUENCE] = {
> .op_func = (nfsd4op_func)nfsd4_sequence,
> @@ -1434,13 +1644,16 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_DESTROY_CLIENTID] = {
> .op_func = NULL,
> - .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP,
> + .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_AS_FIRST_OP
> + | OP_MODIFIES_SOMETHING,
> .op_name = "OP_DESTROY_CLIENTID",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_RECLAIM_COMPLETE] = {
> .op_func = (nfsd4op_func)nfsd4_reclaim_complete,
> - .op_flags = ALLOWED_WITHOUT_FH,
> + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
> .op_name = "OP_RECLAIM_COMPLETE",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> [OP_SECINFO_NO_NAME] = {
> .op_func = (nfsd4op_func)nfsd4_secinfo_no_name,
> @@ -1454,8 +1667,9 @@ static struct nfsd4_operation nfsd4_ops[] = {
> },
> [OP_FREE_STATEID] = {
> .op_func = (nfsd4op_func)nfsd4_free_stateid,
> - .op_flags = ALLOWED_WITHOUT_FH,
> + .op_flags = ALLOWED_WITHOUT_FH | OP_MODIFIES_SOMETHING,
> .op_name = "OP_FREE_STATEID",
> + .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
> },
> };
>
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index c8bf405..b7a3c69 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -3334,34 +3334,29 @@ static nfsd4_enc nfsd4_enc_ops[] = {
>
> /*
> * Calculate the total amount of memory that the compound response has taken
> - * after encoding the current operation.
> + * after encoding the current operation with pad.
> *
> - * pad: add on 8 bytes for the next operation's op_code and status so that
> - * there is room to cache a failure on the next operation.
> + * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop()
> + * which was specified at nfsd4_operation, else pad is zero.
> *
> - * Compare this length to the session se_fmaxresp_cached.
> + * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
> *
> * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
> * will be at least a page and will therefore hold the xdr_buf head.
> */
> -static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
> +int nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
> {
> - int status = 0;
> struct xdr_buf *xb = &resp->rqstp->rq_res;
> - struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
> struct nfsd4_session *session = NULL;
> struct nfsd4_slot *slot = resp->cstate.slot;
> - u32 length, tlen = 0, pad = 8;
> + u32 length, tlen = 0;
>
> if (!nfsd4_has_session(&resp->cstate))
> - return status;
> + return 0;
>
> session = resp->cstate.session;
> - if (session == NULL || slot->sl_cachethis == 0)
> - return status;
> -
> - if (resp->opcnt >= args->opcnt)
> - pad = 0; /* this is the last operation */
> + if (session == NULL)
> + return 0;
>
> if (xb->page_len == 0) {
> length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
> @@ -3374,10 +3369,14 @@ static int nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
> dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
> length, xb->page_len, tlen, pad);
>
> - if (length <= session->se_fchannel.maxresp_cached)
> - return status;
> - else
> + if (length > session->se_fchannel.maxresp_sz)
> + return nfserr_rep_too_big;
> +
> + if (slot->sl_cachethis == 1 &&
> + length > session->se_fchannel.maxresp_cached)
> return nfserr_rep_too_big_to_cache;
> +
> + return 0;
> }
>
> void
> @@ -3397,8 +3396,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
> !nfsd4_enc_ops[op->opnum]);
> op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
> /* nfsd4_check_drc_limit guarantees enough room for error status */
> - if (!op->status && nfsd4_check_drc_limit(resp))
> - op->status = nfserr_rep_too_big_to_cache;
> + if (!op->status)
> + op->status = nfsd4_check_resp_size(resp, 0);
> status:
> /*
> * Note: We write the status directly, instead of using WRITE32(),
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index d2a8d044..6b5496b 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -532,6 +532,7 @@ int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *,
> struct nfsd4_compoundargs *);
> int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
> struct nfsd4_compoundres *);
> +int nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
> void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
> void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
> __be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
> --
> 1.7.6
>
>
>