2019-02-11 16:24:47

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 00/23] NFS client patches for v5.1 (complete)

Hi Anna-

Here's the complete series of my patches that are ready for the v5.1
merge window. All of them have been previously posted for review.
There are four main sets:

- the first three are RPC-over-RDMA-related fixes
- the next seven are observability improvements
- the next eight overhaul the RPC auth layer to use xdr_stream
- the last five reduce the size of RPC receive buffers

---

Chuck Lever (23):
xprtrdma: Fix sparse warnings
xprtrdma: Check inline size before providing a Write chunk
xprtrdma: Reduce the doorbell rate (Receive)
SUNRPC: Display symbolic flag names in RPC trace events
SUNRPC: Add xdr_stream::rqst field
SUNRPC: Add XDR overflow trace event
SUNRPC: Add trace event that reports reply page vector alignment
NFS: Remove print_overflow_msg()
NFS: Add trace events to report non-zero NFS status codes
SUNRPC: Remove some dprintk() call sites from auth functions
SUNRPC: Remove rpc_xprt::tsh_size
SUNRPC: Add build option to disable support for insecure enctypes
SUNRPC: Use struct xdr_stream when constructing RPC Call header
SUNRPC: Clean up rpc_verify_header()
SUNRPC: Use struct xdr_stream when decoding RPC Reply header
SUNRPC: Introduce trace points in rpc_auth_gss.ko
SUNRPC: Remove xdr_buf_trim()
SUNRPC: Add SPDX IDs to some net/sunrpc/auth_gss/ files
SUNRPC: Introduce rpc_prepare_reply_pages()
NFS: Account for XDR pad of buf->pages
SUNRPC: Make AUTH_SYS and AUTH_NULL set au_verfsize
SUNRPC: Add rpc_auth::au_ralign field
SUNRPC: Use au_rslack when computing reply buffer size


fs/lockd/clnt4xdr.c | 14 -
fs/lockd/clntxdr.c | 14 -
fs/nfs/callback_xdr.c | 64 +--
fs/nfs/flexfilelayout/flexfilelayout.c | 2
fs/nfs/nfs2xdr.c | 124 ++----
fs/nfs/nfs3xdr.c | 209 +++--------
fs/nfs/nfs42xdr.c | 21 -
fs/nfs/nfs4trace.h | 25 +
fs/nfs/nfs4xdr.c | 529 ++++++++-------------------
fs/nfs/nfstrace.c | 1
fs/nfs/nfstrace.h | 85 ++++
fs/nfsd/nfs4callback.c | 13 -
include/linux/sunrpc/auth.h | 44 +-
include/linux/sunrpc/clnt.h | 3
include/linux/sunrpc/gss_krb5_enctypes.h | 42 ++
include/linux/sunrpc/xdr.h | 23 +
include/linux/sunrpc/xprt.h | 7
include/trace/events/rpcgss.h | 361 ++++++++++++++++++
include/trace/events/rpcrdma.h | 12 +
include/trace/events/sunrpc.h | 361 ++++++++++++++++++
net/sunrpc/Kconfig | 16 +
net/sunrpc/auth.c | 136 ++++---
net/sunrpc/auth_gss/Makefile | 2
net/sunrpc/auth_gss/auth_gss.c | 551 ++++++++++++++--------------
net/sunrpc/auth_gss/gss_krb5_mech.c | 29 -
net/sunrpc/auth_gss/gss_krb5_wrap.c | 8
net/sunrpc/auth_gss/gss_mech_switch.c | 27 -
net/sunrpc/auth_gss/gss_rpc_upcall.c | 15 -
net/sunrpc/auth_gss/gss_rpc_upcall.h | 16 -
net/sunrpc/auth_gss/gss_rpc_xdr.c | 15 -
net/sunrpc/auth_gss/gss_rpc_xdr.h | 17 -
net/sunrpc/auth_gss/svcauth_gss.c | 3
net/sunrpc/auth_gss/trace.c | 11 +
net/sunrpc/auth_null.c | 56 ++-
net/sunrpc/auth_unix.c | 120 ++++--
net/sunrpc/clnt.c | 390 ++++++++++----------
net/sunrpc/svc.c | 19 -
net/sunrpc/xdr.c | 121 +++---
net/sunrpc/xprt.c | 10 -
net/sunrpc/xprtrdma/backchannel.c | 2
net/sunrpc/xprtrdma/frwr_ops.c | 4
net/sunrpc/xprtrdma/rpc_rdma.c | 22 +
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1
net/sunrpc/xprtrdma/transport.c | 1
net/sunrpc/xprtrdma/verbs.c | 2
net/sunrpc/xprtrdma/xprt_rdma.h | 12 +
net/sunrpc/xprtsock.c | 91 +++--
47 files changed, 2071 insertions(+), 1580 deletions(-)
create mode 100644 include/trace/events/rpcgss.h
create mode 100644 net/sunrpc/auth_gss/trace.c

--
Chuck Lever


2019-02-11 16:23:50

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 01/23] xprtrdma: Fix sparse warnings

linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: warning: incorrect type in argument 5 (different base types)
linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: expected unsigned int [usertype] xid
linux/net/sunrpc/xprtrdma/rpc_rdma.c:375:63: got restricted __be32 [usertype] rq_xid
linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: warning: incorrect type in argument 5 (different base types)
linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: expected unsigned int [usertype] xid
linux/net/sunrpc/xprtrdma/rpc_rdma.c:432:62: got restricted __be32 [usertype] rq_xid
linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: warning: incorrect type in argument 5 (different base types)
linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: expected unsigned int [usertype] xid
linux/net/sunrpc/xprtrdma/rpc_rdma.c:489:62: got restricted __be32 [usertype] rq_xid

Fixes: 0a93fbcb16e6 ("xprtrdma: Plant XID in on-the-wire RDMA ... ")
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/frwr_ops.c | 4 ++--
net/sunrpc/xprtrdma/xprt_rdma.h | 2 +-
2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 6a56105..52cb6c1 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -391,7 +391,7 @@ size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt)
*/
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, u32 xid,
+ int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
@@ -446,7 +446,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
goto out_mapmr_err;

ibmr->iova &= 0x00000000ffffffff;
- ibmr->iova |= ((u64)cpu_to_be32(xid)) << 32;
+ ibmr->iova |= ((u64)be32_to_cpu(xid)) << 32;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);

diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 5a18472..33db208 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -577,7 +577,7 @@ int frwr_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
size_t frwr_maxpages(struct rpcrdma_xprt *r_xprt);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, u32 xid,
+ int nsegs, bool writing, __be32 xid,
struct rpcrdma_mr **mr);
int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req);
void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs);


2019-02-11 16:23:53

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 02/23] xprtrdma: Check inline size before providing a Write chunk

In very rare cases, an NFS READ operation might predict that the
non-payload part of the RPC Reply is large. For instance, an
NFSv4 COMPOUND with a large GETATTR result, in combination with a
large Kerberos credential, could push the non-payload part to be
several kilobytes.

If the non-payload part is larger than the connection's inline
threshold, the client is required to provision a Reply chunk. The
current Linux client does not check for this case. There are two
obvious ways to handle it:

a. Provision a Write chunk for the payload and a Reply chunk for
the non-payload part

b. Provision a Reply chunk for the whole RPC Reply

Some testing at a recent NFS bake-a-thon showed that servers can
mostly handle a. but there are some corner cases that do not work
yet. b. already works (it has to, to handle krb5i/p), but could be
somewhat less efficient. However, I expect this scenario to be very
rare -- no-one has reported a problem yet.

So I'm going to implement b. Sometime later I will provide some
patches to help make b. a little more efficient by more carefully
choosing the Reply chunk's segment sizes to ensure the payload is
optimally aligned.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/rpc_rdma.c | 18 +++++++++++++++++-
1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index d18614e..7774aee 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -164,6 +164,21 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read;
}

+/* The client is required to provide a Reply chunk if the maximum
+ * size of the non-payload part of the RPC Reply is larger than
+ * the inline threshold.
+ */
+static bool
+rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt,
+ const struct rpc_rqst *rqst)
+{
+ const struct xdr_buf *buf = &rqst->rq_rcv_buf;
+ const struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+
+ return buf->head[0].iov_len + buf->tail[0].iov_len <
+ ia->ri_max_inline_read;
+}
+
/* Split @vec on page boundaries into SGEs. FMR registers pages, not
* a byte range. Other modes coalesce these SGEs into a single MR
* when they can.
@@ -762,7 +777,8 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,
*/
if (rpcrdma_results_inline(r_xprt, rqst))
wtype = rpcrdma_noch;
- else if (ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ)
+ else if ((ddp_allowed && rqst->rq_rcv_buf.flags & XDRBUF_READ) &&
+ rpcrdma_nonpayload_inline(r_xprt, rqst))
wtype = rpcrdma_writech;
else
wtype = rpcrdma_replych;


2019-02-11 16:23:58

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 03/23] xprtrdma: Reduce the doorbell rate (Receive)

Post RECV WRs in batches to reduce the hardware doorbell rate per
transport. This helps the RPC-over-RDMA client scale better as the
number of transports grows.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/xprtrdma/verbs.c | 2 ++
net/sunrpc/xprtrdma/xprt_rdma.h | 10 ++++++++++
2 files changed, 12 insertions(+)

diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 4994e75..b4e997d 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -1480,6 +1480,8 @@ struct rpcrdma_regbuf *
if (ep->rep_receive_count > needed)
goto out;
needed -= ep->rep_receive_count;
+ if (!temp)
+ needed += RPCRDMA_MAX_RECV_BATCH;

count = 0;
wr = NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 33db208..10f6593 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -205,6 +205,16 @@ struct rpcrdma_rep {
struct ib_recv_wr rr_recv_wr;
};

+/* To reduce the rate at which a transport invokes ib_post_recv
+ * (and thus the hardware doorbell rate), xprtrdma posts Receive
+ * WRs in batches.
+ *
+ * Setting this to zero disables Receive post batching.
+ */
+enum {
+ RPCRDMA_MAX_RECV_BATCH = 7,
+};
+
/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes
*/
struct rpcrdma_req;


2019-02-11 16:24:03

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 04/23] SUNRPC: Display symbolic flag names in RPC trace events

Human-readable flags make it easier to observe RPC scheduling
decisions and other operational details.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 56 +++++++++++++++++++++++++++++++++++++----
1 file changed, 50 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0d5d0d9..f88b0f5 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -77,6 +77,50 @@
)
);

+TRACE_DEFINE_ENUM(RPC_TASK_ASYNC);
+TRACE_DEFINE_ENUM(RPC_TASK_SWAPPER);
+TRACE_DEFINE_ENUM(RPC_CALL_MAJORSEEN);
+TRACE_DEFINE_ENUM(RPC_TASK_ROOTCREDS);
+TRACE_DEFINE_ENUM(RPC_TASK_DYNAMIC);
+TRACE_DEFINE_ENUM(RPC_TASK_KILLED);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFT);
+TRACE_DEFINE_ENUM(RPC_TASK_SOFTCONN);
+TRACE_DEFINE_ENUM(RPC_TASK_SENT);
+TRACE_DEFINE_ENUM(RPC_TASK_TIMEOUT);
+TRACE_DEFINE_ENUM(RPC_TASK_NOCONNECT);
+TRACE_DEFINE_ENUM(RPC_TASK_NO_RETRANS_TIMEOUT);
+
+#define rpc_show_task_flags(flags) \
+ __print_flags(flags, "|", \
+ { RPC_TASK_ASYNC, "ASYNC" }, \
+ { RPC_TASK_SWAPPER, "SWAPPER" }, \
+ { RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
+ { RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
+ { RPC_TASK_DYNAMIC, "DYNAMIC" }, \
+ { RPC_TASK_KILLED, "KILLED" }, \
+ { RPC_TASK_SOFT, "SOFT" }, \
+ { RPC_TASK_SOFTCONN, "SOFTCONN" }, \
+ { RPC_TASK_SENT, "SENT" }, \
+ { RPC_TASK_TIMEOUT, "TIMEOUT" }, \
+ { RPC_TASK_NOCONNECT, "NOCONNECT" }, \
+ { RPC_TASK_NO_RETRANS_TIMEOUT, "NORTO" })
+
+TRACE_DEFINE_ENUM(RPC_TASK_RUNNING);
+TRACE_DEFINE_ENUM(RPC_TASK_QUEUED);
+TRACE_DEFINE_ENUM(RPC_TASK_ACTIVE);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_XMIT);
+TRACE_DEFINE_ENUM(RPC_TASK_NEED_RECV);
+TRACE_DEFINE_ENUM(RPC_TASK_MSG_PIN_WAIT);
+
+#define rpc_show_runstate(flags) \
+ __print_flags(flags, "|", \
+ { (1UL << RPC_TASK_RUNNING), "RUNNING" }, \
+ { (1UL << RPC_TASK_QUEUED), "QUEUED" }, \
+ { (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
+ { (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
+ { (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
+ { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
+
DECLARE_EVENT_CLASS(rpc_task_running,

TP_PROTO(const struct rpc_task *task, const void *action),
@@ -102,10 +146,10 @@
__entry->flags = task->tk_flags;
),

- TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d action=%pf",
+ TP_printk("task:%u@%d flags=%s runstate=%s status=%d action=%pf",
__entry->task_id, __entry->client_id,
- __entry->flags,
- __entry->runstate,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->action
)
@@ -149,10 +193,10 @@
__assign_str(q_name, rpc_qname(q));
),

- TP_printk("task:%u@%d flags=%4.4x state=%4.4lx status=%d timeout=%lu queue=%s",
+ TP_printk("task:%u@%d flags=%s runstate=%s status=%d timeout=%lu queue=%s",
__entry->task_id, __entry->client_id,
- __entry->flags,
- __entry->runstate,
+ rpc_show_task_flags(__entry->flags),
+ rpc_show_runstate(__entry->runstate),
__entry->status,
__entry->timeout,
__get_str(q_name)


2019-02-11 16:24:09

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 05/23] SUNRPC: Add xdr_stream::rqst field

Having access to the controlling rpc_rqst means a trace point in the
XDR code can report:

- the XID
- the task ID and client ID
- the p_name of the RPC being processed

Subsequent patches will introduce such trace points.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/callback_xdr.c | 5 +++--
fs/nfs/flexfilelayout/flexfilelayout.c | 2 +-
include/linux/sunrpc/xdr.h | 8 ++++++--
net/sunrpc/auth.c | 4 ++--
net/sunrpc/auth_gss/auth_gss.c | 4 ++--
net/sunrpc/xdr.c | 12 +++++++++---
net/sunrpc/xprtrdma/backchannel.c | 2 +-
net/sunrpc/xprtrdma/rpc_rdma.c | 4 ++--
8 files changed, 26 insertions(+), 15 deletions(-)

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index a87a562..bc7c176 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -943,10 +943,11 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp)
};
unsigned int nops = 0;

- xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
+ xdr_init_decode(&xdr_in, &rqstp->rq_arg,
+ rqstp->rq_arg.head[0].iov_base, NULL);

p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
- xdr_init_encode(&xdr_out, &rqstp->rq_res, p);
+ xdr_init_encode(&xdr_out, &rqstp->rq_res, p, NULL);

status = decode_compound_hdr_arg(&xdr_in, &hdr_arg);
if (status == htonl(NFS4ERR_RESOURCE))
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 63abe70..32701b6 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -2036,7 +2036,7 @@ static void ff_layout_encode_iostats_array(struct xdr_stream *xdr,

dprintk("%s: Begin\n", __func__);

- xdr_init_encode(&tmp_xdr, &tmp_buf, NULL);
+ xdr_init_encode(&tmp_xdr, &tmp_buf, NULL, NULL);

ff_layout_encode_ioerr(&tmp_xdr, args, ff_args);
ff_layout_encode_iostats_array(&tmp_xdr, args, ff_args);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 2ec1280..787939d 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -217,6 +217,8 @@ struct xdr_stream {
struct kvec scratch; /* Scratch buffer */
struct page **page_ptr; /* pointer to the current page */
unsigned int nwords; /* Remaining decode buffer length */
+
+ struct rpc_rqst *rqst; /* For debugging */
};

/*
@@ -227,7 +229,8 @@ typedef void (*kxdreproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
void *obj);

-extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf,
+ __be32 *p, struct rpc_rqst *rqst);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern void xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
@@ -235,7 +238,8 @@ typedef int (*kxdrdproc_t)(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
-extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
+extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
+ __be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index f3023bb..8dfab61 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -798,7 +798,7 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct xdr_stream xdr;

- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data);
+ xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp);
encode(rqstp, &xdr, obj);
}

@@ -823,7 +823,7 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct xdr_stream xdr;

- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data);
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp);
return decode(rqstp, &xdr, obj);
}

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 1531b02..a42672e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1722,7 +1722,7 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct xdr_stream xdr;

- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p);
+ xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp);
encode(rqstp, &xdr, obj);
}

@@ -1998,7 +1998,7 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct xdr_stream xdr;

- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp);
return decode(rqstp, &xdr, obj);
}

diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f302c6e..345f08b 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -483,6 +483,7 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer in which to encode data
* @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
*
* Note: at the moment the RPC client only passes the length of our
* scratch buffer in the xdr_buf's header kvec. Previously this
@@ -491,7 +492,8 @@ unsigned int xdr_stream_pos(const struct xdr_stream *xdr)
* of the buffer length, and takes care of adjusting the kvec
* length for us.
*/
-void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct rpc_rqst *rqst)
{
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
@@ -513,6 +515,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
buf->len += len;
iov->iov_len += len;
}
+ xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_encode);

@@ -819,8 +822,10 @@ static bool xdr_set_next_buffer(struct xdr_stream *xdr)
* @xdr: pointer to xdr_stream struct
* @buf: pointer to XDR buffer from which to decode data
* @p: current pointer inside XDR buffer
+ * @rqst: pointer to controlling rpc_rqst, for debugging
*/
-void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
+void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
+ struct rpc_rqst *rqst)
{
xdr->buf = buf;
xdr->scratch.iov_base = NULL;
@@ -836,6 +841,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
xdr->nwords -= p - xdr->p;
xdr->p = p;
}
+ xdr->rqst = rqst;
}
EXPORT_SYMBOL_GPL(xdr_init_decode);

@@ -854,7 +860,7 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
buf->page_len = len;
buf->buflen = len;
buf->len = len;
- xdr_init_decode(xdr, buf, NULL);
+ xdr_init_decode(xdr, buf, NULL, NULL);
}
EXPORT_SYMBOL_GPL(xdr_init_decode_pages);

diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c
index 0de9b3e..98c1e43 100644
--- a/net/sunrpc/xprtrdma/backchannel.c
+++ b/net/sunrpc/xprtrdma/backchannel.c
@@ -123,7 +123,7 @@ static int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst)

rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base);
+ req->rl_rdmabuf->rg_base, rqst);

p = xdr_reserve_space(&req->rl_stream, 28);
if (unlikely(!p))
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 7774aee..6c1fb27 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -748,7 +748,7 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt,

rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
- req->rl_rdmabuf->rg_base);
+ req->rl_rdmabuf->rg_base, rqst);

/* Fixed header fields */
ret = -EMSGSIZE;
@@ -1329,7 +1329,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)

/* Fixed transport header fields */
xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf,
- rep->rr_hdrbuf.head[0].iov_base);
+ rep->rr_hdrbuf.head[0].iov_base, NULL);
p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p));
if (unlikely(!p))
goto out_shortreply;


2019-02-11 16:24:14

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 06/23] SUNRPC: Add XDR overflow trace event

This can help field troubleshooting without needing the overhead of
a full network capture (e.g., tcpdump).

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 67 +++++++++++++++++++++++++++++++++++++++++
net/sunrpc/xdr.c | 24 ++++++++++-----
2 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index f88b0f5..fbc41b8 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -254,6 +254,73 @@
__entry->backlog, __entry->rtt, __entry->execute)
);

+TRACE_EVENT(rpc_xdr_overflow,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ size_t requested
+ ),
+
+ TP_ARGS(xdr, requested),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(int, version)
+ __field(size_t, requested)
+ __field(const void *, end)
+ __field(const void *, p)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ __field(unsigned int, len)
+ __string(progname,
+ xdr->rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ if (xdr->rqst) {
+ const struct rpc_task *task = xdr->rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+ } else {
+ __entry->task_id = 0;
+ __entry->client_id = 0;
+ __assign_str(progname, "unknown")
+ __entry->version = 0;
+ __assign_str(procedure, "unknown")
+ }
+ __entry->requested = requested;
+ __entry->end = xdr->end;
+ __entry->p = xdr->p;
+ __entry->head_base = xdr->buf->head[0].iov_base,
+ __entry->head_len = xdr->buf->head[0].iov_len,
+ __entry->page_len = xdr->buf->page_len,
+ __entry->tail_base = xdr->buf->tail[0].iov_base,
+ __entry->tail_len = xdr->buf->tail[0].iov_len,
+ __entry->len = xdr->buf->len;
+ ),
+
+ TP_printk(
+ "task:%u@%u %sv%d %s requested=%zu p=%p end=%p xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->requested, __entry->p, __entry->end,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len,
+ __entry->len
+ )
+);
+
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 345f08b..6d0b615 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -16,6 +16,7 @@
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
#include <linux/bvec.h>
+#include <trace/events/sunrpc.h>

/*
* XDR functions for basic NFS types
@@ -554,9 +555,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
int frag1bytes, frag2bytes;

if (nbytes > PAGE_SIZE)
- return NULL; /* Bigger buffers require special handling */
+ goto out_overflow; /* Bigger buffers require special handling */
if (xdr->buf->len + nbytes > xdr->buf->buflen)
- return NULL; /* Sorry, we're totally out of space */
+ goto out_overflow; /* Sorry, we're totally out of space */
frag1bytes = (xdr->end - xdr->p) << 2;
frag2bytes = nbytes - frag1bytes;
if (xdr->iov)
@@ -585,6 +586,9 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}

/**
@@ -902,20 +906,23 @@ static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
size_t cplen = (char *)xdr->end - (char *)xdr->p;

if (nbytes > xdr->scratch.iov_len)
- return NULL;
+ goto out_overflow;
p = __xdr_inline_decode(xdr, cplen);
if (p == NULL)
return NULL;
memcpy(cpdest, p, cplen);
+ if (!xdr_set_next_buffer(xdr))
+ goto out_overflow;
cpdest += cplen;
nbytes -= cplen;
- if (!xdr_set_next_buffer(xdr))
- return NULL;
p = __xdr_inline_decode(xdr, nbytes);
if (p == NULL)
return NULL;
memcpy(cpdest, p, nbytes);
return xdr->scratch.iov_base;
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}

/**
@@ -932,14 +939,17 @@ __be32 * xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;

- if (nbytes == 0)
+ if (unlikely(nbytes == 0))
return xdr->p;
if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
- return NULL;
+ goto out_overflow;
p = __xdr_inline_decode(xdr, nbytes);
if (p != NULL)
return p;
return xdr_copy_to_scratch(xdr, nbytes);
+out_overflow:
+ trace_rpc_xdr_overflow(xdr, nbytes);
+ return NULL;
}
EXPORT_SYMBOL_GPL(xdr_inline_decode);



2019-02-11 16:24:20

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 07/23] SUNRPC: Add trace event that reports reply page vector alignment

We don't want READ payloads that are split between the head iovec
and the page buffer, because this requires pull-up, which can be
expensive.

The NFS/RPC client tries hard to predict the size of the head iovec
so that the incoming READ data payload lands only in the page
vector, but it doesn't always get it right. To help diagnose such
problems, add a trace point in the logic that decodes READ-like
operations that reports whether pull-up is being done.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/sunrpc.h | 59 +++++++++++++++++++++++++++++++++++++++++
net/sunrpc/xdr.c | 33 +++++++++++++++++++----
2 files changed, 86 insertions(+), 6 deletions(-)

diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index fbc41b8..6276508 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -321,6 +321,65 @@
)
);

+TRACE_EVENT(rpc_xdr_alignment,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ size_t offset,
+ unsigned int copied
+ ),
+
+ TP_ARGS(xdr, offset, copied),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(int, version)
+ __field(size_t, offset)
+ __field(unsigned int, copied)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ __field(unsigned int, len)
+ __string(progname,
+ xdr->rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ xdr->rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_task *task = xdr->rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+
+ __entry->offset = offset;
+ __entry->copied = copied;
+ __entry->head_base = xdr->buf->head[0].iov_base,
+ __entry->head_len = xdr->buf->head[0].iov_len,
+ __entry->page_len = xdr->buf->page_len,
+ __entry->tail_base = xdr->buf->tail[0].iov_base,
+ __entry->tail_len = xdr->buf->tail[0].iov_len,
+ __entry->len = xdr->buf->len;
+ ),
+
+ TP_printk(
+ "task:%u@%u %sv%d %s offset=%zu copied=%u xdr=[%p,%zu]/%u/[%p,%zu]/%u\n",
+ __entry->task_id, __entry->client_id,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->offset, __entry->copied,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len,
+ __entry->len
+ )
+);
+
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6d0b615..5f0aa53 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -347,13 +347,15 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
* 'len' bytes. The extra data is not lost, but is instead
* moved into the inlined pages and/or the tail.
*/
-static void
+static unsigned int
xdr_shrink_bufhead(struct xdr_buf *buf, size_t len)
{
struct kvec *head, *tail;
size_t copy, offs;
unsigned int pglen = buf->page_len;
+ unsigned int result;

+ result = 0;
tail = buf->tail;
head = buf->head;

@@ -367,6 +369,7 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
copy = tail->iov_len - len;
memmove((char *)tail->iov_base + len,
tail->iov_base, copy);
+ result += copy;
}
/* Copy from the inlined pages into the tail */
copy = len;
@@ -377,11 +380,13 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
copy = 0;
else if (copy > tail->iov_len - offs)
copy = tail->iov_len - offs;
- if (copy != 0)
+ if (copy != 0) {
_copy_from_pages((char *)tail->iov_base + offs,
buf->pages,
buf->page_base + pglen + offs - len,
copy);
+ result += copy;
+ }
/* Do we also need to copy data from the head into the tail ? */
if (len > pglen) {
offs = copy = len - pglen;
@@ -391,6 +396,7 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
(char *)head->iov_base +
head->iov_len - offs,
copy);
+ result += copy;
}
}
/* Now handle pages */
@@ -406,12 +412,15 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
_copy_to_pages(buf->pages, buf->page_base,
(char *)head->iov_base + head->iov_len - len,
copy);
+ result += copy;
}
head->iov_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
+
+ return result;
}

/**
@@ -423,14 +432,16 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
* 'len' bytes. The extra data is not lost, but is instead
* moved into the tail.
*/
-static void
+static unsigned int
xdr_shrink_pagelen(struct xdr_buf *buf, size_t len)
{
struct kvec *tail;
size_t copy;
unsigned int pglen = buf->page_len;
unsigned int tailbuf_len;
+ unsigned int result;

+ result = 0;
tail = buf->tail;
BUG_ON (len > pglen);

@@ -448,18 +459,22 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
if (tail->iov_len > len) {
char *p = (char *)tail->iov_base + len;
memmove(p, tail->iov_base, tail->iov_len - len);
+ result += tail->iov_len - len;
} else
copy = tail->iov_len;
/* Copy from the inlined pages into the tail */
_copy_from_pages((char *)tail->iov_base,
buf->pages, buf->page_base + pglen - len,
copy);
+ result += copy;
}
buf->page_len -= len;
buf->buflen -= len;
/* Have we truncated the message? */
if (buf->len > buf->buflen)
buf->len = buf->buflen;
+
+ return result;
}

void
@@ -959,13 +974,17 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
struct kvec *iov;
unsigned int nwords = XDR_QUADLEN(len);
unsigned int cur = xdr_stream_pos(xdr);
+ unsigned int copied, offset;

if (xdr->nwords == 0)
return 0;
+
/* Realign pages to current pointer position */
- iov = buf->head;
+ iov = buf->head;
if (iov->iov_len > cur) {
- xdr_shrink_bufhead(buf, iov->iov_len - cur);
+ offset = iov->iov_len - cur;
+ copied = xdr_shrink_bufhead(buf, offset);
+ trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}

@@ -977,7 +996,9 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
len = buf->page_len;
else if (nwords < xdr->nwords) {
/* Truncate page data and move it into the tail */
- xdr_shrink_pagelen(buf, buf->page_len - len);
+ offset = buf->page_len - len;
+ copied = xdr_shrink_pagelen(buf, offset);
+ trace_rpc_xdr_alignment(xdr, offset, copied);
xdr->nwords = XDR_QUADLEN(buf->len - cur);
}
return len;


2019-02-11 16:24:28

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 08/23] NFS: Remove print_overflow_msg()

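Decode buffer overflows are now reported by a trace point in the RPC
client, so the per-file print_overflow_msg() helpers (and the similar
warning in the NFSv4 callback read_buf() wrapper) are no longer needed.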

Signed-off-by: Chuck Lever <[email protected]>
---
fs/lockd/clnt4xdr.c | 14 -
fs/lockd/clntxdr.c | 14 -
fs/nfs/callback_xdr.c | 59 +++---
fs/nfs/nfs2xdr.c | 84 +++------
fs/nfs/nfs3xdr.c | 163 +++++------------
fs/nfs/nfs42xdr.c | 21 +-
fs/nfs/nfs4xdr.c | 451 +++++++++++++-----------------------------------
fs/nfsd/nfs4callback.c | 13 -
8 files changed, 219 insertions(+), 600 deletions(-)

diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 214a2fa..7df6324 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -75,17 +75,6 @@ static void nlm4_compute_offsets(const struct nlm_lock *lock,
}

/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("lockd: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NLMv4 basic data types
*
* Basic NLMv4 data types are defined in Appendix II, section 6.1.4
@@ -176,7 +165,6 @@ static int decode_cookie(struct xdr_stream *xdr,
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -236,7 +224,6 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat)
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -309,7 +296,6 @@ static int decode_nlm4_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 747b9c8..4df62f6 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -71,17 +71,6 @@ static void nlm_compute_offsets(const struct nlm_lock *lock,
}

/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("lockd: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NLMv3 basic data types
*
* Basic NLMv3 data types are not defined in an IETF standards
@@ -173,7 +162,6 @@ static int decode_cookie(struct xdr_stream *xdr,
dprintk("NFS: returned cookie was too long: %u\n", length);
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -231,7 +219,6 @@ static int decode_nlm_stat(struct xdr_stream *xdr,
__func__, be32_to_cpup(p));
return -EIO;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -303,7 +290,6 @@ static int decode_nlm_holder(struct xdr_stream *xdr, struct nlm_res *result)
out:
return error;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index bc7c176..06233bf 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -72,16 +72,6 @@ static int nfs4_encode_void(struct svc_rqst *rqstp, __be32 *p)
return xdr_ressize_check(rqstp, p);
}

-static __be32 *read_buf(struct xdr_stream *xdr, size_t nbytes)
-{
- __be32 *p;
-
- p = xdr_inline_decode(xdr, nbytes);
- if (unlikely(p == NULL))
- printk(KERN_WARNING "NFS: NFSv4 callback reply buffer overflowed!\n");
- return p;
-}
-
static __be32 decode_string(struct xdr_stream *xdr, unsigned int *len,
const char **str, size_t maxlen)
{
@@ -98,13 +88,13 @@ static __be32 decode_fh(struct xdr_stream *xdr, struct nfs_fh *fh)
{
__be32 *p;

- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
fh->size = ntohl(*p);
if (fh->size > NFS4_FHSIZE)
return htonl(NFS4ERR_BADHANDLE);
- p = read_buf(xdr, fh->size);
+ p = xdr_inline_decode(xdr, fh->size);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(&fh->data[0], p, fh->size);
@@ -117,11 +107,11 @@ static __be32 decode_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
__be32 *p;
unsigned int attrlen;

- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
attrlen = ntohl(*p);
- p = read_buf(xdr, attrlen << 2);
+ p = xdr_inline_decode(xdr, attrlen << 2);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
if (likely(attrlen > 0))
@@ -135,7 +125,7 @@ static __be32 decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid)
{
__be32 *p;

- p = read_buf(xdr, NFS4_STATEID_SIZE);
+ p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
memcpy(stateid->data, p, NFS4_STATEID_SIZE);
@@ -156,7 +146,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
status = decode_string(xdr, &hdr->taglen, &hdr->tag, CB_OP_TAGLEN_MAXSZ);
if (unlikely(status != 0))
return status;
- p = read_buf(xdr, 12);
+ p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
hdr->minorversion = ntohl(*p++);
@@ -176,7 +166,7 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
static __be32 decode_op_hdr(struct xdr_stream *xdr, unsigned int *op)
{
__be32 *p;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE_HDR);
*op = ntohl(*p);
@@ -205,7 +195,7 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp,
status = decode_delegation_stateid(xdr, &args->stateid);
if (unlikely(status != 0))
return status;
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);
args->truncate = ntohl(*p);
@@ -227,7 +217,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
__be32 status = 0;
uint32_t iomode;

- p = read_buf(xdr, 4 * sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, 4 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);

@@ -245,14 +235,14 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
if (unlikely(status != 0))
return status;

- p = read_buf(xdr, 2 * sizeof(uint64_t));
+ p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_range.offset);
p = xdr_decode_hyper(p, &args->cbl_range.length);
return decode_layout_stateid(xdr, &args->cbl_stateid);
} else if (args->cbl_recall_type == RETURN_FSID) {
- p = read_buf(xdr, 2 * sizeof(uint64_t));
+ p = xdr_inline_decode(xdr, 2 * sizeof(uint64_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
p = xdr_decode_hyper(p, &args->cbl_fsid.major);
@@ -275,7 +265,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
args->ndevs = 0;

/* Num of device notifications */
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto out;
@@ -298,7 +288,8 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
for (i = 0; i < n; i++) {
struct cb_devicenotifyitem *dev = &args->devs[i];

- p = read_buf(xdr, (4 * sizeof(uint32_t)) + NFS4_DEVICEID4_SIZE);
+ p = xdr_inline_decode(xdr, (4 * sizeof(uint32_t)) +
+ NFS4_DEVICEID4_SIZE);
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@@ -329,7 +320,7 @@ __be32 decode_devicenotify_args(struct svc_rqst *rqstp,
p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);

if (dev->cbd_layout_type == NOTIFY_DEVICEID4_CHANGE) {
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL)) {
status = htonl(NFS4ERR_BADXDR);
goto err;
@@ -359,7 +350,7 @@ static __be32 decode_sessionid(struct xdr_stream *xdr,
{
__be32 *p;

- p = read_buf(xdr, NFS4_MAX_SESSIONID_LEN);
+ p = xdr_inline_decode(xdr, NFS4_MAX_SESSIONID_LEN);
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);

@@ -379,13 +370,13 @@ static __be32 decode_rc_list(struct xdr_stream *xdr,
goto out;

status = htonl(NFS4ERR_RESOURCE);
- p = read_buf(xdr, sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;

rc_list->rcl_nrefcalls = ntohl(*p++);
if (rc_list->rcl_nrefcalls) {
- p = read_buf(xdr,
+ p = xdr_inline_decode(xdr,
rc_list->rcl_nrefcalls * 2 * sizeof(uint32_t));
if (unlikely(p == NULL))
goto out;
@@ -418,7 +409,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
if (status)
return status;

- p = read_buf(xdr, 5 * sizeof(uint32_t));
+ p = xdr_inline_decode(xdr, 5 * sizeof(uint32_t));
if (unlikely(p == NULL))
return htonl(NFS4ERR_RESOURCE);

@@ -461,7 +452,7 @@ static __be32 decode_recallany_args(struct svc_rqst *rqstp,
uint32_t bitmap[2];
__be32 *p, status;

- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->craa_objs_to_keep = ntohl(*p++);
@@ -480,7 +471,7 @@ static __be32 decode_recallslot_args(struct svc_rqst *rqstp,
struct cb_recallslotargs *args = argp;
__be32 *p;

- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);
args->crsa_target_highest_slotid = ntohl(*p++);
@@ -492,14 +483,14 @@ static __be32 decode_lockowner(struct xdr_stream *xdr, struct cb_notify_lock_arg
__be32 *p;
unsigned int len;

- p = read_buf(xdr, 12);
+ p = xdr_inline_decode(xdr, 12);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);

p = xdr_decode_hyper(p, &args->cbnl_owner.clientid);
len = be32_to_cpu(*p);

- p = read_buf(xdr, len);
+ p = xdr_inline_decode(xdr, len);
if (unlikely(p == NULL))
return htonl(NFS4ERR_BADXDR);

@@ -537,7 +528,7 @@ static __be32 decode_write_response(struct xdr_stream *xdr,
__be32 *p;

/* skip the always zero field */
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
p++;
@@ -577,7 +568,7 @@ static __be32 decode_offload_args(struct svc_rqst *rqstp,
return status;

/* decode status */
- p = read_buf(xdr, 4);
+ p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
goto out;
args->error = ntohl(*p++);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 350675e..7661431 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -80,17 +80,6 @@ static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
}

/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("NFS: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NFSv2 basic data types
*
* Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
@@ -110,8 +99,8 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
count = be32_to_cpup(p);
recvd = xdr_read_pages(xdr, count);
if (unlikely(count > recvd))
@@ -125,9 +114,6 @@ static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
"count %u > recvd %u\n", count, recvd);
count = recvd;
goto out;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -157,13 +143,10 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
*status = be32_to_cpup(p);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -205,14 +188,11 @@ static int decode_fhandle(struct xdr_stream *xdr, struct nfs_fh *fh)
__be32 *p;

p = xdr_inline_decode(xdr, NFS2_FHSIZE);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
fh->size = NFS2_FHSIZE;
memcpy(fh->data, p, NFS2_FHSIZE);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -282,8 +262,8 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;

p = xdr_inline_decode(xdr, NFS_fattr_sz << 2);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;

fattr->valid |= NFS_ATTR_FATTR_V2;

@@ -325,9 +305,6 @@ static int decode_fattr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
out_gid:
dprintk("NFS: returned invalid gid\n");
return -EINVAL;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -416,23 +393,20 @@ static int decode_filename_inline(struct xdr_stream *xdr,
u32 count;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
count = be32_to_cpup(p);
if (count > NFS3_MAXNAMLEN)
goto out_nametoolong;
p = xdr_inline_decode(xdr, count);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
*name = (const char *)p;
*length = count;
return 0;
out_nametoolong:
dprintk("NFS: returned filename too long: %u\n", count);
return -ENAMETOOLONG;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -455,8 +429,8 @@ static int decode_path(struct xdr_stream *xdr)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
length = be32_to_cpup(p);
if (unlikely(length >= xdr->buf->page_len || length > NFS_MAXPATHLEN))
goto out_size;
@@ -472,9 +446,6 @@ static int decode_path(struct xdr_stream *xdr)
dprintk("NFS: server cheating in pathname result: "
"length %u > received %u\n", length, recvd);
return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -951,12 +922,12 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
int error;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
if (*p++ == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
if (*p++ == xdr_zero)
return -EAGAIN;
entry->eof = 1;
@@ -964,8 +935,8 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
}

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
entry->ino = be32_to_cpup(p);

error = decode_filename_inline(xdr, &entry->name, &entry->len);
@@ -978,17 +949,13 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
*/
entry->prev_cookie = entry->cookie;
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
entry->cookie = be32_to_cpup(p);

entry->d_type = DT_UNKNOWN;

return 0;
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EAGAIN;
}

/*
@@ -1052,17 +1019,14 @@ static int decode_info(struct xdr_stream *xdr, struct nfs2_fsstat *result)
__be32 *p;

p = xdr_inline_decode(xdr, NFS_info_sz << 2);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
result->tsize = be32_to_cpup(p++);
result->bsize = be32_to_cpup(p++);
result->blocks = be32_to_cpup(p++);
result->bfree = be32_to_cpup(p++);
result->bavail = be32_to_cpup(p);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int nfs2_xdr_dec_statfsres(struct rpc_rqst *req, struct xdr_stream *xdr,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 78df4eb..e561980 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -119,17 +119,6 @@ static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
}

/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("NFS: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
-
-/*
* Encode/decode NFSv3 basic data types
*
* Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
@@ -151,13 +140,10 @@ static int decode_uint32(struct xdr_stream *xdr, u32 *value)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
*value = be32_to_cpup(p);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_uint64(struct xdr_stream *xdr, u64 *value)
@@ -165,13 +151,10 @@ static int decode_uint64(struct xdr_stream *xdr, u64 *value)
__be32 *p;

p = xdr_inline_decode(xdr, 8);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
xdr_decode_hyper(p, value);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -211,14 +194,14 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
u32 count;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
count = be32_to_cpup(p);
if (count > NFS3_MAXNAMLEN)
goto out_nametoolong;
p = xdr_inline_decode(xdr, count);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
*name = (const char *)p;
*length = count;
return 0;
@@ -226,9 +209,6 @@ static int decode_inline_filename3(struct xdr_stream *xdr,
out_nametoolong:
dprintk("NFS: returned filename too long: %u\n", count);
return -ENAMETOOLONG;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -249,8 +229,8 @@ static int decode_nfspath3(struct xdr_stream *xdr)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
count = be32_to_cpup(p);
if (unlikely(count >= xdr->buf->page_len || count > NFS3_MAXPATHLEN))
goto out_nametoolong;
@@ -267,9 +247,6 @@ static int decode_nfspath3(struct xdr_stream *xdr)
dprintk("NFS: server cheating in pathname result: "
"count %u > recvd %u\n", count, recvd);
return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -303,13 +280,10 @@ static int decode_cookieverf3(struct xdr_stream *xdr, __be32 *verifier)
__be32 *p;

p = xdr_inline_decode(xdr, NFS3_COOKIEVERFSIZE);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
memcpy(verifier, p, NFS3_COOKIEVERFSIZE);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -330,13 +304,10 @@ static int decode_writeverf3(struct xdr_stream *xdr, struct nfs_write_verifier *
__be32 *p;

p = xdr_inline_decode(xdr, NFS3_WRITEVERFSIZE);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
memcpy(verifier->data, p, NFS3_WRITEVERFSIZE);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -364,13 +335,10 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
*status = be32_to_cpup(p);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -453,23 +421,20 @@ static int decode_nfs_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
length = be32_to_cpup(p++);
if (unlikely(length > NFS3_FHSIZE))
goto out_toobig;
p = xdr_inline_decode(xdr, length);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
fh->size = length;
memcpy(fh->data, p, length);
return 0;
out_toobig:
dprintk("NFS: file handle size (%u) too big\n", length);
return -E2BIG;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static void zero_nfs_fh3(struct nfs_fh *fh)
@@ -655,8 +620,8 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;

p = xdr_inline_decode(xdr, NFS3_fattr_sz << 2);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;

p = xdr_decode_ftype3(p, &fmode);

@@ -690,9 +655,6 @@ static int decode_fattr3(struct xdr_stream *xdr, struct nfs_fattr *fattr)
out_gid:
dprintk("NFS: returned invalid gid\n");
return -EINVAL;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -710,14 +672,11 @@ static int decode_post_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
if (*p != xdr_zero)
return decode_fattr3(xdr, fattr);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -733,8 +692,8 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;

p = xdr_inline_decode(xdr, NFS3_wcc_attr_sz << 2);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;

fattr->valid |= NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PRECHANGE
@@ -747,9 +706,6 @@ static int decode_wcc_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
fattr->pre_change_attr = nfs_timespec_to_change_attr(&fattr->pre_ctime);

return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -773,14 +729,11 @@ static int decode_pre_op_attr(struct xdr_stream *xdr, struct nfs_fattr *fattr)
__be32 *p;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
if (*p != xdr_zero)
return decode_wcc_attr(xdr, fattr);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
@@ -808,15 +761,12 @@ static int decode_wcc_data(struct xdr_stream *xdr, struct nfs_fattr *fattr)
static int decode_post_op_fh3(struct xdr_stream *xdr, struct nfs_fh *fh)
{
__be32 *p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
if (*p != xdr_zero)
return decode_nfs_fh3(xdr, fh);
zero_nfs_fh3(fh);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -1643,8 +1593,8 @@ static int decode_read3resok(struct xdr_stream *xdr,
__be32 *p;

p = xdr_inline_decode(xdr, 4 + 4 + 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
count = be32_to_cpup(p++);
eof = be32_to_cpup(p++);
ocount = be32_to_cpup(p++);
@@ -1667,9 +1617,6 @@ static int decode_read3resok(struct xdr_stream *xdr,
count = recvd;
eof = 0;
goto out;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -1731,22 +1678,18 @@ static int decode_write3resok(struct xdr_stream *xdr,
__be32 *p;

p = xdr_inline_decode(xdr, 4 + 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
result->count = be32_to_cpup(p++);
result->verf->committed = be32_to_cpup(p++);
if (unlikely(result->verf->committed > NFS_FILE_SYNC))
goto out_badvalue;
if (decode_writeverf3(xdr, &result->verf->verifier))
- goto out_eio;
+ return -EIO;
return result->count;
out_badvalue:
dprintk("NFS: bad stable_how value: %u\n", result->verf->committed);
return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
-out_eio:
- return -EIO;
}

static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
@@ -2010,12 +1953,12 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
u64 new_cookie;

p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
if (*p == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
if (*p == xdr_zero)
return -EAGAIN;
entry->eof = 1;
@@ -2051,8 +1994,8 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,

/* In fact, a post_op_fh3: */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EAGAIN;
if (*p != xdr_zero) {
error = decode_nfs_fh3(xdr, entry->fh);
if (unlikely(error)) {
@@ -2069,9 +2012,6 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,

return 0;

-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EAGAIN;
out_truncated:
dprintk("NFS: directory entry contains invalid file handle\n");
*entry = old;
@@ -2183,8 +2123,8 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
__be32 *p;

p = xdr_inline_decode(xdr, 8 * 6 + 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
p = xdr_decode_size3(p, &result->tbytes);
p = xdr_decode_size3(p, &result->fbytes);
p = xdr_decode_size3(p, &result->abytes);
@@ -2193,9 +2133,6 @@ static int decode_fsstat3resok(struct xdr_stream *xdr,
xdr_decode_size3(p, &result->afiles);
/* ignore invarsec */
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int nfs3_xdr_dec_fsstat3res(struct rpc_rqst *req,
@@ -2255,8 +2192,8 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
__be32 *p;

p = xdr_inline_decode(xdr, 4 * 7 + 8 + 8 + 4);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
result->rtmax = be32_to_cpup(p++);
result->rtpref = be32_to_cpup(p++);
result->rtmult = be32_to_cpup(p++);
@@ -2270,9 +2207,6 @@ static int decode_fsinfo3resok(struct xdr_stream *xdr,
/* ignore properties */
result->lease_time = 0;
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int nfs3_xdr_dec_fsinfo3res(struct rpc_rqst *req,
@@ -2328,15 +2262,12 @@ static int decode_pathconf3resok(struct xdr_stream *xdr,
__be32 *p;

p = xdr_inline_decode(xdr, 4 * 6);
- if (unlikely(p == NULL))
- goto out_overflow;
+ if (unlikely(!p))
+ return -EIO;
result->max_link = be32_to_cpup(p++);
result->max_namelen = be32_to_cpup(p);
/* ignore remaining fields */
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int nfs3_xdr_dec_pathconf3res(struct rpc_rqst *req,
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 69f72ed..22b3425 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -394,7 +394,7 @@ static int decode_write_response(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
count = be32_to_cpup(p);
if (count > 1)
return -EREMOTEIO;
@@ -402,18 +402,14 @@ static int decode_write_response(struct xdr_stream *xdr,
status = decode_opaque_fixed(xdr, &res->stateid,
NFS4_STATEID_SIZE);
if (unlikely(status))
- goto out_overflow;
+ return -EIO;
}
p = xdr_inline_decode(xdr, 8 + 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &res->count);
res->verifier.committed = be32_to_cpup(p);
return decode_verifier(xdr, &res->verifier.verifier);
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_copy_requirements(struct xdr_stream *xdr,
@@ -422,14 +418,11 @@ static int decode_copy_requirements(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4 + 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

res->consecutive = be32_to_cpup(p++);
res->synchronous = be32_to_cpup(p++);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_copy(struct xdr_stream *xdr, struct nfs42_copy_res *res)
@@ -474,15 +467,11 @@ static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)

p = xdr_inline_decode(xdr, 4 + 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

res->sr_eof = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &res->sr_offset);
return 0;
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_layoutstats(struct xdr_stream *xdr)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2fc8f6f..24e6a45 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -3144,22 +3144,12 @@ static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
}
#endif /* CONFIG_NFS_V4_1 */

-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("nfs: %s: prematurely hit end of receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
static int decode_opaque_inline(struct xdr_stream *xdr, unsigned int *len, char **string)
{
ssize_t ret = xdr_stream_decode_opaque_inline(xdr, (void **)string,
NFS4_OPAQUE_LIMIT);
- if (unlikely(ret < 0)) {
- if (ret == -EBADMSG)
- print_overflow_msg(__func__, xdr);
+ if (unlikely(ret < 0))
return -EIO;
- }
*len = ret;
return 0;
}
@@ -3170,22 +3160,19 @@ static int decode_compound_hdr(struct xdr_stream *xdr, struct compound_hdr *hdr)

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
hdr->status = be32_to_cpup(p++);
hdr->taglen = be32_to_cpup(p);

p = xdr_inline_decode(xdr, hdr->taglen + 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
hdr->tag = (char *)p;
p += XDR_QUADLEN(hdr->taglen);
hdr->nops = be32_to_cpup(p);
if (unlikely(hdr->nops < 1))
return nfs4_stat_to_errno(hdr->status);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
@@ -3214,7 +3201,6 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
*nfs_retval = -EREMOTEIO;
return false;
out_overflow:
- print_overflow_msg(__func__, xdr);
*nfs_retval = -EIO;
return false;
}
@@ -3235,10 +3221,9 @@ static int decode_ace(struct xdr_stream *xdr, void *ace)
char *str;

p = xdr_inline_decode(xdr, 12);
- if (likely(p))
- return decode_opaque_inline(xdr, &strlen, &str);
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ if (unlikely(!p))
+ return -EIO;
+ return decode_opaque_inline(xdr, &strlen, &str);
}

static ssize_t
@@ -3249,10 +3234,9 @@ static int decode_ace(struct xdr_stream *xdr, void *ace)
ret = xdr_stream_decode_uint32_array(xdr, bitmap, sz);
if (likely(ret >= 0))
return ret;
- if (ret == -EMSGSIZE)
- return sz;
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ if (ret != -EMSGSIZE)
+ return -EIO;
+ return sz;
}

static int decode_attr_bitmap(struct xdr_stream *xdr, uint32_t *bitmap)
@@ -3268,13 +3252,10 @@ static int decode_attr_length(struct xdr_stream *xdr, uint32_t *attrlen, unsigne

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*attrlen = be32_to_cpup(p);
*savep = xdr_stream_pos(xdr);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_supported(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *bitmask)
@@ -3303,7 +3284,7 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
if (likely(bitmap[0] & FATTR4_WORD0_TYPE)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*type = be32_to_cpup(p);
if (*type < NF4REG || *type > NF4NAMEDATTR) {
dprintk("%s: bad type %d\n", __func__, *type);
@@ -3314,9 +3295,6 @@ static int decode_attr_type(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *
}
dprintk("%s: type=0%o\n", __func__, nfs_type2fmt[*type]);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
@@ -3330,15 +3308,12 @@ static int decode_attr_fh_expire_type(struct xdr_stream *xdr,
if (likely(bitmap[0] & FATTR4_WORD0_FH_EXPIRE_TYPE)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*type = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_FH_EXPIRE_TYPE;
}
dprintk("%s: expire type=0x%x\n", __func__, *type);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *change)
@@ -3352,7 +3327,7 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (likely(bitmap[0] & FATTR4_WORD0_CHANGE)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, change);
bitmap[0] &= ~FATTR4_WORD0_CHANGE;
ret = NFS_ATTR_FATTR_CHANGE;
@@ -3360,9 +3335,6 @@ static int decode_attr_change(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
dprintk("%s: change attribute=%Lu\n", __func__,
(unsigned long long)*change);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *size)
@@ -3376,16 +3348,13 @@ static int decode_attr_size(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *
if (likely(bitmap[0] & FATTR4_WORD0_SIZE)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, size);
bitmap[0] &= ~FATTR4_WORD0_SIZE;
ret = NFS_ATTR_FATTR_SIZE;
}
dprintk("%s: file size=%Lu\n", __func__, (unsigned long long)*size);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3398,15 +3367,12 @@ static int decode_attr_link_support(struct xdr_stream *xdr, uint32_t *bitmap, ui
if (likely(bitmap[0] & FATTR4_WORD0_LINK_SUPPORT)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LINK_SUPPORT;
}
dprintk("%s: link support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3419,15 +3385,12 @@ static int decode_attr_symlink_support(struct xdr_stream *xdr, uint32_t *bitmap,
if (likely(bitmap[0] & FATTR4_WORD0_SYMLINK_SUPPORT)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_SYMLINK_SUPPORT;
}
dprintk("%s: symlink support=%s\n", __func__, *res == 0 ? "false" : "true");
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fsid *fsid)
@@ -3442,7 +3405,7 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
if (likely(bitmap[0] & FATTR4_WORD0_FSID)) {
p = xdr_inline_decode(xdr, 16);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &fsid->major);
xdr_decode_hyper(p, &fsid->minor);
bitmap[0] &= ~FATTR4_WORD0_FSID;
@@ -3452,9 +3415,6 @@ static int decode_attr_fsid(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs
(unsigned long long)fsid->major,
(unsigned long long)fsid->minor);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3467,15 +3427,12 @@ static int decode_attr_lease_time(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (likely(bitmap[0] & FATTR4_WORD0_LEASE_TIME)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_LEASE_TIME;
}
dprintk("%s: file size=%u\n", __func__, (unsigned int)*res);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res)
@@ -3487,14 +3444,11 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *
if (likely(bitmap[0] & FATTR4_WORD0_RDATTR_ERROR)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
*res = -be32_to_cpup(p);
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_exclcreat_supported(struct xdr_stream *xdr,
@@ -3526,13 +3480,13 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
if (likely(bitmap[0] & FATTR4_WORD0_FILEHANDLE)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
if (len > NFS4_FHSIZE)
return -EIO;
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
if (fh != NULL) {
memcpy(fh->data, p, len);
fh->size = len;
@@ -3540,9 +3494,6 @@ static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, stru
bitmap[0] &= ~FATTR4_WORD0_FILEHANDLE;
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3555,15 +3506,12 @@ static int decode_attr_aclsupport(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (likely(bitmap[0] & FATTR4_WORD0_ACLSUPPORT)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*res = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_ACLSUPPORT;
}
dprintk("%s: ACLs supported=%u\n", __func__, (unsigned int)*res);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -3577,16 +3525,13 @@ static int decode_attr_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t
if (likely(bitmap[0] & FATTR4_WORD0_FILEID)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, fileid);
bitmap[0] &= ~FATTR4_WORD0_FILEID;
ret = NFS_ATTR_FATTR_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *fileid)
@@ -3600,16 +3545,13 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
if (likely(bitmap[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, fileid);
bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
}
dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3623,15 +3565,12 @@ static int decode_attr_files_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (likely(bitmap[0] & FATTR4_WORD0_FILES_AVAIL)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_AVAIL;
}
dprintk("%s: files avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3645,15 +3584,12 @@ static int decode_attr_files_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (likely(bitmap[0] & FATTR4_WORD0_FILES_FREE)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_FREE;
}
dprintk("%s: files free=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -3667,15 +3603,12 @@ static int decode_attr_files_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (likely(bitmap[0] & FATTR4_WORD0_FILES_TOTAL)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_FILES_TOTAL;
}
dprintk("%s: files total=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
@@ -3686,7 +3619,7 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
n = be32_to_cpup(p);
if (n == 0)
goto root_path;
@@ -3718,9 +3651,6 @@ static int decode_pathname(struct xdr_stream *xdr, struct nfs4_pathname *path)
dprintk(" status %d", status);
status = -EIO;
goto out;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs4_fs_locations *res)
@@ -3745,7 +3675,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
goto out;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ goto out_eio;
n = be32_to_cpup(p);
if (n <= 0)
goto out_eio;
@@ -3758,7 +3688,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
loc = &res->locations[res->nlocations];
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ goto out_eio;
m = be32_to_cpup(p);

dprintk("%s: servers:\n", __func__);
@@ -3796,8 +3726,6 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st
out:
dprintk("%s: fs_locations done, error = %d\n", __func__, status);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
out_eio:
status = -EIO;
goto out;
@@ -3814,15 +3742,12 @@ static int decode_attr_maxfilesize(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (likely(bitmap[0] & FATTR4_WORD0_MAXFILESIZE)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[0] &= ~FATTR4_WORD0_MAXFILESIZE;
}
dprintk("%s: maxfilesize=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxlink)
@@ -3836,15 +3761,12 @@ static int decode_attr_maxlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (likely(bitmap[0] & FATTR4_WORD0_MAXLINK)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*maxlink = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXLINK;
}
dprintk("%s: maxlink=%u\n", __func__, *maxlink);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *maxname)
@@ -3858,15 +3780,12 @@ static int decode_attr_maxname(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
if (likely(bitmap[0] & FATTR4_WORD0_MAXNAME)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*maxname = be32_to_cpup(p);
bitmap[0] &= ~FATTR4_WORD0_MAXNAME;
}
dprintk("%s: maxname=%u\n", __func__, *maxname);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3881,7 +3800,7 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
uint64_t maxread;
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, &maxread);
if (maxread > 0x7FFFFFFF)
maxread = 0x7FFFFFFF;
@@ -3890,9 +3809,6 @@ static int decode_attr_maxread(struct xdr_stream *xdr, uint32_t *bitmap, uint32_
}
dprintk("%s: maxread=%lu\n", __func__, (unsigned long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *res)
@@ -3907,7 +3823,7 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
uint64_t maxwrite;
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, &maxwrite);
if (maxwrite > 0x7FFFFFFF)
maxwrite = 0x7FFFFFFF;
@@ -3916,9 +3832,6 @@ static int decode_attr_maxwrite(struct xdr_stream *xdr, uint32_t *bitmap, uint32
}
dprintk("%s: maxwrite=%lu\n", __func__, (unsigned long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *mode)
@@ -3933,7 +3846,7 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
if (likely(bitmap[1] & FATTR4_WORD1_MODE)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
tmp = be32_to_cpup(p);
*mode = tmp & ~S_IFMT;
bitmap[1] &= ~FATTR4_WORD1_MODE;
@@ -3941,9 +3854,6 @@ static int decode_attr_mode(struct xdr_stream *xdr, uint32_t *bitmap, umode_t *m
}
dprintk("%s: file mode=0%o\n", __func__, (unsigned int)*mode);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t *nlink)
@@ -3957,16 +3867,13 @@ static int decode_attr_nlink(struct xdr_stream *xdr, uint32_t *bitmap, uint32_t
if (likely(bitmap[1] & FATTR4_WORD1_NUMLINKS)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
*nlink = be32_to_cpup(p);
bitmap[1] &= ~FATTR4_WORD1_NUMLINKS;
ret = NFS_ATTR_FATTR_NLINK;
}
dprintk("%s: nlink=%u\n", __func__, (unsigned int)*nlink);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static ssize_t decode_nfs4_string(struct xdr_stream *xdr,
@@ -4011,10 +3918,9 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
return NFS_ATTR_FATTR_OWNER;
}
out:
- if (len != -EBADMSG)
- return 0;
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ if (len == -EBADMSG)
+ return -EIO;
+ return 0;
}

static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -4046,10 +3952,9 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
return NFS_ATTR_FATTR_GROUP;
}
out:
- if (len != -EBADMSG)
- return 0;
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ if (len == -EBADMSG)
+ return -EIO;
+ return 0;
}

static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rdev)
@@ -4066,7 +3971,7 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
major = be32_to_cpup(p++);
minor = be32_to_cpup(p);
tmp = MKDEV(major, minor);
@@ -4077,9 +3982,6 @@ static int decode_attr_rdev(struct xdr_stream *xdr, uint32_t *bitmap, dev_t *rde
}
dprintk("%s: rdev=(0x%x:0x%x)\n", __func__, major, minor);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4093,15 +3995,12 @@ static int decode_attr_space_avail(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_AVAIL)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_AVAIL;
}
dprintk("%s: space avail=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4115,15 +4014,12 @@ static int decode_attr_space_free(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_FREE)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_FREE;
}
dprintk("%s: space free=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *res)
@@ -4137,15 +4033,12 @@ static int decode_attr_space_total(struct xdr_stream *xdr, uint32_t *bitmap, uin
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_TOTAL)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
bitmap[1] &= ~FATTR4_WORD1_SPACE_TOTAL;
}
dprintk("%s: space total=%Lu\n", __func__, (unsigned long long)*res);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint64_t *used)
@@ -4159,7 +4052,7 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
if (likely(bitmap[1] & FATTR4_WORD1_SPACE_USED)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, used);
bitmap[1] &= ~FATTR4_WORD1_SPACE_USED;
ret = NFS_ATTR_FATTR_SPACE_USED;
@@ -4167,9 +4060,6 @@ static int decode_attr_space_used(struct xdr_stream *xdr, uint32_t *bitmap, uint
dprintk("%s: space used=%Lu\n", __func__,
(unsigned long long)*used);
return ret;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static __be32 *
@@ -4189,12 +4079,9 @@ static int decode_attr_time(struct xdr_stream *xdr, struct timespec *time)

p = xdr_inline_decode(xdr, nfstime4_maxsz << 2);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_nfstime4(p, time);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_time_access(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -4265,19 +4152,19 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
if (likely(bitmap[2] & FATTR4_WORD2_SECURITY_LABEL)) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
lfs = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
pi = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p++);
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
if (len < NFS4_MAXLABELLEN) {
if (label) {
memcpy(label->label, p, len);
@@ -4295,10 +4182,6 @@ static int decode_attr_security_label(struct xdr_stream *xdr, uint32_t *bitmap,
dprintk("%s: label=%s, len=%d, PI=%d, LFS=%d\n", __func__,
(char *)label->label, label->len, label->pi, label->lfs);
return status;
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_attr_time_modify(struct xdr_stream *xdr, uint32_t *bitmap, struct timespec *time)
@@ -4342,14 +4225,11 @@ static int decode_change_info(struct xdr_stream *xdr, struct nfs4_change_info *c

p = xdr_inline_decode(xdr, 20);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
cinfo->atomic = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &cinfo->before);
xdr_decode_hyper(p, &cinfo->after);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
@@ -4363,24 +4243,19 @@ static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
return status;
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
supp = be32_to_cpup(p++);
acc = be32_to_cpup(p);
*supported = supp;
*access = acc;
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_opaque_fixed(struct xdr_stream *xdr, void *buf, size_t len)
{
ssize_t ret = xdr_stream_decode_opaque_fixed(xdr, buf, len);
- if (unlikely(ret < 0)) {
- print_overflow_msg(__func__, xdr);
+ if (unlikely(ret < 0))
return -EIO;
- }
return 0;
}

@@ -4460,13 +4335,11 @@ static int decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
bmlen = be32_to_cpup(p);
p = xdr_inline_decode(xdr, bmlen << 2);
if (likely(p))
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -4574,13 +4447,10 @@ static int decode_threshold_hint(struct xdr_stream *xdr,
if (likely(bitmap[0] & hint_bit)) {
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, res);
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_first_threshold_item4(struct xdr_stream *xdr,
@@ -4593,10 +4463,8 @@ static int decode_first_threshold_item4(struct xdr_stream *xdr,

/* layout type */
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p)) {
- print_overflow_msg(__func__, xdr);
+ if (unlikely(!p))
return -EIO;
- }
res->l_type = be32_to_cpup(p);

/* thi_hintset bitmap */
@@ -4654,7 +4522,7 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
return -EREMOTEIO;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
num = be32_to_cpup(p);
if (num == 0)
return 0;
@@ -4667,9 +4535,6 @@ static int decode_attr_mdsthreshold(struct xdr_stream *xdr,
bitmap[2] &= ~FATTR4_WORD2_MDSTHRESHOLD;
}
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
@@ -4857,7 +4722,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
fsinfo->nlayouttypes = be32_to_cpup(p);

/* pNFS is not supported by the underlying file system */
@@ -4867,7 +4732,7 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,
/* Decode and set first layout type, move xdr->p past unused types */
p = xdr_inline_decode(xdr, fsinfo->nlayouttypes * 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

/* If we get too many, then just cap it at the max */
if (fsinfo->nlayouttypes > NFS_MAX_LAYOUT_TYPES) {
@@ -4879,9 +4744,6 @@ static int decode_pnfs_layout_types(struct xdr_stream *xdr,
for(i = 0; i < fsinfo->nlayouttypes; ++i)
fsinfo->layouttype[i] = be32_to_cpup(p++);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

/*
@@ -4915,10 +4777,8 @@ static int decode_attr_layout_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
*res = 0;
if (bitmap[2] & FATTR4_WORD2_LAYOUT_BLKSIZE) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p)) {
- print_overflow_msg(__func__, xdr);
+ if (unlikely(!p))
return -EIO;
- }
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_LAYOUT_BLKSIZE;
}
@@ -4937,10 +4797,8 @@ static int decode_attr_clone_blksize(struct xdr_stream *xdr, uint32_t *bitmap,
*res = 0;
if (bitmap[2] & FATTR4_WORD2_CLONE_BLKSIZE) {
p = xdr_inline_decode(xdr, 4);
- if (unlikely(!p)) {
- print_overflow_msg(__func__, xdr);
+ if (unlikely(!p))
return -EIO;
- }
*res = be32_to_cpup(p);
bitmap[2] &= ~FATTR4_WORD2_CLONE_BLKSIZE;
}
@@ -5016,19 +4874,16 @@ static int decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
if (len > NFS4_FHSIZE)
return -EIO;
fh->size = len;
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
memcpy(fh->data, p, len);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -5052,7 +4907,7 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)

p = xdr_inline_decode(xdr, 32); /* read 32 bytes */
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &offset); /* read 2 8-byte long words */
p = xdr_decode_hyper(p, &length);
type = be32_to_cpup(p++); /* 4 byte read */
@@ -5069,11 +4924,9 @@ static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
p = xdr_decode_hyper(p, &clientid); /* read 8 bytes */
namelen = be32_to_cpup(p); /* read 4 bytes */ /* have read all 32 bytes now */
p = xdr_inline_decode(xdr, namelen); /* variable size field */
- if (likely(p))
- return -NFS4ERR_DENIED;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
+ if (unlikely(!p))
+ return -EIO;
+ return -NFS4ERR_DENIED;
}

static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
@@ -5142,7 +4995,7 @@ static int decode_space_limit(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
limit_type = be32_to_cpup(p++);
switch (limit_type) {
case NFS4_LIMIT_SIZE:
@@ -5156,9 +5009,6 @@ static int decode_space_limit(struct xdr_stream *xdr,
maxsize >>= PAGE_SHIFT;
*pagemod_limit = min_t(u64, maxsize, ULONG_MAX);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_rw_delegation(struct xdr_stream *xdr,
@@ -5173,7 +5023,7 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->do_recall = be32_to_cpup(p);

switch (delegation_type) {
@@ -5186,9 +5036,6 @@ static int decode_rw_delegation(struct xdr_stream *xdr,
return -EIO;
}
return decode_ace(xdr, NULL);
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5198,7 +5045,7 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
why_no_delegation = be32_to_cpup(p);
switch (why_no_delegation) {
case WND4_CONTENTION:
@@ -5207,9 +5054,6 @@ static int decode_no_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
/* Ignore for now */
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5219,7 +5063,7 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
delegation_type = be32_to_cpup(p);
res->delegation_type = 0;
switch (delegation_type) {
@@ -5232,9 +5076,6 @@ static int decode_delegation(struct xdr_stream *xdr, struct nfs_openres *res)
return decode_no_delegation(xdr, res);
}
return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
@@ -5256,7 +5097,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->rflags = be32_to_cpup(p++);
bmlen = be32_to_cpup(p);
if (bmlen > 10)
@@ -5264,7 +5105,7 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)

p = xdr_inline_decode(xdr, bmlen << 2);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
savewords = min_t(uint32_t, bmlen, NFS4_BITMAP_SIZE);
for (i = 0; i < savewords; ++i)
res->attrset[i] = be32_to_cpup(p++);
@@ -5275,9 +5116,6 @@ static int decode_open(struct xdr_stream *xdr, struct nfs_openres *res)
xdr_error:
dprintk("%s: Bitmap too large! Length = %u\n", __func__, bmlen);
return -EIO;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_open_confirm(struct xdr_stream *xdr, struct nfs_open_confirmres *res)
@@ -5326,7 +5164,7 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
return status;
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
eof = be32_to_cpup(p++);
count = be32_to_cpup(p);
recvd = xdr_read_pages(xdr, count);
@@ -5339,9 +5177,6 @@ static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
res->eof = eof;
res->count = count;
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
@@ -5374,7 +5209,7 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
/* Convert length of symlink */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
if (len >= rcvbuf->page_len || len <= 0) {
dprintk("nfs: server returned giant symlink!\n");
@@ -5395,9 +5230,6 @@ static int decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
*/
xdr_terminate_string(rcvbuf, len);
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
@@ -5500,7 +5332,6 @@ static int decode_setattr(struct xdr_stream *xdr)
return status;
if (decode_bitmap4(xdr, NULL, 0) >= 0)
return 0;
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -5512,7 +5343,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
opnum = be32_to_cpup(p++);
if (opnum != OP_SETCLIENTID) {
dprintk("nfs: decode_setclientid: Server returned operation"
@@ -5523,7 +5354,7 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re
if (nfserr == NFS_OK) {
p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &res->clientid);
memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
} else if (nfserr == NFSERR_CLID_INUSE) {
@@ -5532,28 +5363,25 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_re
/* skip netid string */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

/* skip uaddr string */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
return -NFSERR_CLID_INUSE;
} else
return nfs4_stat_to_errno(nfserr);

return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_setclientid_confirm(struct xdr_stream *xdr)
@@ -5572,13 +5400,10 @@ static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->count = be32_to_cpup(p++);
res->verf->committed = be32_to_cpup(p++);
return decode_write_verifier(xdr, &res->verf->verifier);
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_delegreturn(struct xdr_stream *xdr)
@@ -5594,30 +5419,24 @@ static int decode_secinfo_gss(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
oid_len = be32_to_cpup(p);
if (oid_len > GSS_OID_MAX_LEN)
- goto out_err;
+ return -EINVAL;

p = xdr_inline_decode(xdr, oid_len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
memcpy(flavor->flavor_info.oid.data, p, oid_len);
flavor->flavor_info.oid.len = oid_len;

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
flavor->flavor_info.qop = be32_to_cpup(p++);
flavor->flavor_info.service = be32_to_cpup(p);

return 0;
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
-out_err:
- return -EINVAL;
}

static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
@@ -5629,7 +5448,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

res->flavors->num_flavors = 0;
num_flavors = be32_to_cpup(p);
@@ -5641,7 +5460,7 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
sec_flavor->flavor = be32_to_cpup(p);

if (sec_flavor->flavor == RPC_AUTH_GSS) {
@@ -5655,9 +5474,6 @@ static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res
status = 0;
out:
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
@@ -5711,11 +5527,11 @@ static int decode_exchange_id(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
xdr_decode_hyper(p, &res->clientid);
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->seqid = be32_to_cpup(p++);
res->flags = be32_to_cpup(p++);

@@ -5739,7 +5555,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
/* server_owner4.so_minor_id */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &res->server_owner->minor_id);

/* server_owner4.so_major_id */
@@ -5759,7 +5575,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
/* Implementation Id */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
impl_id_count = be32_to_cpup(p++);

if (impl_id_count) {
@@ -5778,16 +5594,13 @@ static int decode_exchange_id(struct xdr_stream *xdr,
/* nii_date */
p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
p = xdr_decode_hyper(p, &res->impl_id->date.seconds);
res->impl_id->date.nseconds = be32_to_cpup(p);

/* if there's more than one entry, ignore the rest */
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_chan_attrs(struct xdr_stream *xdr,
@@ -5798,7 +5611,7 @@ static int decode_chan_attrs(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 28);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
val = be32_to_cpup(p++); /* headerpadsz */
if (val)
return -EINVAL; /* no support for header padding yet */
@@ -5816,12 +5629,9 @@ static int decode_chan_attrs(struct xdr_stream *xdr,
if (nr_attrs == 1) {
p = xdr_inline_decode(xdr, 4); /* skip rdma_attrs */
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_sessionid(struct xdr_stream *xdr, struct nfs4_sessionid *sid)
@@ -5844,7 +5654,7 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr,
/* dir flags, rdma mode bool */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

res->dir = be32_to_cpup(p++);
if (res->dir == 0 || res->dir > NFS4_CDFS4_BOTH)
@@ -5855,9 +5665,6 @@ static int decode_bind_conn_to_session(struct xdr_stream *xdr,
res->use_conn_in_rdma_mode = true;

return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_create_session(struct xdr_stream *xdr,
@@ -5875,7 +5682,7 @@ static int decode_create_session(struct xdr_stream *xdr,
/* seqid, flags */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->seqid = be32_to_cpup(p++);
res->flags = be32_to_cpup(p);

@@ -5884,9 +5691,6 @@ static int decode_create_session(struct xdr_stream *xdr,
if (!status)
status = decode_chan_attrs(xdr, &res->bc_attrs);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_destroy_session(struct xdr_stream *xdr, void *dummy)
@@ -5967,7 +5771,6 @@ static int decode_sequence(struct xdr_stream *xdr,
res->sr_status = status;
return status;
out_overflow:
- print_overflow_msg(__func__, xdr);
status = -EIO;
goto out_err;
#else /* CONFIG_NFS_V4_1 */
@@ -5995,7 +5798,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
if (status == -ETOOSMALL) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
pdev->mincount = be32_to_cpup(p);
dprintk("%s: Min count too small. mincnt = %u\n",
__func__, pdev->mincount);
@@ -6005,7 +5808,7 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
type = be32_to_cpup(p++);
if (type != pdev->layout_type) {
dprintk("%s: layout mismatch req: %u pdev: %u\n",
@@ -6019,19 +5822,19 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
*/
pdev->mincount = be32_to_cpup(p);
if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
- goto out_overflow;
+ return -EIO;

/* Parse notification bitmap, verifying that it is zero. */
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
len = be32_to_cpup(p);
if (len) {
uint32_t i;

p = xdr_inline_decode(xdr, 4 * len);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;

res->notification = be32_to_cpup(p++);
for (i = 1; i < len; i++) {
@@ -6043,9 +5846,6 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
}
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
@@ -6115,7 +5915,6 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
res->status = status;
return status;
out_overflow:
- print_overflow_msg(__func__, xdr);
status = -EIO;
goto out;
}
@@ -6131,16 +5930,13 @@ static int decode_layoutreturn(struct xdr_stream *xdr,
return status;
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->lrs_present = be32_to_cpup(p);
if (res->lrs_present)
status = decode_layout_stateid(xdr, &res->stateid);
else
nfs4_stateid_copy(&res->stateid, &invalid_stateid);
return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_layoutcommit(struct xdr_stream *xdr,
@@ -6158,19 +5954,16 @@ static int decode_layoutcommit(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
sizechanged = be32_to_cpup(p);

if (sizechanged) {
/* throw away new size */
p = xdr_inline_decode(xdr, 8);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
}
return 0;
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EIO;
}

static int decode_test_stateid(struct xdr_stream *xdr,
@@ -6186,21 +5979,17 @@ static int decode_test_stateid(struct xdr_stream *xdr,

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
num_res = be32_to_cpup(p++);
if (num_res != 1)
- goto out;
+ return -EIO;

p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EIO;
res->status = be32_to_cpup(p++);

return status;
-out_overflow:
- print_overflow_msg(__func__, xdr);
-out:
- return -EIO;
}

static int decode_free_stateid(struct xdr_stream *xdr,
@@ -7570,11 +7359,11 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
uint64_t new_cookie;
__be32 *p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EAGAIN;
if (*p == xdr_zero) {
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
- goto out_overflow;
+ return -EAGAIN;
if (*p == xdr_zero)
return -EAGAIN;
entry->eof = 1;
@@ -7583,13 +7372,13 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,

p = xdr_inline_decode(xdr, 12);
if (unlikely(!p))
- goto out_overflow;
+ return -EAGAIN;
p = xdr_decode_hyper(p, &new_cookie);
entry->len = be32_to_cpup(p);

p = xdr_inline_decode(xdr, entry->len);
if (unlikely(!p))
- goto out_overflow;
+ return -EAGAIN;
entry->name = (const char *) p;

/*
@@ -7601,14 +7390,14 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
entry->fattr->valid = 0;

if (decode_attr_bitmap(xdr, bitmap) < 0)
- goto out_overflow;
+ return -EAGAIN;

if (decode_attr_length(xdr, &len, &savep) < 0)
- goto out_overflow;
+ return -EAGAIN;

if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
NULL, entry->label, entry->server) < 0)
- goto out_overflow;
+ return -EAGAIN;
if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
entry->ino = entry->fattr->mounted_on_fileid;
else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
@@ -7622,10 +7411,6 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
entry->cookie = new_cookie;

return 0;
-
-out_overflow:
- print_overflow_msg(__func__, xdr);
- return -EAGAIN;
}

/*
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index c74e453..a9d24d5 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -60,16 +60,6 @@ struct nfs4_cb_compound_hdr {
int status;
};

-/*
- * Handle decode buffer overflows out-of-line.
- */
-static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
-{
- dprintk("NFS: %s prematurely hit the end of our receive buffer. "
- "Remaining buffer length is %tu words.\n",
- func, xdr->end - xdr->p);
-}
-
static __be32 *xdr_encode_empty_array(__be32 *p)
{
*p++ = xdr_zero;
@@ -240,7 +230,6 @@ static int decode_cb_op_status(struct xdr_stream *xdr,
*status = nfs_cb_stat_to_errno(be32_to_cpup(p));
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
out_unexpected:
dprintk("NFSD: Callback server returned operation %d but "
@@ -309,7 +298,6 @@ static int decode_cb_compound4res(struct xdr_stream *xdr,
hdr->nops = be32_to_cpup(p);
return 0;
out_overflow:
- print_overflow_msg(__func__, xdr);
return -EIO;
}

@@ -437,7 +425,6 @@ static int decode_cb_sequence4resok(struct xdr_stream *xdr,
cb->cb_seq_status = status;
return status;
out_overflow:
- print_overflow_msg(__func__, xdr);
status = -EIO;
goto out;
}


2019-02-11 16:24:32

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 09/23] NFS: Add trace events to report non-zero NFS status codes

These can help with troubleshooting in the field without needing the
overhead of a full network capture (i.e., tcpdump).

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/nfs2xdr.c | 7 ++++
fs/nfs/nfs3xdr.c | 7 ++++
fs/nfs/nfs4trace.h | 25 +++++++++++++++
fs/nfs/nfs4xdr.c | 12 +++++--
fs/nfs/nfstrace.c | 1 +
fs/nfs/nfstrace.h | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 133 insertions(+), 4 deletions(-)

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 7661431..bac3a4e 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -22,6 +22,7 @@
#include <linux/nfs.h>
#include <linux/nfs2.h>
#include <linux/nfs_fs.h>
+#include "nfstrace.h"
#include "internal.h"

#define NFSDBG_FACILITY NFSDBG_XDR
@@ -145,7 +146,13 @@ static int decode_stat(struct xdr_stream *xdr, enum nfs_stat *status)
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
+ if (unlikely(*p != cpu_to_be32(NFS_OK)))
+ goto out_status;
+ *status = 0;
+ return 0;
+out_status:
*status = be32_to_cpup(p);
+ trace_nfs_xdr_status((int)*status);
return 0;
}

diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index e561980..4aa3ffe 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -21,6 +21,7 @@
#include <linux/nfs3.h>
#include <linux/nfs_fs.h>
#include <linux/nfsacl.h>
+#include "nfstrace.h"
#include "internal.h"

#define NFSDBG_FACILITY NFSDBG_XDR
@@ -337,7 +338,13 @@ static int decode_nfsstat3(struct xdr_stream *xdr, enum nfs_stat *status)
p = xdr_inline_decode(xdr, 4);
if (unlikely(!p))
return -EIO;
+ if (unlikely(*p != cpu_to_be32(NFS3_OK)))
+ goto out_status;
+ *status = 0;
+ return 0;
+out_status:
*status = be32_to_cpup(p);
+ trace_nfs_xdr_status((int)*status);
return 0;
}

diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index b4557cf..cd1a5c0 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -524,6 +524,31 @@
)
);

+TRACE_EVENT(nfs4_xdr_status,
+ TP_PROTO(
+ u32 op,
+ int error
+ ),
+
+ TP_ARGS(op, error),
+
+ TP_STRUCT__entry(
+ __field(u32, op)
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->op = op;
+ __entry->error = -error;
+ ),
+
+ TP_printk(
+ "operation %d: nfs status %d (%s)",
+ __entry->op,
+ __entry->error, show_nfsv4_errors(__entry->error)
+ )
+);
+
DECLARE_EVENT_CLASS(nfs4_open_event,
TP_PROTO(
const struct nfs_open_context *ctx,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 24e6a45..38a4cbc 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -54,6 +54,7 @@
#include <linux/nfs_fs.h>

#include "nfs4_fs.h"
+#include "nfs4trace.h"
#include "internal.h"
#include "nfs4idmap.h"
#include "nfs4session.h"
@@ -3188,11 +3189,14 @@ static bool __decode_op_hdr(struct xdr_stream *xdr, enum nfs_opnum4 expected,
opnum = be32_to_cpup(p++);
if (unlikely(opnum != expected))
goto out_bad_operation;
+ if (unlikely(*p != cpu_to_be32(NFS_OK)))
+ goto out_status;
+ *nfs_retval = 0;
+ return true;
+out_status:
nfserr = be32_to_cpup(p);
- if (nfserr == NFS_OK)
- *nfs_retval = 0;
- else
- *nfs_retval = nfs4_stat_to_errno(nfserr);
+ trace_nfs4_xdr_status(opnum, nfserr);
+ *nfs_retval = nfs4_stat_to_errno(nfserr);
return true;
out_bad_operation:
dprintk("nfs: Server returned operation"
diff --git a/fs/nfs/nfstrace.c b/fs/nfs/nfstrace.c
index b60d5fb..a90b363 100644
--- a/fs/nfs/nfstrace.c
+++ b/fs/nfs/nfstrace.c
@@ -11,3 +11,4 @@

EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
index bd60f8d..a0d6910 100644
--- a/fs/nfs/nfstrace.h
+++ b/fs/nfs/nfstrace.h
@@ -969,6 +969,91 @@
)
);

+TRACE_DEFINE_ENUM(NFS_OK);
+TRACE_DEFINE_ENUM(NFSERR_PERM);
+TRACE_DEFINE_ENUM(NFSERR_NOENT);
+TRACE_DEFINE_ENUM(NFSERR_IO);
+TRACE_DEFINE_ENUM(NFSERR_NXIO);
+TRACE_DEFINE_ENUM(NFSERR_ACCES);
+TRACE_DEFINE_ENUM(NFSERR_EXIST);
+TRACE_DEFINE_ENUM(NFSERR_XDEV);
+TRACE_DEFINE_ENUM(NFSERR_NODEV);
+TRACE_DEFINE_ENUM(NFSERR_NOTDIR);
+TRACE_DEFINE_ENUM(NFSERR_ISDIR);
+TRACE_DEFINE_ENUM(NFSERR_INVAL);
+TRACE_DEFINE_ENUM(NFSERR_FBIG);
+TRACE_DEFINE_ENUM(NFSERR_NOSPC);
+TRACE_DEFINE_ENUM(NFSERR_ROFS);
+TRACE_DEFINE_ENUM(NFSERR_MLINK);
+TRACE_DEFINE_ENUM(NFSERR_NAMETOOLONG);
+TRACE_DEFINE_ENUM(NFSERR_NOTEMPTY);
+TRACE_DEFINE_ENUM(NFSERR_DQUOT);
+TRACE_DEFINE_ENUM(NFSERR_STALE);
+TRACE_DEFINE_ENUM(NFSERR_REMOTE);
+TRACE_DEFINE_ENUM(NFSERR_WFLUSH);
+TRACE_DEFINE_ENUM(NFSERR_BADHANDLE);
+TRACE_DEFINE_ENUM(NFSERR_NOT_SYNC);
+TRACE_DEFINE_ENUM(NFSERR_BAD_COOKIE);
+TRACE_DEFINE_ENUM(NFSERR_NOTSUPP);
+TRACE_DEFINE_ENUM(NFSERR_TOOSMALL);
+TRACE_DEFINE_ENUM(NFSERR_SERVERFAULT);
+TRACE_DEFINE_ENUM(NFSERR_BADTYPE);
+TRACE_DEFINE_ENUM(NFSERR_JUKEBOX);
+
+#define nfs_show_status(x) \
+ __print_symbolic(x, \
+ { NFS_OK, "OK" }, \
+ { NFSERR_PERM, "PERM" }, \
+ { NFSERR_NOENT, "NOENT" }, \
+ { NFSERR_IO, "IO" }, \
+ { NFSERR_NXIO, "NXIO" }, \
+ { NFSERR_ACCES, "ACCES" }, \
+ { NFSERR_EXIST, "EXIST" }, \
+ { NFSERR_XDEV, "XDEV" }, \
+ { NFSERR_NODEV, "NODEV" }, \
+ { NFSERR_NOTDIR, "NOTDIR" }, \
+ { NFSERR_ISDIR, "ISDIR" }, \
+ { NFSERR_INVAL, "INVAL" }, \
+ { NFSERR_FBIG, "FBIG" }, \
+ { NFSERR_NOSPC, "NOSPC" }, \
+ { NFSERR_ROFS, "ROFS" }, \
+ { NFSERR_MLINK, "MLINK" }, \
+ { NFSERR_NAMETOOLONG, "NAMETOOLONG" }, \
+ { NFSERR_NOTEMPTY, "NOTEMPTY" }, \
+ { NFSERR_DQUOT, "DQUOT" }, \
+ { NFSERR_STALE, "STALE" }, \
+ { NFSERR_REMOTE, "REMOTE" }, \
+ { NFSERR_WFLUSH, "WFLUSH" }, \
+ { NFSERR_BADHANDLE, "BADHANDLE" }, \
+ { NFSERR_NOT_SYNC, "NOTSYNC" }, \
+ { NFSERR_BAD_COOKIE, "BADCOOKIE" }, \
+ { NFSERR_NOTSUPP, "NOTSUPP" }, \
+ { NFSERR_TOOSMALL, "TOOSMALL" }, \
+ { NFSERR_SERVERFAULT, "REMOTEIO" }, \
+ { NFSERR_BADTYPE, "BADTYPE" }, \
+ { NFSERR_JUKEBOX, "JUKEBOX" })
+
+TRACE_EVENT(nfs_xdr_status,
+ TP_PROTO(
+ int error
+ ),
+
+ TP_ARGS(error),
+
+ TP_STRUCT__entry(
+ __field(int, error)
+ ),
+
+ TP_fast_assign(
+ __entry->error = error;
+ ),
+
+ TP_printk(
+ "error=%d (%s)",
+ __entry->error, nfs_show_status(__entry->error)
+ )
+);
+
#endif /* _TRACE_NFS_H */

#undef TRACE_INCLUDE_PATH


2019-02-11 16:24:37

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 10/23] SUNRPC: Remove some dprintk() call sites from auth functions

Clean up: Reduce dprintk noise by removing dprintk() call sites
from hot paths that do not report exceptions. These are usually
replaceable with function graph tracing.

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/auth.c | 29 -----------------------------
net/sunrpc/auth_unix.c | 9 +--------
2 files changed, 1 insertion(+), 37 deletions(-)

diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 8dfab61..275e84e 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -17,10 +17,6 @@
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>

-#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
-# define RPCDBG_FACILITY RPCDBG_AUTH
-#endif
-
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
struct hlist_head *hashtable;
@@ -267,8 +263,6 @@ static int param_get_hashtbl_sz(char *buffer, const struct kernel_param *kp)
}
}
rcu_read_unlock();
-
- dprintk("RPC: %s returns %d\n", __func__, result);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_list_flavors);
@@ -636,9 +630,6 @@ struct rpc_cred *
struct rpc_cred *ret;
const struct cred *cred = current_cred();

- dprintk("RPC: looking up %s cred\n",
- auth->au_ops->au_name);
-
memset(&acred, 0, sizeof(acred));
acred.cred = cred;
ret = auth->au_ops->lookup_cred(auth, &acred, flags);
@@ -670,8 +661,6 @@ struct rpc_cred *
};
struct rpc_cred *ret;

- dprintk("RPC: %5u looking up %s cred\n",
- task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
ret = auth->au_ops->lookup_cred(auth, &acred, lookupflags);
put_cred(acred.cred);
return ret;
@@ -688,8 +677,6 @@ struct rpc_cred *

if (!acred.principal)
return NULL;
- dprintk("RPC: %5u looking up %s machine cred\n",
- task->tk_pid, task->tk_client->cl_auth->au_ops->au_name);
return auth->au_ops->lookup_cred(auth, &acred, lookupflags);
}

@@ -698,8 +685,6 @@ struct rpc_cred *
{
struct rpc_auth *auth = task->tk_client->cl_auth;

- dprintk("RPC: %5u looking up %s cred\n",
- task->tk_pid, auth->au_ops->au_name);
return rpcauth_lookupcred(auth, lookupflags);
}

@@ -776,9 +761,6 @@ struct rpc_cred *
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;

- dprintk("RPC: %5u marshaling %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
return cred->cr_ops->crmarshal(task, p);
}

@@ -787,9 +769,6 @@ struct rpc_cred *
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;

- dprintk("RPC: %5u validating %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
-
return cred->cr_ops->crvalidate(task, p);
}

@@ -808,8 +787,6 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;

- dprintk("RPC: %5u using %s cred %p to wrap rpc data\n",
- task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crwrap_req)
return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
/* By default, we encode the arguments normally. */
@@ -833,8 +810,6 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;

- dprintk("RPC: %5u using %s cred %p to unwrap rpc data\n",
- task->tk_pid, cred->cr_ops->cr_name, cred);
if (cred->cr_ops->crunwrap_resp)
return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
data, obj);
@@ -865,8 +840,6 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
goto out;
cred = task->tk_rqstp->rq_cred;
}
- dprintk("RPC: %5u refreshing %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);

err = cred->cr_ops->crrefresh(task);
out:
@@ -880,8 +853,6 @@ static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;

- dprintk("RPC: %5u invalidating %s cred %p\n",
- task->tk_pid, cred->cr_auth->au_ops->au_name, cred);
if (cred)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
}
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 387f6b3..fc8a591 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -28,8 +28,6 @@
static struct rpc_auth *
unx_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- dprintk("RPC: creating UNIX authenticator for client %p\n",
- clnt);
refcount_inc(&unix_auth.au_count);
return &unix_auth;
}
@@ -37,7 +35,6 @@
static void
unx_destroy(struct rpc_auth *auth)
{
- dprintk("RPC: destroying UNIX authenticator %p\n", auth);
}

/*
@@ -48,10 +45,6 @@
{
struct rpc_cred *ret = mempool_alloc(unix_pool, GFP_NOFS);

- dprintk("RPC: allocating UNIX cred for uid %d gid %d\n",
- from_kuid(&init_user_ns, acred->cred->fsuid),
- from_kgid(&init_user_ns, acred->cred->fsgid));
-
rpcauth_init_cred(ret, acred, auth, &unix_credops);
ret->cr_flags = 1UL << RPCAUTH_CRED_UPTODATE;
return ret;
@@ -61,7 +54,7 @@
unx_free_cred_callback(struct rcu_head *head)
{
struct rpc_cred *rpc_cred = container_of(head, struct rpc_cred, cr_rcu);
- dprintk("RPC: unx_free_cred %p\n", rpc_cred);
+
put_cred(rpc_cred->cr_cred);
mempool_free(rpc_cred, unix_pool);
}


2019-02-11 16:24:41

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 11/23] SUNRPC: Remove rpc_xprt::tsh_size

tsh_size was added to accommodate transports that send a preamble
before each RPC message. However, this assumes the preamble is
fixed in size, which isn't true for some transports. That makes
tsh_size not very generic.

Also, I'd like to make the estimation of RPC send and receive
buffer sizes more precise. tsh_size doesn't currently appear to be
accounted for at all by call_allocate.

Therefore let's just remove the tsh_size concept, and make the only
transports that have a non-zero tsh_size employ a direct approach.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/xprt.h | 7 --
net/sunrpc/auth_gss/auth_gss.c | 3 -
net/sunrpc/clnt.c | 1
net/sunrpc/svc.c | 19 +-----
net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 1
net/sunrpc/xprtrdma/transport.c | 1
net/sunrpc/xprtsock.c | 91 ++++++++++++++++++----------
7 files changed, 65 insertions(+), 58 deletions(-)

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index ad7e910..3a39154 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -196,8 +196,6 @@ struct rpc_xprt {

size_t max_payload; /* largest RPC payload size,
in bytes */
- unsigned int tsh_size; /* size of transport specific
- header */

struct rpc_wait_queue binding; /* requests waiting on rpcbind */
struct rpc_wait_queue sending; /* requests waiting to send */
@@ -362,11 +360,6 @@ struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
unsigned int max_req);
void xprt_free(struct rpc_xprt *);

-static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
-{
- return p + xprt->tsh_size;
-}
-
static inline int
xprt_enable_swap(struct rpc_xprt *xprt)
{
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index a42672e..4b52e2b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1563,8 +1563,7 @@ static void gss_pipe_free(struct gss_pipe *p)

/* We compute the checksum for the verifier over the xdr-encoded bytes
* starting with the xid and ending at the end of the credential: */
- iov.iov_base = xprt_skip_transport_header(req->rq_xprt,
- req->rq_snd_buf.head[0].iov_base);
+ iov.iov_base = req->rq_snd_buf.head[0].iov_base;
iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
xdr_buf_from_iov(&iov, &verf_buf);

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d7ec613..c4203f6 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -2331,7 +2331,6 @@ void rpc_force_rebind(struct rpc_clnt *clnt)

/* FIXME: check buffer size? */

- p = xprt_skip_transport_header(req->rq_xprt, p);
*p++ = req->rq_xid; /* XID */
*p++ = htonl(RPC_CALL); /* CALL */
*p++ = htonl(RPC_VERSION); /* RPC version */
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index e87ddb9..dbd1969 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1145,17 +1145,6 @@ static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ..
#endif

/*
- * Setup response header for TCP, it has a 4B record length field.
- */
-static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
-{
- struct kvec *resv = &rqstp->rq_res.head[0];
-
- /* tcp needs a space for the record length... */
- svc_putnl(resv, 0);
-}
-
-/*
* Common routine for processing the RPC request.
*/
static int
@@ -1182,10 +1171,6 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
clear_bit(RQ_DROPME, &rqstp->rq_flags);

- /* Setup reply header */
- if (rqstp->rq_prot == IPPROTO_TCP)
- svc_tcp_prep_reply_hdr(rqstp);
-
svc_putu32(resv, rqstp->rq_xid);

vers = svc_getnl(argv);
@@ -1443,6 +1428,10 @@ static void svc_tcp_prep_reply_hdr(struct svc_rqst *rqstp)
goto out_drop;
}

+ /* Reserve space for the record marker */
+ if (rqstp->rq_prot == IPPROTO_TCP)
+ svc_putnl(resv, 0);
+
/* Returns 1 for send, 0 for drop */
if (likely(svc_process_common(rqstp, argv, resv)))
return svc_send(rqstp);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index b908f2c..907464c 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -304,7 +304,6 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

xprt->prot = XPRT_TRANSPORT_BC_RDMA;
- xprt->tsh_size = 0;
xprt->ops = &xprt_rdma_bc_procs;

memcpy(&xprt->addr, args->dstaddr, args->addrlen);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index fbc171e..e7274dc 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -332,7 +332,6 @@
xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;

xprt->resvport = 0; /* privileged port not needed */
- xprt->tsh_size = 0; /* RPC-RDMA handles framing */
xprt->ops = &xprt_rdma_procs;

/*
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 7754aa3..ae09d85 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -696,6 +696,40 @@ static void xs_stream_data_receive_workfn(struct work_struct *work)

#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)

+/* Common case:
+ * - stream transport
+ * - sending from byte 0 of the message
+ * - the message is wholly contained in @xdr's head iovec
+ */
+static int xs_send_rm_and_kvec(struct socket *sock, struct xdr_buf *xdr,
+ unsigned int remainder)
+{
+ struct msghdr msg = {
+ .msg_flags = XS_SENDMSG_FLAGS | (remainder ? MSG_MORE : 0)
+ };
+ rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+ (u32)xdr->len);
+ struct kvec iov[2] = {
+ {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker)
+ },
+ {
+ .iov_base = xdr->head[0].iov_base,
+ .iov_len = xdr->head[0].iov_len
+ },
+ };
+ int ret;
+
+ ret = kernel_sendmsg(sock, &msg, iov, 2,
+ iov[0].iov_len + iov[1].iov_len);
+ if (ret < 0)
+ return ret;
+ if (ret < iov[0].iov_len)
+ return -EPIPE;
+ return ret - iov[0].iov_len;
+}
+
static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
{
struct msghdr msg = {
@@ -779,7 +813,11 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (base < xdr->head[0].iov_len || addr != NULL) {
unsigned int len = xdr->head[0].iov_len - base;
remainder -= len;
- err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+ if (!base && !addr)
+ err = xs_send_rm_and_kvec(sock, xdr, remainder);
+ else
+ err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0],
+ base, remainder != 0);
if (remainder == 0 || err != len)
goto out;
*sent_p += err;
@@ -869,16 +907,6 @@ static int xs_nospace(struct rpc_rqst *req)
return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
}

-/*
- * Construct a stream transport record marker in @buf.
- */
-static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
-{
- u32 reclen = buf->len - sizeof(rpc_fraghdr);
- rpc_fraghdr *base = buf->head[0].iov_base;
- *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
-}
-
/**
* xs_local_send_request - write an RPC request to an AF_LOCAL socket
* @req: pointer to RPC request
@@ -905,8 +933,6 @@ static int xs_local_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}

- xs_encode_stream_record_marker(&req->rq_snd_buf);
-
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);

@@ -1057,8 +1083,6 @@ static int xs_tcp_send_request(struct rpc_rqst *req)
return -ENOTCONN;
}

- xs_encode_stream_record_marker(&req->rq_snd_buf);
-
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
@@ -2534,26 +2558,35 @@ static int bc_sendto(struct rpc_rqst *req)
{
int len;
struct xdr_buf *xbufp = &req->rq_snd_buf;
- struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct socket *sock = transport->sock;
+ container_of(req->rq_xprt, struct sock_xprt, xprt);
unsigned long headoff;
unsigned long tailoff;
+ struct page *tailpage;
+ struct msghdr msg = {
+ .msg_flags = MSG_MORE
+ };
+ rpc_fraghdr marker = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT |
+ (u32)xbufp->len);
+ struct kvec iov = {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker),
+ };

- xs_encode_stream_record_marker(xbufp);
+ len = kernel_sendmsg(transport->sock, &msg, &iov, 1, iov.iov_len);
+ if (len != iov.iov_len)
+ return -EAGAIN;

+ tailpage = NULL;
+ if (xbufp->tail[0].iov_len)
+ tailpage = virt_to_page(xbufp->tail[0].iov_base);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
- len = svc_send_common(sock, xbufp,
+ len = svc_send_common(transport->sock, xbufp,
virt_to_page(xbufp->head[0].iov_base), headoff,
- xbufp->tail[0].iov_base, tailoff);
-
- if (len != xbufp->len) {
- printk(KERN_NOTICE "Error sending entire callback!\n");
- len = -EAGAIN;
- }
-
+ tailpage, tailoff);
+ if (len != xbufp->len)
+ return -EAGAIN;
return len;
}

@@ -2793,7 +2826,6 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);

xprt->prot = 0;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;

xprt->bind_timeout = XS_BIND_TO;
@@ -2862,7 +2894,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);

xprt->prot = IPPROTO_UDP;
- xprt->tsh_size = 0;
/* XXX: header size can vary due to auth type, IPv6, etc. */
xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);

@@ -2942,7 +2973,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);

xprt->prot = IPPROTO_TCP;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;

xprt->bind_timeout = XS_BIND_TO;
@@ -3015,7 +3045,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
transport = container_of(xprt, struct sock_xprt, xprt);

xprt->prot = IPPROTO_TCP;
- xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
xprt->timeout = &xs_tcp_default_timeout;



2019-02-11 16:24:46

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 12/23] SUNRPC: Add build option to disable support for insecure enctypes

Enable distributions to enforce the rejection of ancient and
insecure Kerberos enctypes in the kernel's RPCSEC_GSS
implementation. These are the single-DES encryption types that
were deprecated in 2012 by RFC 6649.

Enctypes that were deprecated more recently (by RFC 8429) remain
fully supported for now because they are still likely to be widely
used.

Signed-off-by: Chuck Lever <[email protected]>
Acked-by: Simo Sorce <[email protected]>
---
include/linux/sunrpc/gss_krb5_enctypes.h | 42 +++++++++++++++++++++++++++++-
net/sunrpc/Kconfig | 16 +++++++++++
net/sunrpc/auth_gss/gss_krb5_mech.c | 2 +
3 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/include/linux/sunrpc/gss_krb5_enctypes.h b/include/linux/sunrpc/gss_krb5_enctypes.h
index ec6234e..981c89c 100644
--- a/include/linux/sunrpc/gss_krb5_enctypes.h
+++ b/include/linux/sunrpc/gss_krb5_enctypes.h
@@ -1,4 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
/*
- * Dumb way to share this static piece of information with nfsd
+ * Define the string that exports the set of kernel-supported
+ * Kerberos enctypes. This list is sent via upcall to gssd, and
+ * is also exposed via the nfsd /proc API. The consumers generally
+ * treat this as an ordered list, where the first item in the list
+ * is the most preferred.
+ */
+
+#ifndef _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+#define _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H
+
+#ifdef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 (DES3_CBC_SHA1 and ARCFOUR_HMAC).
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ */
+#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23"
+
+#else /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+/*
+ * NB: This list includes encryption types that were deprecated
+ * by RFC 8429 and RFC 6649.
+ *
+ * ENCTYPE_AES256_CTS_HMAC_SHA1_96
+ * ENCTYPE_AES128_CTS_HMAC_SHA1_96
+ * ENCTYPE_DES3_CBC_SHA1
+ * ENCTYPE_ARCFOUR_HMAC
+ * ENCTYPE_DES_CBC_MD5
+ * ENCTYPE_DES_CBC_CRC
+ * ENCTYPE_DES_CBC_MD4
*/
#define KRB5_SUPPORTED_ENCTYPES "18,17,16,23,3,1,2"
+
+#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
+
+#endif /* _LINUX_SUNRPC_GSS_KRB5_ENCTYPES_H */
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index ac09ca8..83f5617 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -34,6 +34,22 @@ config RPCSEC_GSS_KRB5

If unsure, say Y.

+config SUNRPC_DISABLE_INSECURE_ENCTYPES
+ bool "Secure RPC: Disable insecure Kerberos encryption types"
+ depends on RPCSEC_GSS_KRB5
+ default n
+ help
+ Choose Y here to disable the use of deprecated encryption types
+ with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
+ deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
+ and DES-CBC-MD4. These types were deprecated by RFC 6649 because
+ they were found to be insecure.
+
+ N is the default because many sites have deployed KDCs and
+ keytabs that contain only these deprecated encryption types.
+ Choosing Y prevents the use of known-insecure encryption types
+ but might result in compatibility problems.
+
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
depends on SUNRPC && SYSCTL
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index eab71fc..be31a58 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -53,6 +53,7 @@
static struct gss_api_mech gss_kerberos_mech; /* forward declaration */

static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
@@ -74,6 +75,7 @@
.cksumlength = 8,
.keyed_cksum = 0,
},
+#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
/*
* RC4-HMAC
*/


2019-02-11 16:24:52

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 13/23] SUNRPC: Use struct xdr_stream when constructing RPC Call header

Modernize and harden the code path that constructs each RPC Call
message.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/auth.h | 15 ++-
include/linux/sunrpc/xdr.h | 6 +
include/trace/events/sunrpc.h | 29 ++++++
net/sunrpc/auth.c | 56 ++++++++----
net/sunrpc/auth_gss/auth_gss.c | 191 +++++++++++++++++++---------------------
net/sunrpc/auth_null.c | 23 +++--
net/sunrpc/auth_unix.c | 61 ++++++++-----
net/sunrpc/clnt.c | 66 +++++++-------
8 files changed, 266 insertions(+), 181 deletions(-)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index eed3cb1..96e237f 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -131,11 +131,12 @@ struct rpc_credops {
void (*crdestroy)(struct rpc_cred *);

int (*crmatch)(struct auth_cred *, struct rpc_cred *, int);
- __be32 * (*crmarshal)(struct rpc_task *, __be32 *);
+ int (*crmarshal)(struct rpc_task *task,
+ struct xdr_stream *xdr);
int (*crrefresh)(struct rpc_task *);
__be32 * (*crvalidate)(struct rpc_task *, __be32 *);
- int (*crwrap_req)(struct rpc_task *, kxdreproc_t,
- void *, __be32 *, void *);
+ int (*crwrap_req)(struct rpc_task *task,
+ struct xdr_stream *xdr);
int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
void *, __be32 *, void *);
int (*crkey_timeout)(struct rpc_cred *);
@@ -165,9 +166,13 @@ int rpcauth_get_gssinfo(rpc_authflavor_t,
void rpcauth_init_cred(struct rpc_cred *, const struct auth_cred *, struct rpc_auth *, const struct rpc_credops *);
struct rpc_cred * rpcauth_lookupcred(struct rpc_auth *, int);
void put_rpccred(struct rpc_cred *);
-__be32 * rpcauth_marshcred(struct rpc_task *, __be32 *);
+int rpcauth_marshcred(struct rpc_task *task,
+ struct xdr_stream *xdr);
__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *);
-int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
+int rpcauth_wrap_req_encode(struct rpc_task *task,
+ struct xdr_stream *xdr);
+int rpcauth_wrap_req(struct rpc_task *task,
+ struct xdr_stream *xdr);
int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
bool rpcauth_xmit_need_reencode(struct rpc_task *task);
int rpcauth_refreshcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 787939d..6df9ac1 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -87,6 +87,12 @@ struct xdr_buf {
#define xdr_one cpu_to_be32(1)
#define xdr_two cpu_to_be32(2)

+#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL)
+#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
+#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)
+
+#define rpc_call cpu_to_be32(RPC_CALL)
+
#define rpc_success cpu_to_be32(RPC_SUCCESS)
#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL)
#define rpc_prog_mismatch cpu_to_be32(RPC_PROG_MISMATCH)
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 6276508..2b3f9d1 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -213,6 +213,35 @@
DEFINE_RPC_QUEUED_EVENT(sleep);
DEFINE_RPC_QUEUED_EVENT(wakeup);

+DECLARE_EVENT_CLASS(rpc_failure,
+
+ TP_PROTO(const struct rpc_task *task),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+ TP_printk("task:%u@%u",
+ __entry->task_id, __entry->client_id)
+);
+
+#define DEFINE_RPC_FAILURE(name) \
+ DEFINE_EVENT(rpc_failure, rpc_bad_##name, \
+ TP_PROTO( \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(task))
+
+DEFINE_RPC_FAILURE(callhdr);
+
TRACE_EVENT(rpc_stats_latency,

TP_PROTO(
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 275e84e..add2135 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -756,12 +756,21 @@ struct rpc_cred *
}
EXPORT_SYMBOL_GPL(put_rpccred);

-__be32 *
-rpcauth_marshcred(struct rpc_task *task, __be32 *p)
+/**
+ * rpcauth_marshcred - Append RPC credential to end of @xdr
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing initial portion of RPC Call header
+ *
+ * On success, the credential and verifier are added to @xdr, @xdr is
+ * updated to point past the verifier, and zero is returned.
+ * Otherwise, @xdr is in an undefined state and a negative errno
+ * is returned.
+ */
+int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;

- return cred->cr_ops->crmarshal(task, p);
+ return ops->crmarshal(task, xdr);
}

__be32 *
@@ -772,26 +781,37 @@ struct rpc_cred *
return cred->cr_ops->crvalidate(task, p);
}

-static void rpcauth_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req_encode - XDR encode the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded message and zero is returned.
+ * Otherwise, @xdr is in an undefined state.
+ */
+int rpcauth_wrap_req_encode(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_stream xdr;
+ kxdreproc_t encode = task->tk_msg.rpc_proc->p_encode;

- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, data, rqstp);
- encode(rqstp, &xdr, obj);
+ encode(task->tk_rqstp, xdr, task->tk_msg.rpc_argp);
+ return 0;
}
+EXPORT_SYMBOL_GPL(rpcauth_wrap_req_encode);

-int
-rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp,
- __be32 *data, void *obj)
+/**
+ * rpcauth_wrap_req - XDR encode and wrap the RPC procedure
+ * @task: controlling RPC task
+ * @xdr: stream where on-the-wire bytes are to be marshalled
+ *
+ * On success, @xdr contains the encoded and wrapped message,
+ * and zero is returned. Otherwise, @xdr is in an undefined
+ * state and a negative errno is returned.
+ */
+int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;

- if (cred->cr_ops->crwrap_req)
- return cred->cr_ops->crwrap_req(task, encode, rqstp, data, obj);
- /* By default, we encode the arguments normally. */
- rpcauth_wrap_req_encode(encode, rqstp, data, obj);
- return 0;
+ return ops->crwrap_req(task, xdr);
}

static int
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 4b52e2b..b333b1b 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1526,18 +1526,20 @@ static void gss_pipe_free(struct gss_pipe *p)
}

/*
-* Marshal credentials.
-* Maybe we should keep a cached credential for performance reasons.
-*/
-static __be32 *
-gss_marshal(struct rpc_task *task, __be32 *p)
+ * Marshal credentials.
+ *
+ * The expensive part is computing the verifier. We can't cache a
+ * pre-computed version of the verifier because the seqno, which
+ * is different every time, is included in the MIC.
+ */
+static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *cred_len;
+ __be32 *p, *cred_len;
u32 maj_stat = 0;
struct xdr_netobj mic;
struct kvec iov;
@@ -1545,7 +1547,13 @@ static void gss_pipe_free(struct gss_pipe *p)

dprintk("RPC: %5u %s\n", task->tk_pid, __func__);

- *p++ = htonl(RPC_AUTH_GSS);
+ /* Credential */
+
+ p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
+ ctx->gc_wire_ctx.len);
+ if (!p)
+ goto out_put_ctx;
+ *p++ = rpc_auth_gss;
cred_len = p++;

spin_lock(&ctx->gc_seq_lock);
@@ -1554,12 +1562,14 @@ static void gss_pipe_free(struct gss_pipe *p)
if (req->rq_seqno == MAXSEQ)
goto out_expired;

- *p++ = htonl((u32) RPC_GSS_VERSION);
- *p++ = htonl((u32) ctx->gc_proc);
- *p++ = htonl((u32) req->rq_seqno);
- *p++ = htonl((u32) gss_cred->gc_service);
+ *p++ = cpu_to_be32(RPC_GSS_VERSION);
+ *p++ = cpu_to_be32(ctx->gc_proc);
+ *p++ = cpu_to_be32(req->rq_seqno);
+ *p++ = cpu_to_be32(gss_cred->gc_service);
p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
- *cred_len = htonl((p - (cred_len + 1)) << 2);
+ *cred_len = cpu_to_be32((p - (cred_len + 1)) << 2);
+
+ /* Verifier */

/* We compute the checksum for the verifier over the xdr-encoded bytes
* starting with the xid and ending at the end of the credential: */
@@ -1567,27 +1577,27 @@ static void gss_pipe_free(struct gss_pipe *p)
iov.iov_len = (u8 *)p - (u8 *)iov.iov_base;
xdr_buf_from_iov(&iov, &verf_buf);

- /* set verifier flavor*/
- *p++ = htonl(RPC_AUTH_GSS);
-
+ p = xdr_reserve_space(xdr, sizeof(*p));
+ if (!p)
+ goto out_put_ctx;
+ *p++ = rpc_auth_gss;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
- if (maj_stat == GSS_S_CONTEXT_EXPIRED) {
+ if (maj_stat == GSS_S_CONTEXT_EXPIRED)
goto out_expired;
- } else if (maj_stat != 0) {
- pr_warn("gss_marshal: gss_get_mic FAILED (%d)\n", maj_stat);
- task->tk_status = -EIO;
+ else if (maj_stat != 0)
+ goto out_put_ctx;
+ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
goto out_put_ctx;
- }
- p = xdr_encode_opaque(p, NULL, mic.len);
gss_put_ctx(ctx);
- return p;
+ return 0;
out_expired:
+ gss_put_ctx(ctx);
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- task->tk_status = -EKEYEXPIRED;
+ return -EKEYEXPIRED;
out_put_ctx:
gss_put_ctx(ctx);
- return NULL;
+ return -EMSGSIZE;
}

static int gss_renew_cred(struct rpc_task *task)
@@ -1716,61 +1726,45 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred)
return ret;
}

-static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
-{
- struct xdr_stream xdr;
-
- xdr_init_encode(&xdr, &rqstp->rq_snd_buf, p, rqstp);
- encode(rqstp, &xdr, obj);
-}
-
-static inline int
-gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
+static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
- struct xdr_buf integ_buf;
- __be32 *integ_len = NULL;
+ struct rpc_rqst *rqstp = task->tk_rqstp;
+ struct xdr_buf integ_buf, *snd_buf = &rqstp->rq_snd_buf;
struct xdr_netobj mic;
- u32 offset;
- __be32 *q;
- struct kvec *iov;
- u32 maj_stat = 0;
- int status = -EIO;
+ __be32 *p, *integ_len;
+ u32 offset, maj_stat;

+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto wrap_failed;
integ_len = p++;
- offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
- *p++ = htonl(rqstp->rq_seqno);
+ *p = cpu_to_be32(rqstp->rq_seqno);

- gss_wrap_req_encode(encode, rqstp, p, obj);
+ if (rpcauth_wrap_req_encode(task, xdr))
+ goto wrap_failed;

+ offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
if (xdr_buf_subsegment(snd_buf, &integ_buf,
offset, snd_buf->len - offset))
- return status;
- *integ_len = htonl(integ_buf.len);
+ goto wrap_failed;
+ *integ_len = cpu_to_be32(integ_buf.len);

- /* guess whether we're in the head or the tail: */
- if (snd_buf->page_len || snd_buf->tail[0].iov_len)
- iov = snd_buf->tail;
- else
- iov = snd_buf->head;
- p = iov->iov_base + iov->iov_len;
+ p = xdr_reserve_space(xdr, 0);
+ if (!p)
+ goto wrap_failed;
mic.data = (u8 *)(p + 1);
-
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
- status = -EIO; /* XXX? */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- return status;
- q = xdr_encode_opaque(p, NULL, mic.len);
-
- offset = (u8 *)q - (u8 *)p;
- iov->iov_len += offset;
- snd_buf->len += offset;
+ goto wrap_failed;
+ /* Check that the trailing MIC fit in the buffer, after the fact */
+ if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
+ goto wrap_failed;
return 0;
+wrap_failed:
+ return -EMSGSIZE;
}

static void
@@ -1821,61 +1815,63 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
return -EAGAIN;
}

-static inline int
-gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- kxdreproc_t encode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
+static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
+ struct rpc_task *task, struct xdr_stream *xdr)
{
+ struct rpc_rqst *rqstp = task->tk_rqstp;
struct xdr_buf *snd_buf = &rqstp->rq_snd_buf;
- u32 offset;
- u32 maj_stat;
+ u32 pad, offset, maj_stat;
int status;
- __be32 *opaque_len;
+ __be32 *p, *opaque_len;
struct page **inpages;
int first;
- int pad;
struct kvec *iov;
- char *tmp;

+ status = -EIO;
+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto wrap_failed;
opaque_len = p++;
- offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
- *p++ = htonl(rqstp->rq_seqno);
+ *p = cpu_to_be32(rqstp->rq_seqno);

- gss_wrap_req_encode(encode, rqstp, p, obj);
+ if (rpcauth_wrap_req_encode(task, xdr))
+ goto wrap_failed;

status = alloc_enc_pages(rqstp);
- if (status)
- return status;
+ if (unlikely(status))
+ goto wrap_failed;
first = snd_buf->page_base >> PAGE_SHIFT;
inpages = snd_buf->pages + first;
snd_buf->pages = rqstp->rq_enc_pages;
snd_buf->page_base -= first << PAGE_SHIFT;
/*
- * Give the tail its own page, in case we need extra space in the
- * head when wrapping:
+ * Move the tail into its own page, in case gss_wrap needs
+ * more space in the head when wrapping.
*
- * call_allocate() allocates twice the slack space required
- * by the authentication flavor to rq_callsize.
- * For GSS, slack is GSS_CRED_SLACK.
+ * Still... Why can't gss_wrap just slide the tail down?
*/
if (snd_buf->page_len || snd_buf->tail[0].iov_len) {
+ char *tmp;
+
tmp = page_address(rqstp->rq_enc_pages[rqstp->rq_enc_pages_num - 1]);
memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
snd_buf->tail[0].iov_base = tmp;
}
+ status = -EIO;
+ offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
- BUG_ON(snd_buf->len > snd_buf->buflen);
- status = -EIO;
+ if (unlikely(snd_buf->len > snd_buf->buflen))
+ goto wrap_failed;
/* We're assuming that when GSS_S_CONTEXT_EXPIRED, the encryption was
* done anyway, so it's safe to put the request on the wire: */
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- return status;
+ goto wrap_failed;

- *opaque_len = htonl(snd_buf->len - offset);
- /* guess whether we're in the head or the tail: */
+ *opaque_len = cpu_to_be32(snd_buf->len - offset);
+ /* guess whether the pad goes into the head or the tail: */
if (snd_buf->page_len || snd_buf->tail[0].iov_len)
iov = snd_buf->tail;
else
@@ -1887,37 +1883,36 @@ static void gss_wrap_req_encode(kxdreproc_t encode, struct rpc_rqst *rqstp,
snd_buf->len += pad;

return 0;
+wrap_failed:
+ return status;
}

-static int
-gss_wrap_req(struct rpc_task *task,
- kxdreproc_t encode, void *rqstp, __be32 *p, void *obj)
+static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- int status = -EIO;
+ int status;

dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
* wrapping context destruction requests makes the most sense.
*/
- gss_wrap_req_encode(encode, rqstp, p, obj);
- status = 0;
+ status = rpcauth_wrap_req_encode(task, xdr);
goto out;
}
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
- gss_wrap_req_encode(encode, rqstp, p, obj);
- status = 0;
+ status = rpcauth_wrap_req_encode(task, xdr);
break;
case RPC_GSS_SVC_INTEGRITY:
- status = gss_wrap_req_integ(cred, ctx, encode, rqstp, p, obj);
+ status = gss_wrap_req_integ(cred, ctx, task, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
- status = gss_wrap_req_priv(cred, ctx, encode, rqstp, p, obj);
+ status = gss_wrap_req_priv(cred, ctx, task, xdr);
break;
}
out:
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index d0ceac5..797f8472 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -59,15 +59,21 @@
/*
* Marshal credential.
*/
-static __be32 *
-nul_marshal(struct rpc_task *task, __be32 *p)
+static int
+nul_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = 0;
- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = 0;
-
- return p;
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 4 * sizeof(*p));
+ if (!p)
+ return -EMSGSIZE;
+ /* Credential */
+ *p++ = rpc_auth_null;
+ *p++ = xdr_zero;
+ /* Verifier */
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;
+ return 0;
}

/*
@@ -125,6 +131,7 @@ struct rpc_auth null_auth = {
.crdestroy = nul_destroy_cred,
.crmatch = nul_match,
.crmarshal = nul_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
};
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index fc8a591..1d5b7ed 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -99,37 +99,55 @@
* Marshal credentials.
* Maybe we should keep a cached credential for performance reasons.
*/
-static __be32 *
-unx_marshal(struct rpc_task *task, __be32 *p)
+static int
+unx_marshal(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
- __be32 *base, *hold;
+ __be32 *p, *cred_len, *gidarr_len;
int i;
struct group_info *gi = cred->cr_cred->group_info;

- *p++ = htonl(RPC_AUTH_UNIX);
- base = p++;
- *p++ = htonl(jiffies/HZ);
-
- /*
- * Copy the UTS nodename captured when the client was created.
- */
- p = xdr_encode_array(p, clnt->cl_nodename, clnt->cl_nodelen);
-
- *p++ = htonl((u32) from_kuid(&init_user_ns, cred->cr_cred->fsuid));
- *p++ = htonl((u32) from_kgid(&init_user_ns, cred->cr_cred->fsgid));
- hold = p++;
+ /* Credential */
+
+ p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_unix;
+ cred_len = p++;
+ *p++ = xdr_zero; /* stamp */
+ if (xdr_stream_encode_opaque(xdr, clnt->cl_nodename,
+ clnt->cl_nodelen) < 0)
+ goto marshal_failed;
+ p = xdr_reserve_space(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = cpu_to_be32(from_kuid(&init_user_ns, cred->cr_cred->fsuid));
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns, cred->cr_cred->fsgid));
+
+ gidarr_len = p++;
if (gi)
for (i = 0; i < UNX_NGROUPS && i < gi->ngroups; i++)
- *p++ = htonl((u32) from_kgid(&init_user_ns, gi->gid[i]));
- *hold = htonl(p - hold - 1); /* gid array length */
- *base = htonl((p - base - 1) << 2); /* cred length */
+ *p++ = cpu_to_be32(from_kgid(&init_user_ns,
+ gi->gid[i]));
+ *gidarr_len = cpu_to_be32(p - gidarr_len - 1);
+ *cred_len = cpu_to_be32((p - cred_len - 1) << 2);
+ p = xdr_reserve_space(xdr, (p - gidarr_len - 1) << 2);
+ if (!p)
+ goto marshal_failed;
+
+ /* Verifier */
+
+ p = xdr_reserve_space(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto marshal_failed;
+ *p++ = rpc_auth_null;
+ *p = xdr_zero;

- *p++ = htonl(RPC_AUTH_NULL);
- *p++ = htonl(0);
+ return 0;

- return p;
+marshal_failed:
+ return -EMSGSIZE;
}

/*
@@ -202,6 +220,7 @@ struct rpc_auth unix_auth = {
.crdestroy = unx_destroy_cred,
.crmatch = unx_match,
.crmarshal = unx_marshal,
+ .crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c4203f6..d6750b7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -77,7 +77,8 @@
static void call_connect(struct rpc_task *task);
static void call_connect_status(struct rpc_task *task);

-static __be32 *rpc_encode_header(struct rpc_task *task);
+static int rpc_encode_header(struct rpc_task *task,
+ struct xdr_stream *xdr);
static __be32 *rpc_verify_header(struct rpc_task *task);
static int rpc_ping(struct rpc_clnt *clnt);

@@ -1728,10 +1729,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
rpc_xdr_encode(struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- kxdreproc_t encode;
- __be32 *p;
-
- dprint_status(task);
+ struct xdr_stream xdr;

xdr_buf_init(&req->rq_snd_buf,
req->rq_buffer,
@@ -1740,18 +1738,13 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
req->rq_rbuffer,
req->rq_rcvsize);

- p = rpc_encode_header(task);
- if (p == NULL)
+ req->rq_snd_buf.head[0].iov_len = 0;
+ xdr_init_encode(&xdr, &req->rq_snd_buf,
+ req->rq_snd_buf.head[0].iov_base, req);
+ if (rpc_encode_header(task, &xdr))
return;

- encode = task->tk_msg.rpc_proc->p_encode;
- if (encode == NULL)
- return;
-
- task->tk_status = rpcauth_wrap_req(task, encode, req, p,
- task->tk_msg.rpc_argp);
- if (task->tk_status == 0)
- xprt_request_prepare(req);
+ task->tk_status = rpcauth_wrap_req(task, &xdr);
}

/*
@@ -1762,6 +1755,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
{
if (!rpc_task_need_encode(task))
goto out;
+ dprint_status(task);
/* Encode here so that rpcsec_gss can use correct sequence number. */
rpc_xdr_encode(task);
/* Did the encode result in an error condition? */
@@ -1779,6 +1773,8 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
rpc_exit(task, task->tk_status);
}
return;
+ } else {
+ xprt_request_prepare(task->tk_rqstp);
}

/* Add task to reply queue before transmission to avoid races */
@@ -2322,25 +2318,33 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
}
}

-static __be32 *
-rpc_encode_header(struct rpc_task *task)
+static int
+rpc_encode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- __be32 *p = req->rq_svec[0].iov_base;
-
- /* FIXME: check buffer size? */
-
- *p++ = req->rq_xid; /* XID */
- *p++ = htonl(RPC_CALL); /* CALL */
- *p++ = htonl(RPC_VERSION); /* RPC version */
- *p++ = htonl(clnt->cl_prog); /* program number */
- *p++ = htonl(clnt->cl_vers); /* program version */
- *p++ = htonl(task->tk_msg.rpc_proc->p_proc); /* procedure */
- p = rpcauth_marshcred(task, p);
- if (p)
- req->rq_slen = xdr_adjust_iovec(&req->rq_svec[0], p);
- return p;
+ __be32 *p;
+ int error;
+
+ error = -EMSGSIZE;
+ p = xdr_reserve_space(xdr, RPC_CALLHDRSIZE << 2);
+ if (!p)
+ goto out_fail;
+ *p++ = req->rq_xid;
+ *p++ = rpc_call;
+ *p++ = cpu_to_be32(RPC_VERSION);
+ *p++ = cpu_to_be32(clnt->cl_prog);
+ *p++ = cpu_to_be32(clnt->cl_vers);
+ *p = cpu_to_be32(task->tk_msg.rpc_proc->p_proc);
+
+ error = rpcauth_marshcred(task, xdr);
+ if (error < 0)
+ goto out_fail;
+ return 0;
+out_fail:
+ trace_rpc_bad_callhdr(task);
+ rpc_exit(task, error);
+ return error;
}

static __be32 *


2019-02-11 16:24:57

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 14/23] SUNRPC: Clean up rpc_verify_header()

- Recover some instruction count because I'm about to introduce a
few xdr_inline_decode call sites
- Replace dprintk() call sites with trace points
- Reduce the hot path so it fits in fewer cachelines

I've also renamed it rpc_decode_header() to match everything else
in the RPC client.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/xdr.h | 7 +
include/trace/events/sunrpc.h | 52 ++++++++++
net/sunrpc/clnt.c | 223 ++++++++++++++++++-----------------------
3 files changed, 154 insertions(+), 128 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 6df9ac1..c540419 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -92,6 +92,9 @@ struct xdr_buf {
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)

#define rpc_call cpu_to_be32(RPC_CALL)
+#define rpc_reply cpu_to_be32(RPC_REPLY)
+
+#define rpc_msg_accepted cpu_to_be32(RPC_MSG_ACCEPTED)

#define rpc_success cpu_to_be32(RPC_SUCCESS)
#define rpc_prog_unavail cpu_to_be32(RPC_PROG_UNAVAIL)
@@ -101,6 +104,9 @@ struct xdr_buf {
#define rpc_system_err cpu_to_be32(RPC_SYSTEM_ERR)
#define rpc_drop_reply cpu_to_be32(RPC_DROP_REPLY)

+#define rpc_mismatch cpu_to_be32(RPC_MISMATCH)
+#define rpc_auth_error cpu_to_be32(RPC_AUTH_ERROR)
+
#define rpc_auth_ok cpu_to_be32(RPC_AUTH_OK)
#define rpc_autherr_badcred cpu_to_be32(RPC_AUTH_BADCRED)
#define rpc_autherr_rejectedcred cpu_to_be32(RPC_AUTH_REJECTEDCRED)
@@ -109,7 +115,6 @@ struct xdr_buf {
#define rpc_autherr_tooweak cpu_to_be32(RPC_AUTH_TOOWEAK)
#define rpcsec_gsserr_credproblem cpu_to_be32(RPCSEC_GSS_CREDPROBLEM)
#define rpcsec_gsserr_ctxproblem cpu_to_be32(RPCSEC_GSS_CTXPROBLEM)
-#define rpc_autherr_oldseqnum cpu_to_be32(101)

/*
* Miscellaneous XDR helper functions
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 2b3f9d1..0654e9c 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -241,6 +241,58 @@
TP_ARGS(task))

DEFINE_RPC_FAILURE(callhdr);
+DEFINE_RPC_FAILURE(verifier);
+
+DECLARE_EVENT_CLASS(rpc_reply_event,
+
+ TP_PROTO(
+ const struct rpc_task *task
+ ),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __string(progname, task->tk_client->cl_program->name)
+ __field(u32, version)
+ __string(procname, rpc_proc_name(task))
+ __string(servername, task->tk_xprt->servername)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+ __assign_str(progname, task->tk_client->cl_program->name);
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procname, rpc_proc_name(task));
+ __assign_str(servername, task->tk_xprt->servername);
+ ),
+
+ TP_printk("task:%u@%u server=%s xid=0x%08x %sv%u %s",
+ __entry->task_id, __entry->client_id, __get_str(servername),
+ __entry->xid, __get_str(progname), __entry->version,
+ __get_str(procname))
+);
+
+#define DEFINE_RPC_REPLY_EVENT(name) \
+ DEFINE_EVENT(rpc_reply_event, rpc__##name, \
+ TP_PROTO( \
+ const struct rpc_task *task \
+ ), \
+ TP_ARGS(task))
+
+DEFINE_RPC_REPLY_EVENT(prog_unavail);
+DEFINE_RPC_REPLY_EVENT(prog_mismatch);
+DEFINE_RPC_REPLY_EVENT(proc_unavail);
+DEFINE_RPC_REPLY_EVENT(garbage_args);
+DEFINE_RPC_REPLY_EVENT(unparsable);
+DEFINE_RPC_REPLY_EVENT(mismatch);
+DEFINE_RPC_REPLY_EVENT(stale_creds);
+DEFINE_RPC_REPLY_EVENT(bad_creds);
+DEFINE_RPC_REPLY_EVENT(auth_tooweak);

TRACE_EVENT(rpc_stats_latency,

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index d6750b7..e973508 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,7 @@

static int rpc_encode_header(struct rpc_task *task,
struct xdr_stream *xdr);
-static __be32 *rpc_verify_header(struct rpc_task *task);
+static __be32 *rpc_decode_header(struct rpc_task *task);
static int rpc_ping(struct rpc_clnt *clnt);

static void rpc_register_client(struct rpc_clnt *clnt)
@@ -2292,7 +2292,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
goto out_retry;
}

- p = rpc_verify_header(task);
+ p = rpc_decode_header(task);
if (IS_ERR(p)) {
if (p == ERR_PTR(-EAGAIN))
goto out_retry;
@@ -2308,7 +2308,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
return;
out_retry:
task->tk_status = 0;
- /* Note: rpc_verify_header() may have freed the RPC slot */
+ /* Note: rpc_decode_header() may have freed the RPC slot */
if (task->tk_rqstp == req) {
xdr_free_bvec(&req->rq_rcv_buf);
req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
@@ -2347,164 +2347,133 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
return error;
}

-static __be32 *
-rpc_verify_header(struct rpc_task *task)
+static noinline __be32 *
+rpc_decode_header(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
__be32 *p = iov->iov_base;
- u32 n;
int error = -EACCES;

- if ((task->tk_rqstp->rq_rcv_buf.len & 3) != 0) {
- /* RFC-1014 says that the representation of XDR data must be a
- * multiple of four bytes
- * - if it isn't pointer subtraction in the NFS client may give
- * undefined results
- */
- dprintk("RPC: %5u %s: XDR representation not a multiple of"
- " 4 bytes: 0x%x\n", task->tk_pid, __func__,
- task->tk_rqstp->rq_rcv_buf.len);
- error = -EIO;
- goto out_err;
- }
+ /* RFC-1014 says that the representation of XDR data must be a
+ * multiple of four bytes
+ * - if it isn't pointer subtraction in the NFS client may give
+ * undefined results
+ */
+ if (task->tk_rqstp->rq_rcv_buf.len & 3)
+ goto out_badlen;
if ((len -= 3) < 0)
- goto out_overflow;
+ goto out_unparsable;

- p += 1; /* skip XID */
- if ((n = ntohl(*p++)) != RPC_REPLY) {
- dprintk("RPC: %5u %s: not an RPC reply: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- goto out_garbage;
- }
+ p++; /* skip XID */
+ if (*p++ != rpc_reply)
+ goto out_unparsable;
+ if (*p++ != rpc_msg_accepted)
+ goto out_msg_denied;

- if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
- if (--len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_AUTH_ERROR:
- break;
- case RPC_MISMATCH:
- dprintk("RPC: %5u %s: RPC call version mismatch!\n",
- task->tk_pid, __func__);
- error = -EPROTONOSUPPORT;
- goto out_err;
- default:
- dprintk("RPC: %5u %s: RPC call rejected, "
- "unknown error: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- goto out_err;
- }
- if (--len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_AUTH_REJECTEDCRED:
- case RPC_AUTH_REJECTEDVERF:
- case RPCSEC_GSS_CREDPROBLEM:
- case RPCSEC_GSS_CTXPROBLEM:
- if (!task->tk_cred_retry)
- break;
- task->tk_cred_retry--;
- dprintk("RPC: %5u %s: retry stale creds\n",
- task->tk_pid, __func__);
- rpcauth_invalcred(task);
- /* Ensure we obtain a new XID! */
- xprt_release(task);
- task->tk_action = call_reserve;
- goto out_retry;
- case RPC_AUTH_BADCRED:
- case RPC_AUTH_BADVERF:
- /* possibly garbled cred/verf? */
- if (!task->tk_garb_retry)
- break;
- task->tk_garb_retry--;
- dprintk("RPC: %5u %s: retry garbled creds\n",
- task->tk_pid, __func__);
- task->tk_action = call_encode;
- goto out_retry;
- case RPC_AUTH_TOOWEAK:
- printk(KERN_NOTICE "RPC: server %s requires stronger "
- "authentication.\n",
- task->tk_xprt->servername);
- break;
- default:
- dprintk("RPC: %5u %s: unknown auth error: %x\n",
- task->tk_pid, __func__, n);
- error = -EIO;
- }
- dprintk("RPC: %5u %s: call rejected %d\n",
- task->tk_pid, __func__, n);
- goto out_err;
- }
p = rpcauth_checkverf(task, p);
- if (IS_ERR(p)) {
- error = PTR_ERR(p);
- dprintk("RPC: %5u %s: auth check failed with %d\n",
- task->tk_pid, __func__, error);
- goto out_garbage; /* bad verifier, retry */
- }
+ if (IS_ERR(p))
+ goto out_verifier;
+
len = p - (__be32 *)iov->iov_base - 1;
if (len < 0)
- goto out_overflow;
- switch ((n = ntohl(*p++))) {
- case RPC_SUCCESS:
+ goto out_unparsable;
+ switch (*p++) {
+ case rpc_success:
return p;
- case RPC_PROG_UNAVAIL:
- dprintk("RPC: %5u %s: program %u is unsupported "
- "by server %s\n", task->tk_pid, __func__,
- (unsigned int)clnt->cl_prog,
- task->tk_xprt->servername);
+ case rpc_prog_unavail:
+ trace_rpc__prog_unavail(task);
error = -EPFNOSUPPORT;
goto out_err;
- case RPC_PROG_MISMATCH:
- dprintk("RPC: %5u %s: program %u, version %u unsupported "
- "by server %s\n", task->tk_pid, __func__,
- (unsigned int)clnt->cl_prog,
- (unsigned int)clnt->cl_vers,
- task->tk_xprt->servername);
+ case rpc_prog_mismatch:
+ trace_rpc__prog_mismatch(task);
error = -EPROTONOSUPPORT;
goto out_err;
- case RPC_PROC_UNAVAIL:
- dprintk("RPC: %5u %s: proc %s unsupported by program %u, "
- "version %u on server %s\n",
- task->tk_pid, __func__,
- rpc_proc_name(task),
- clnt->cl_prog, clnt->cl_vers,
- task->tk_xprt->servername);
+ case rpc_proc_unavail:
+ trace_rpc__proc_unavail(task);
error = -EOPNOTSUPP;
goto out_err;
- case RPC_GARBAGE_ARGS:
- dprintk("RPC: %5u %s: server saw garbage\n",
- task->tk_pid, __func__);
- break; /* retry */
+ case rpc_garbage_args:
+ trace_rpc__garbage_args(task);
+ break;
default:
- dprintk("RPC: %5u %s: server accept status: %x\n",
- task->tk_pid, __func__, n);
- /* Also retry */
+ trace_rpc__unparsable(task);
}

out_garbage:
clnt->cl_stats->rpcgarbage++;
if (task->tk_garb_retry) {
task->tk_garb_retry--;
- dprintk("RPC: %5u %s: retrying\n",
- task->tk_pid, __func__);
task->tk_action = call_encode;
-out_retry:
return ERR_PTR(-EAGAIN);
}
out_err:
rpc_exit(task, error);
- dprintk("RPC: %5u %s: call failed with error %d\n", task->tk_pid,
- __func__, error);
return ERR_PTR(error);
-out_overflow:
- dprintk("RPC: %5u %s: server reply was truncated.\n", task->tk_pid,
- __func__);
+
+out_badlen:
+ trace_rpc__unparsable(task);
+ error = -EIO;
+ goto out_err;
+
+out_unparsable:
+ trace_rpc__unparsable(task);
+ error = -EIO;
goto out_garbage;
+
+out_verifier:
+ trace_rpc_bad_verifier(task);
+ error = PTR_ERR(p);
+ goto out_garbage;
+
+out_msg_denied:
+ switch (*p++) {
+ case rpc_auth_error:
+ break;
+ case rpc_mismatch:
+ trace_rpc__mismatch(task);
+ error = -EPROTONOSUPPORT;
+ goto out_err;
+ default:
+ trace_rpc__unparsable(task);
+ error = -EIO;
+ goto out_err;
+ }
+
+ switch (*p++) {
+ case rpc_autherr_rejectedcred:
+ case rpc_autherr_rejectedverf:
+ case rpcsec_gsserr_credproblem:
+ case rpcsec_gsserr_ctxproblem:
+ if (!task->tk_cred_retry)
+ break;
+ task->tk_cred_retry--;
+ trace_rpc__stale_creds(task);
+ rpcauth_invalcred(task);
+ /* Ensure we obtain a new XID! */
+ xprt_release(task);
+ task->tk_action = call_reserve;
+ return ERR_PTR(-EAGAIN);
+ case rpc_autherr_badcred:
+ case rpc_autherr_badverf:
+ /* possibly garbled cred/verf? */
+ if (!task->tk_garb_retry)
+ break;
+ task->tk_garb_retry--;
+ trace_rpc__bad_creds(task);
+ task->tk_action = call_encode;
+ return ERR_PTR(-EAGAIN);
+ case rpc_autherr_tooweak:
+ trace_rpc__auth_tooweak(task);
+ pr_warn("RPC: server %s requires stronger authentication.\n",
+ task->tk_xprt->servername);
+ break;
+ default:
+ trace_rpc__unparsable(task);
+ error = -EIO;
+ }
+ goto out_err;
}

static void rpcproc_encode_null(struct rpc_rqst *rqstp, struct xdr_stream *xdr,


2019-02-11 16:25:03

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 15/23] SUNRPC: Use struct xdr_stream when decoding RPC Reply header

Modernize and harden the code path that parses an RPC Reply
message.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/auth.h | 15 ++-
include/linux/sunrpc/xdr.h | 1
net/sunrpc/auth.c | 63 ++++++++----
net/sunrpc/auth_gss/auth_gss.c | 204 ++++++++++++++++++++++------------------
net/sunrpc/auth_null.c | 31 +++---
net/sunrpc/auth_unix.c | 42 +++++---
net/sunrpc/clnt.c | 88 +++++++++--------
7 files changed, 243 insertions(+), 201 deletions(-)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 96e237f..c51e189 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -134,11 +134,12 @@ struct rpc_credops {
int (*crmarshal)(struct rpc_task *task,
struct xdr_stream *xdr);
int (*crrefresh)(struct rpc_task *);
- __be32 * (*crvalidate)(struct rpc_task *, __be32 *);
+ int (*crvalidate)(struct rpc_task *task,
+ struct xdr_stream *xdr);
int (*crwrap_req)(struct rpc_task *task,
struct xdr_stream *xdr);
- int (*crunwrap_resp)(struct rpc_task *, kxdrdproc_t,
- void *, __be32 *, void *);
+ int (*crunwrap_resp)(struct rpc_task *task,
+ struct xdr_stream *xdr);
int (*crkey_timeout)(struct rpc_cred *);
char * (*crstringify_acceptor)(struct rpc_cred *);
bool (*crneed_reencode)(struct rpc_task *);
@@ -168,12 +169,16 @@ int rpcauth_get_gssinfo(rpc_authflavor_t,
void put_rpccred(struct rpc_cred *);
int rpcauth_marshcred(struct rpc_task *task,
struct xdr_stream *xdr);
-__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *);
+int rpcauth_checkverf(struct rpc_task *task,
+ struct xdr_stream *xdr);
int rpcauth_wrap_req_encode(struct rpc_task *task,
struct xdr_stream *xdr);
int rpcauth_wrap_req(struct rpc_task *task,
struct xdr_stream *xdr);
-int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+int rpcauth_unwrap_resp_decode(struct rpc_task *task,
+ struct xdr_stream *xdr);
+int rpcauth_unwrap_resp(struct rpc_task *task,
+ struct xdr_stream *xdr);
bool rpcauth_xmit_need_reencode(struct rpc_task *task);
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index c540419..65af6a2 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -89,6 +89,7 @@ struct xdr_buf {

#define rpc_auth_null cpu_to_be32(RPC_AUTH_NULL)
#define rpc_auth_unix cpu_to_be32(RPC_AUTH_UNIX)
+#define rpc_auth_short cpu_to_be32(RPC_AUTH_SHORT)
#define rpc_auth_gss cpu_to_be32(RPC_AUTH_GSS)

#define rpc_call cpu_to_be32(RPC_CALL)
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index add2135..e786102 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -17,6 +17,8 @@
#include <linux/sunrpc/gss_api.h>
#include <linux/spinlock.h>

+#include <trace/events/sunrpc.h>
+
#define RPC_CREDCACHE_DEFAULT_HASHBITS (4)
struct rpc_cred_cache {
struct hlist_head *hashtable;
@@ -773,14 +775,6 @@ int rpcauth_marshcred(struct rpc_task *task, struct xdr_stream *xdr)
return ops->crmarshal(task, xdr);
}

-__be32 *
-rpcauth_checkverf(struct rpc_task *task, __be32 *p)
-{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
-
- return cred->cr_ops->crvalidate(task, p);
-}
-
/**
* rpcauth_wrap_req_encode - XDR encode the RPC procedure
* @task: controlling RPC task
@@ -814,27 +808,52 @@ int rpcauth_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
return ops->crwrap_req(task, xdr);
}

-static int
-rpcauth_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
- __be32 *data, void *obj)
+/**
+ * rpcauth_checkverf - Validate verifier in RPC Reply header
+ * @task: controlling RPC task
+ * @xdr: xdr_stream containing RPC Reply header
+ *
+ * On success, @xdr is updated to point past the verifier and
+ * zero is returned. Otherwise, @xdr is in an undefined state
+ * and a negative errno is returned.
+ */
+int
+rpcauth_checkverf(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct xdr_stream xdr;
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;

- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, data, rqstp);
- return decode(rqstp, &xdr, obj);
+ return ops->crvalidate(task, xdr);
}

+/**
+ * rpcauth_unwrap_resp_decode - Invoke XDR decode function
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
int
-rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp,
- __be32 *data, void *obj)
+rpcauth_unwrap_resp_decode(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
+
+ return decode(task->tk_rqstp, xdr, task->tk_msg.rpc_resp);
+}
+EXPORT_SYMBOL_GPL(rpcauth_unwrap_resp_decode);
+
+/**
+ * rpcauth_unwrap_resp - Invoke unwrap and decode function for the cred
+ * @task: controlling RPC task
+ * @xdr: stream where the Reply message resides
+ *
+ * Returns zero on success; otherwise a negative errno is returned.
+ */
+int
+rpcauth_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
+{
+ const struct rpc_credops *ops = task->tk_rqstp->rq_cred->cr_ops;

- if (cred->cr_ops->crunwrap_resp)
- return cred->cr_ops->crunwrap_resp(task, decode, rqstp,
- data, obj);
- /* By default, we decode the arguments normally. */
- return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
+ return ops->crunwrap_resp(task, xdr);
}

bool
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index b333b1b..206788e 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1671,59 +1671,62 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred)
return 0;
}

-static __be32 *
-gss_validate(struct rpc_task *task, __be32 *p)
+static int
+gss_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_cred *cred = task->tk_rqstp->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *seq = NULL;
+ __be32 *p, *seq = NULL;
struct kvec iov;
struct xdr_buf verf_buf;
struct xdr_netobj mic;
- u32 flav,len;
- u32 maj_stat;
- __be32 *ret = ERR_PTR(-EIO);
+ u32 len, maj_stat;
+ int status;

- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ goto validate_failed;
+ if (*p++ != rpc_auth_gss)
+ goto validate_failed;
+ len = be32_to_cpup(p);
+ if (len > RPC_MAX_AUTH_SIZE)
+ goto validate_failed;
+ p = xdr_inline_decode(xdr, len);
+ if (!p)
+ goto validate_failed;

- flav = ntohl(*p++);
- if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
- goto out_bad;
- if (flav != RPC_AUTH_GSS)
- goto out_bad;
seq = kmalloc(4, GFP_NOFS);
if (!seq)
- goto out_bad;
- *seq = htonl(task->tk_rqstp->rq_seqno);
+ goto validate_failed;
+ *seq = cpu_to_be32(task->tk_rqstp->rq_seqno);
iov.iov_base = seq;
iov.iov_len = 4;
xdr_buf_from_iov(&iov, &verf_buf);
mic.data = (u8 *)p;
mic.len = len;
-
- ret = ERR_PTR(-EACCES);
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- if (maj_stat) {
- dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
- task->tk_pid, __func__, maj_stat);
- goto out_bad;
- }
+ if (maj_stat)
+ goto bad_mic;
+
/* We leave it to unwrap to calculate au_rslack. For now we just
* calculate the length of the verifier: */
cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
+ status = 0;
+out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
- task->tk_pid, __func__);
- kfree(seq);
- return p + XDR_QUADLEN(len);
-out_bad:
- gss_put_ctx(ctx);
- dprintk("RPC: %5u %s failed ret %ld.\n", task->tk_pid, __func__,
- PTR_ERR(ret));
kfree(seq);
- return ret;
+ return status;
+
+validate_failed:
+ status = -EIO;
+ goto out;
+bad_mic:
+ dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
+ task->tk_pid, __func__, maj_stat);
+ status = -EACCES;
+ goto out;
}

static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
@@ -1921,79 +1924,98 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
return status;
}

-static inline int
+static int
+gss_unwrap_resp_auth(struct rpc_cred *cred)
+{
+ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize;
+ return 0;
+}
+
+static int
gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, __be32 **p)
+ struct rpc_rqst *rqstp, struct xdr_stream *xdr)
{
- struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
- struct xdr_buf integ_buf;
+ struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
+ u32 data_offset, mic_offset, integ_len, maj_stat;
struct xdr_netobj mic;
- u32 data_offset, mic_offset;
- u32 integ_len;
- u32 maj_stat;
- int status = -EIO;
+ __be32 *p;

- integ_len = ntohl(*(*p)++);
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (unlikely(!p))
+ goto unwrap_failed;
+ integ_len = be32_to_cpup(p++);
if (integ_len & 3)
- return status;
- data_offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+ goto unwrap_failed;
+ data_offset = (u8 *)(p) - (u8 *)rcv_buf->head[0].iov_base;
mic_offset = integ_len + data_offset;
if (mic_offset > rcv_buf->len)
- return status;
- if (ntohl(*(*p)++) != rqstp->rq_seqno)
- return status;
-
- if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
- mic_offset - data_offset))
- return status;
+ goto unwrap_failed;
+ if (be32_to_cpup(p) != rqstp->rq_seqno)
+ goto unwrap_failed;

+ if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
+ goto unwrap_failed;
if (xdr_buf_read_netobj(rcv_buf, &mic, mic_offset))
- return status;
-
+ goto unwrap_failed;
maj_stat = gss_verify_mic(ctx->gc_gss_ctx, &integ_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
- return status;
+ goto bad_mic;
+
+ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
+ 1 + XDR_QUADLEN(mic.len);
return 0;
+unwrap_failed:
+ return -EIO;
+bad_mic:
+ dprintk("RPC: %s: gss_verify_mic returned error 0x%08x\n",
+ __func__, maj_stat);
+ return -EIO;
}

-static inline int
+static int
gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, __be32 **p)
-{
- struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
- u32 offset;
- u32 opaque_len;
- u32 maj_stat;
- int status = -EIO;
-
- opaque_len = ntohl(*(*p)++);
- offset = (u8 *)(*p) - (u8 *)rcv_buf->head[0].iov_base;
+ struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+{
+ struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
+ struct kvec *head = rqstp->rq_rcv_buf.head;
+ unsigned int savedlen = rcv_buf->len;
+ u32 offset, opaque_len, maj_stat;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (unlikely(!p))
+ goto unwrap_failed;
+ opaque_len = be32_to_cpup(p++);
+ offset = (u8 *)(p) - (u8 *)head->iov_base;
if (offset + opaque_len > rcv_buf->len)
- return status;
- /* remove padding: */
+ goto unwrap_failed;
rcv_buf->len = offset + opaque_len;

maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
if (maj_stat != GSS_S_COMPLETE)
- return status;
- if (ntohl(*(*p)++) != rqstp->rq_seqno)
- return status;
-
- return 0;
-}
+ goto bad_unwrap;
+ /* gss_unwrap decrypted the sequence number */
+ if (be32_to_cpup(p++) != rqstp->rq_seqno)
+ goto unwrap_failed;

-static int
-gss_unwrap_req_decode(kxdrdproc_t decode, struct rpc_rqst *rqstp,
- __be32 *p, void *obj)
-{
- struct xdr_stream xdr;
+ /* gss_unwrap redacts the opaque blob from the head iovec.
+ * rcv_buf has changed, thus the stream needs to be reset.
+ */
+ xdr_init_decode(xdr, rcv_buf, p, rqstp);

- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p, rqstp);
- return decode(rqstp, &xdr, obj);
+ cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
+ XDR_QUADLEN(savedlen - rcv_buf->len);
+ return 0;
+unwrap_failed:
+ return -EIO;
+bad_unwrap:
+ dprintk("RPC: %s: gss_unwrap returned error 0x%08x\n",
+ __func__, maj_stat);
+ return -EIO;
}

static bool
@@ -2037,39 +2059,33 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
}

static int
-gss_unwrap_resp(struct rpc_task *task,
- kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
+gss_unwrap_resp(struct rpc_task *task, struct xdr_stream *xdr)
{
- struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+ struct rpc_rqst *rqstp = task->tk_rqstp;
+ struct rpc_cred *cred = rqstp->rq_cred;
struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
gc_base);
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- __be32 *savedp = p;
- struct kvec *head = ((struct rpc_rqst *)rqstp)->rq_rcv_buf.head;
- int savedlen = head->iov_len;
- int status = -EIO;
+ int status = -EIO;

if (ctx->gc_proc != RPC_GSS_PROC_DATA)
goto out_decode;
switch (gss_cred->gc_service) {
case RPC_GSS_SVC_NONE:
+ status = gss_unwrap_resp_auth(cred);
break;
case RPC_GSS_SVC_INTEGRITY:
- status = gss_unwrap_resp_integ(cred, ctx, rqstp, &p);
- if (status)
- goto out;
+ status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
- status = gss_unwrap_resp_priv(cred, ctx, rqstp, &p);
- if (status)
- goto out;
+ status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr);
break;
}
- /* take into account extra slack for integrity and privacy cases: */
- cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + (p - savedp)
- + (savedlen - head->iov_len);
+ if (status)
+ goto out;
+
out_decode:
- status = gss_unwrap_req_decode(decode, rqstp, p, obj);
+ status = rpcauth_unwrap_resp_decode(task, xdr);
out:
gss_put_ctx(ctx);
dprintk("RPC: %5u %s returning %d\n",
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 797f8472..bf96975 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -86,25 +86,19 @@
return 0;
}

-static __be32 *
-nul_validate(struct rpc_task *task, __be32 *p)
+static int
+nul_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
- rpc_authflavor_t flavor;
- u32 size;
-
- flavor = ntohl(*p++);
- if (flavor != RPC_AUTH_NULL) {
- printk("RPC: bad verf flavor: %u\n", flavor);
- return ERR_PTR(-EIO);
- }
-
- size = ntohl(*p++);
- if (size != 0) {
- printk("RPC: bad verf size: %u\n", size);
- return ERR_PTR(-EIO);
- }
-
- return p;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ return -EIO;
+ if (*p++ != rpc_auth_null)
+ return -EIO;
+ if (*p != xdr_zero)
+ return -EIO;
+ return 0;
}

const struct rpc_authops authnull_ops = {
@@ -134,6 +128,7 @@ struct rpc_auth null_auth = {
.crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = nul_refresh,
.crvalidate = nul_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
};

static
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 1d5b7ed..5ea84a9 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -160,29 +160,32 @@
return 0;
}

-static __be32 *
-unx_validate(struct rpc_task *task, __be32 *p)
+static int
+unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
- rpc_authflavor_t flavor;
- u32 size;
-
- flavor = ntohl(*p++);
- if (flavor != RPC_AUTH_NULL &&
- flavor != RPC_AUTH_UNIX &&
- flavor != RPC_AUTH_SHORT) {
- printk("RPC: bad verf flavor: %u\n", flavor);
- return ERR_PTR(-EIO);
- }
+ __be32 *p;
+ u32 size;

- size = ntohl(*p++);
- if (size > RPC_MAX_AUTH_SIZE) {
- printk("RPC: giant verf size: %u\n", size);
- return ERR_PTR(-EIO);
+ p = xdr_inline_decode(xdr, 2 * sizeof(*p));
+ if (!p)
+ return -EIO;
+ switch (*p++) {
+ case rpc_auth_null:
+ case rpc_auth_unix:
+ case rpc_auth_short:
+ break;
+ default:
+ return -EIO;
}
- task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
- p += (size >> 2);
+ size = be32_to_cpup(p);
+ if (size > RPC_MAX_AUTH_SIZE)
+ return -EIO;
+ p = xdr_inline_decode(xdr, size);
+ if (!p)
+ return -EIO;

- return p;
+ task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
+ return 0;
}

int __init rpc_init_authunix(void)
@@ -223,4 +226,5 @@ struct rpc_auth unix_auth = {
.crwrap_req = rpcauth_wrap_req_encode,
.crrefresh = unx_refresh,
.crvalidate = unx_validate,
+ .crunwrap_resp = rpcauth_unwrap_resp_decode,
};
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index e973508..803e931 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -79,7 +79,8 @@

static int rpc_encode_header(struct rpc_task *task,
struct xdr_stream *xdr);
-static __be32 *rpc_decode_header(struct rpc_task *task);
+static int rpc_decode_header(struct rpc_task *task,
+ struct xdr_stream *xdr);
static int rpc_ping(struct rpc_clnt *clnt);

static void rpc_register_client(struct rpc_clnt *clnt)
@@ -2251,12 +2252,11 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
{
struct rpc_clnt *clnt = task->tk_client;
struct rpc_rqst *req = task->tk_rqstp;
- kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
- __be32 *p;
+ struct xdr_stream xdr;

dprint_status(task);

- if (!decode) {
+ if (!task->tk_msg.rpc_proc->p_decode) {
task->tk_action = rpc_exit_task;
return;
}
@@ -2292,29 +2292,27 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
goto out_retry;
}

- p = rpc_decode_header(task);
- if (IS_ERR(p)) {
- if (p == ERR_PTR(-EAGAIN))
- goto out_retry;
+ xdr_init_decode(&xdr, &req->rq_rcv_buf,
+ req->rq_rcv_buf.head[0].iov_base, req);
+ switch (rpc_decode_header(task, &xdr)) {
+ case 0:
+ task->tk_action = rpc_exit_task;
+ task->tk_status = rpcauth_unwrap_resp(task, &xdr);
+ dprintk("RPC: %5u %s result %d\n",
+ task->tk_pid, __func__, task->tk_status);
return;
- }
- task->tk_action = rpc_exit_task;
-
- task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
- task->tk_msg.rpc_resp);
-
- dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
- task->tk_status);
- return;
+ case -EAGAIN:
out_retry:
- task->tk_status = 0;
- /* Note: rpc_decode_header() may have freed the RPC slot */
- if (task->tk_rqstp == req) {
- xdr_free_bvec(&req->rq_rcv_buf);
- req->rq_reply_bytes_recvd = req->rq_rcv_buf.len = 0;
- if (task->tk_client->cl_discrtry)
- xprt_conditional_disconnect(req->rq_xprt,
- req->rq_connect_cookie);
+ task->tk_status = 0;
+ /* Note: rpc_decode_header() may have freed the RPC slot */
+ if (task->tk_rqstp == req) {
+ xdr_free_bvec(&req->rq_rcv_buf);
+ req->rq_reply_bytes_recvd = 0;
+ req->rq_rcv_buf.len = 0;
+ if (task->tk_client->cl_discrtry)
+ xprt_conditional_disconnect(req->rq_xprt,
+ req->rq_connect_cookie);
+ }
}
}

@@ -2347,14 +2345,12 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
return error;
}

-static noinline __be32 *
-rpc_decode_header(struct rpc_task *task)
+static noinline int
+rpc_decode_header(struct rpc_task *task, struct xdr_stream *xdr)
{
struct rpc_clnt *clnt = task->tk_client;
- struct kvec *iov = &task->tk_rqstp->rq_rcv_buf.head[0];
- int len = task->tk_rqstp->rq_rcv_buf.len >> 2;
- __be32 *p = iov->iov_base;
int error = -EACCES;
+ __be32 *p;

/* RFC-1014 says that the representation of XDR data must be a
* multiple of four bytes
@@ -2363,25 +2359,26 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
*/
if (task->tk_rqstp->rq_rcv_buf.len & 3)
goto out_badlen;
- if ((len -= 3) < 0)
- goto out_unparsable;

+ p = xdr_inline_decode(xdr, 3 * sizeof(*p));
+ if (!p)
+ goto out_unparsable;
p++; /* skip XID */
if (*p++ != rpc_reply)
goto out_unparsable;
if (*p++ != rpc_msg_accepted)
goto out_msg_denied;

- p = rpcauth_checkverf(task, p);
- if (IS_ERR(p))
+ error = rpcauth_checkverf(task, xdr);
+ if (error)
goto out_verifier;

- len = p - (__be32 *)iov->iov_base - 1;
- if (len < 0)
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
goto out_unparsable;
- switch (*p++) {
+ switch (*p) {
case rpc_success:
- return p;
+ return 0;
case rpc_prog_unavail:
trace_rpc__prog_unavail(task);
error = -EPFNOSUPPORT;
@@ -2406,11 +2403,11 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
if (task->tk_garb_retry) {
task->tk_garb_retry--;
task->tk_action = call_encode;
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
}
out_err:
rpc_exit(task, error);
- return ERR_PTR(error);
+ return error;

out_badlen:
trace_rpc__unparsable(task);
@@ -2424,10 +2421,12 @@ void rpc_force_rebind(struct rpc_clnt *clnt)

out_verifier:
trace_rpc_bad_verifier(task);
- error = PTR_ERR(p);
goto out_garbage;

out_msg_denied:
+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
+ goto out_unparsable;
switch (*p++) {
case rpc_auth_error:
break;
@@ -2441,6 +2440,9 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
goto out_err;
}

+ p = xdr_inline_decode(xdr, sizeof(*p));
+ if (!p)
+ goto out_unparsable;
switch (*p++) {
case rpc_autherr_rejectedcred:
case rpc_autherr_rejectedverf:
@@ -2454,7 +2456,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
/* Ensure we obtain a new XID! */
xprt_release(task);
task->tk_action = call_reserve;
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
case rpc_autherr_badcred:
case rpc_autherr_badverf:
/* possibly garbled cred/verf? */
@@ -2463,7 +2465,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
task->tk_garb_retry--;
trace_rpc__bad_creds(task);
task->tk_action = call_encode;
- return ERR_PTR(-EAGAIN);
+ return -EAGAIN;
case rpc_autherr_tooweak:
trace_rpc__auth_tooweak(task);
pr_warn("RPC: server %s requires stronger authentication.\n",


2019-02-11 16:25:09

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 16/23] SUNRPC: Introduce trace points in rpc_auth_gss.ko

Add infrastructure for trace points in the RPC_AUTH_GSS kernel
module, and add a few sample trace points. These report exceptional
or unexpected events, and observe the assignment of GSS sequence
numbers.

Signed-off-by: Chuck Lever <[email protected]>
---
include/trace/events/rpcgss.h | 361 ++++++++++++++++++++++++++++++++++++++++
include/trace/events/rpcrdma.h | 12 +
include/trace/events/sunrpc.h | 61 +++++++
net/sunrpc/auth_gss/Makefile | 2
net/sunrpc/auth_gss/auth_gss.c | 165 +++++++++---------
net/sunrpc/auth_gss/trace.c | 11 +
net/sunrpc/xprt.c | 10 +
7 files changed, 530 insertions(+), 92 deletions(-)
create mode 100644 include/trace/events/rpcgss.h
create mode 100644 net/sunrpc/auth_gss/trace.c

diff --git a/include/trace/events/rpcgss.h b/include/trace/events/rpcgss.h
new file mode 100644
index 0000000..d1f7fe1
--- /dev/null
+++ b/include/trace/events/rpcgss.h
@@ -0,0 +1,361 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2018 Oracle. All rights reserved.
+ *
+ * Trace point definitions for the "rpcgss" subsystem.
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM rpcgss
+
+#if !defined(_TRACE_RPCGSS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_RPCGSS_H
+
+#include <linux/tracepoint.h>
+
+/**
+ ** GSS-API related trace events
+ **/
+
+TRACE_DEFINE_ENUM(GSS_S_BAD_MECH);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAME);
+TRACE_DEFINE_ENUM(GSS_S_BAD_NAMETYPE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_BINDINGS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_STATUS);
+TRACE_DEFINE_ENUM(GSS_S_BAD_SIG);
+TRACE_DEFINE_ENUM(GSS_S_NO_CRED);
+TRACE_DEFINE_ENUM(GSS_S_NO_CONTEXT);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_DEFECTIVE_CREDENTIAL);
+TRACE_DEFINE_ENUM(GSS_S_CREDENTIALS_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_CONTEXT_EXPIRED);
+TRACE_DEFINE_ENUM(GSS_S_FAILURE);
+TRACE_DEFINE_ENUM(GSS_S_BAD_QOP);
+TRACE_DEFINE_ENUM(GSS_S_UNAUTHORIZED);
+TRACE_DEFINE_ENUM(GSS_S_UNAVAILABLE);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_ELEMENT);
+TRACE_DEFINE_ENUM(GSS_S_NAME_NOT_MN);
+TRACE_DEFINE_ENUM(GSS_S_CONTINUE_NEEDED);
+TRACE_DEFINE_ENUM(GSS_S_DUPLICATE_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_OLD_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_UNSEQ_TOKEN);
+TRACE_DEFINE_ENUM(GSS_S_GAP_TOKEN);
+
+#define show_gss_status(x) \
+ __print_flags(x, "|", \
+ { GSS_S_BAD_MECH, "GSS_S_BAD_MECH" }, \
+ { GSS_S_BAD_NAME, "GSS_S_BAD_NAME" }, \
+ { GSS_S_BAD_NAMETYPE, "GSS_S_BAD_NAMETYPE" }, \
+ { GSS_S_BAD_BINDINGS, "GSS_S_BAD_BINDINGS" }, \
+ { GSS_S_BAD_STATUS, "GSS_S_BAD_STATUS" }, \
+ { GSS_S_BAD_SIG, "GSS_S_BAD_SIG" }, \
+ { GSS_S_NO_CRED, "GSS_S_NO_CRED" }, \
+ { GSS_S_NO_CONTEXT, "GSS_S_NO_CONTEXT" }, \
+ { GSS_S_DEFECTIVE_TOKEN, "GSS_S_DEFECTIVE_TOKEN" }, \
+ { GSS_S_DEFECTIVE_CREDENTIAL, "GSS_S_DEFECTIVE_CREDENTIAL" }, \
+ { GSS_S_CREDENTIALS_EXPIRED, "GSS_S_CREDENTIALS_EXPIRED" }, \
+ { GSS_S_CONTEXT_EXPIRED, "GSS_S_CONTEXT_EXPIRED" }, \
+ { GSS_S_FAILURE, "GSS_S_FAILURE" }, \
+ { GSS_S_BAD_QOP, "GSS_S_BAD_QOP" }, \
+ { GSS_S_UNAUTHORIZED, "GSS_S_UNAUTHORIZED" }, \
+ { GSS_S_UNAVAILABLE, "GSS_S_UNAVAILABLE" }, \
+ { GSS_S_DUPLICATE_ELEMENT, "GSS_S_DUPLICATE_ELEMENT" }, \
+ { GSS_S_NAME_NOT_MN, "GSS_S_NAME_NOT_MN" }, \
+ { GSS_S_CONTINUE_NEEDED, "GSS_S_CONTINUE_NEEDED" }, \
+ { GSS_S_DUPLICATE_TOKEN, "GSS_S_DUPLICATE_TOKEN" }, \
+ { GSS_S_OLD_TOKEN, "GSS_S_OLD_TOKEN" }, \
+ { GSS_S_UNSEQ_TOKEN, "GSS_S_UNSEQ_TOKEN" }, \
+ { GSS_S_GAP_TOKEN, "GSS_S_GAP_TOKEN" })
+
+
+DECLARE_EVENT_CLASS(rpcgss_gssapi_event,
+ TP_PROTO(
+ const struct rpc_task *task,
+ u32 maj_stat
+ ),
+
+ TP_ARGS(task, maj_stat),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, maj_stat)
+
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->maj_stat = maj_stat;
+ ),
+
+ TP_printk("task:%u@%u maj_stat=%s",
+ __entry->task_id, __entry->client_id,
+ __entry->maj_stat == 0 ?
+ "GSS_S_COMPLETE" : show_gss_status(__entry->maj_stat))
+);
+
+#define DEFINE_GSSAPI_EVENT(name) \
+ DEFINE_EVENT(rpcgss_gssapi_event, rpcgss_##name, \
+ TP_PROTO( \
+ const struct rpc_task *task, \
+ u32 maj_stat \
+ ), \
+ TP_ARGS(task, maj_stat))
+
+TRACE_EVENT(rpcgss_import_ctx,
+ TP_PROTO(
+ int status
+ ),
+
+ TP_ARGS(status),
+
+ TP_STRUCT__entry(
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->status = status;
+ ),
+
+ TP_printk("status=%d", __entry->status)
+);
+
+DEFINE_GSSAPI_EVENT(get_mic);
+DEFINE_GSSAPI_EVENT(verify_mic);
+DEFINE_GSSAPI_EVENT(wrap);
+DEFINE_GSSAPI_EVENT(unwrap);
+
+
+/**
+ ** GSS auth unwrap failures
+ **/
+
+TRACE_EVENT(rpcgss_unwrap_failed,
+ TP_PROTO(
+ const struct rpc_task *task
+ ),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ ),
+
+ TP_printk("task:%u@%u", __entry->task_id, __entry->client_id)
+);
+
+TRACE_EVENT(rpcgss_bad_seqno,
+ TP_PROTO(
+ const struct rpc_task *task,
+ u32 expected,
+ u32 received
+ ),
+
+ TP_ARGS(task, expected, received),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, expected)
+ __field(u32, received)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->expected = expected;
+ __entry->received = received;
+ ),
+
+ TP_printk("task:%u@%u expected seqno %u, received seqno %u",
+ __entry->task_id, __entry->client_id,
+ __entry->expected, __entry->received)
+);
+
+TRACE_EVENT(rpcgss_seqno,
+ TP_PROTO(
+ const struct rpc_task *task
+ ),
+
+ TP_ARGS(task),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seqno)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_rqst *rqst = task->tk_rqstp;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->seqno = rqst->rq_seqno;
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x seqno=%u",
+ __entry->task_id, __entry->client_id,
+ __entry->xid, __entry->seqno)
+);
+
+TRACE_EVENT(rpcgss_need_reencode,
+ TP_PROTO(
+ const struct rpc_task *task,
+ u32 seq_xmit,
+ bool ret
+ ),
+
+ TP_ARGS(task, seq_xmit, ret),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seq_xmit)
+ __field(u32, seqno)
+ __field(bool, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+ __entry->seq_xmit = seq_xmit;
+ __entry->seqno = task->tk_rqstp->rq_seqno;
+ __entry->ret = ret;
+ ),
+
+ TP_printk("task:%u@%u xid=0x%08x rq_seqno=%u seq_xmit=%u reencode %sneeded",
+ __entry->task_id, __entry->client_id,
+ __entry->xid, __entry->seqno, __entry->seq_xmit,
+ __entry->ret ? "" : "un")
+);
+
+/**
+ ** gssd upcall related trace events
+ **/
+
+TRACE_EVENT(rpcgss_upcall_msg,
+ TP_PROTO(
+ const char *buf
+ ),
+
+ TP_ARGS(buf),
+
+ TP_STRUCT__entry(
+ __string(msg, buf)
+ ),
+
+ TP_fast_assign(
+ __assign_str(msg, buf)
+ ),
+
+ TP_printk("msg='%s'", __get_str(msg))
+);
+
+TRACE_EVENT(rpcgss_upcall_result,
+ TP_PROTO(
+ u32 uid,
+ int result
+ ),
+
+ TP_ARGS(uid, result),
+
+ TP_STRUCT__entry(
+ __field(u32, uid)
+ __field(int, result)
+
+ ),
+
+ TP_fast_assign(
+ __entry->uid = uid;
+ __entry->result = result;
+ ),
+
+ TP_printk("for uid %u, result=%d", __entry->uid, __entry->result)
+);
+
+TRACE_EVENT(rpcgss_context,
+ TP_PROTO(
+ unsigned long expiry,
+ unsigned long now,
+ unsigned int timeout,
+ unsigned int len,
+ const u8 *data
+ ),
+
+ TP_ARGS(expiry, now, timeout, len, data),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, expiry)
+ __field(unsigned long, now)
+ __field(unsigned int, timeout)
+ __field(int, len)
+ __dynamic_array(char, acceptor, len) /* counted blob: size by len, not strlen() */
+ ),
+
+ TP_fast_assign(
+ __entry->expiry = expiry;
+ __entry->now = now;
+ __entry->timeout = timeout;
+ __entry->len = len;
+ memcpy(__get_dynamic_array(acceptor), data, len); /* copy exactly len bytes */
+ ),
+
+ TP_printk("gc_expiry=%lu now=%lu timeout=%u acceptor=%.*s",
+ __entry->expiry, __entry->now, __entry->timeout,
+ __entry->len, __get_str(acceptor))
+);
+
+
+/**
+ ** Miscellaneous events
+ */
+
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5I);
+TRACE_DEFINE_ENUM(RPC_AUTH_GSS_KRB5P);
+
+#define show_pseudoflavor(x) \
+ __print_symbolic(x, \
+ { RPC_AUTH_GSS_KRB5, "RPC_AUTH_GSS_KRB5" }, \
+ { RPC_AUTH_GSS_KRB5I, "RPC_AUTH_GSS_KRB5I" }, \
+ { RPC_AUTH_GSS_KRB5P, "RPC_AUTH_GSS_KRB5P" })
+
+
+TRACE_EVENT(rpcgss_createauth,
+ TP_PROTO(
+ unsigned int flavor,
+ int error
+ ),
+
+ TP_ARGS(flavor, error),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, flavor)
+ __field(int, error)
+
+ ),
+
+ TP_fast_assign(
+ __entry->flavor = flavor;
+ __entry->error = error;
+ ),
+
+ TP_printk("flavor=%s error=%d",
+ show_pseudoflavor(__entry->flavor), __entry->error)
+);
+
+
+#endif /* _TRACE_RPCGSS_H */
+
+#include <trace/define_trace.h>
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index 399b1ae..962975b 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -521,12 +521,18 @@

TP_STRUCT__entry(
__field(const void *, req)
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
__field(int, num_sge)
__field(int, signaled)
__field(int, status)
),

TP_fast_assign(
+ const struct rpc_rqst *rqst = &req->rl_slot;
+
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
__entry->req = req;
__entry->num_sge = req->rl_sendctx->sc_wr.num_sge;
__entry->signaled = req->rl_sendctx->sc_wr.send_flags &
@@ -534,9 +540,11 @@
__entry->status = status;
),

- TP_printk("req=%p, %d SGEs%s, status=%d",
+ TP_printk("task:%u@%u req=%p (%d SGE%s) %sstatus=%d",
+ __entry->task_id, __entry->client_id,
__entry->req, __entry->num_sge,
- (__entry->signaled ? ", signaled" : ""),
+ (__entry->num_sge == 1 ? "" : "s"),
+ (__entry->signaled ? "signaled " : ""),
__entry->status
)
);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 0654e9c..e58dda8 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -655,9 +655,68 @@

DEFINE_RPC_XPRT_EVENT(timer);
DEFINE_RPC_XPRT_EVENT(lookup_rqst);
-DEFINE_RPC_XPRT_EVENT(transmit);
DEFINE_RPC_XPRT_EVENT(complete_rqst);

+TRACE_EVENT(xprt_transmit,
+ TP_PROTO(
+ const struct rpc_rqst *rqst,
+ int status
+ ),
+
+ TP_ARGS(rqst, status),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seqno)
+ __field(int, status)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = rqst->rq_task->tk_pid;
+ __entry->client_id = rqst->rq_task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->seqno = rqst->rq_seqno;
+ __entry->status = status;
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x seqno=%u status=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->seqno, __entry->status)
+);
+
+TRACE_EVENT(xprt_enq_xmit,
+ TP_PROTO(
+ const struct rpc_task *task,
+ int stage
+ ),
+
+ TP_ARGS(task, stage),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, seqno)
+ __field(int, stage)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(task->tk_rqstp->rq_xid);
+ __entry->seqno = task->tk_rqstp->rq_seqno;
+ __entry->stage = stage;
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x seqno=%u stage=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->seqno, __entry->stage)
+);
+
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),

diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index c374268..4a29f4c 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -7,7 +7,7 @@ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o

auth_rpcgss-y := auth_gss.o gss_generic_token.o \
gss_mech_switch.o svcauth_gss.o \
- gss_rpc_upcall.o gss_rpc_xdr.o
+ gss_rpc_upcall.o gss_rpc_xdr.o trace.o

obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 206788e..3d1fbd6 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -55,6 +55,8 @@

#include "../netns.h"

+#include <trace/events/rpcgss.h>
+
static const struct rpc_authops authgss_ops;

static const struct rpc_credops gss_credops;
@@ -260,6 +262,7 @@ struct gss_auth {
}
ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
if (ret < 0) {
+ trace_rpcgss_import_ctx(ret);
p = ERR_PTR(ret);
goto err;
}
@@ -275,12 +278,9 @@ struct gss_auth {
if (IS_ERR(p))
goto err;
done:
- dprintk("RPC: %s Success. gc_expiry %lu now %lu timeout %u acceptor %.*s\n",
- __func__, ctx->gc_expiry, now, timeout, ctx->gc_acceptor.len,
- ctx->gc_acceptor.data);
- return p;
+ trace_rpcgss_context(ctx->gc_expiry, now, timeout,
+ ctx->gc_acceptor.len, ctx->gc_acceptor.data);
err:
- dprintk("RPC: %s returns error %ld\n", __func__, -PTR_ERR(p));
return p;
}

@@ -354,10 +354,8 @@ static void put_pipe_version(struct net *net)
if (auth && pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
- dprintk("RPC: %s found msg %p\n", __func__, pos);
return pos;
}
- dprintk("RPC: %s found nothing\n", __func__);
return NULL;
}

@@ -456,7 +454,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
size_t buflen = sizeof(gss_msg->databuf);
int len;

- len = scnprintf(p, buflen, "mech=%s uid=%d ", mech->gm_name,
+ len = scnprintf(p, buflen, "mech=%s uid=%d", mech->gm_name,
from_kuid(&init_user_ns, gss_msg->uid));
buflen -= len;
p += len;
@@ -467,7 +465,7 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
* identity that we are authenticating to.
*/
if (target_name) {
- len = scnprintf(p, buflen, "target=%s ", target_name);
+ len = scnprintf(p, buflen, " target=%s", target_name);
buflen -= len;
p += len;
gss_msg->msg.len += len;
@@ -487,11 +485,11 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
char *c = strchr(service_name, '@');

if (!c)
- len = scnprintf(p, buflen, "service=%s ",
+ len = scnprintf(p, buflen, " service=%s",
service_name);
else
len = scnprintf(p, buflen,
- "service=%.*s srchost=%s ",
+ " service=%.*s srchost=%s",
(int)(c - service_name),
service_name, c + 1);
buflen -= len;
@@ -500,17 +498,17 @@ static int gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
}

if (mech->gm_upcall_enctypes) {
- len = scnprintf(p, buflen, "enctypes=%s ",
+ len = scnprintf(p, buflen, " enctypes=%s",
mech->gm_upcall_enctypes);
buflen -= len;
p += len;
gss_msg->msg.len += len;
}
+ trace_rpcgss_upcall_msg(gss_msg->databuf);
len = scnprintf(p, buflen, "\n");
if (len == 0)
goto out_overflow;
gss_msg->msg.len += len;
-
gss_msg->msg.data = gss_msg->databuf;
return 0;
out_overflow:
@@ -603,8 +601,6 @@ static void warn_gssd(void)
struct rpc_pipe *pipe;
int err = 0;

- dprintk("RPC: %5u %s for uid %u\n",
- task->tk_pid, __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
/* XXX: warning on the first, under the assumption we
@@ -612,7 +608,8 @@ static void warn_gssd(void)
warn_gssd();
task->tk_timeout = 15*HZ;
rpc_sleep_on(&pipe_version_rpc_waitqueue, task, NULL);
- return -EAGAIN;
+ err = -EAGAIN;
+ goto out;
}
if (IS_ERR(gss_msg)) {
err = PTR_ERR(gss_msg);
@@ -635,9 +632,8 @@ static void warn_gssd(void)
spin_unlock(&pipe->lock);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %5u %s for uid %u result %d\n",
- task->tk_pid, __func__,
- from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+ trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+ cred->cr_cred->fsuid), err);
return err;
}

@@ -652,14 +648,13 @@ static void warn_gssd(void)
DEFINE_WAIT(wait);
int err;

- dprintk("RPC: %s for uid %u\n",
- __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid));
retry:
err = 0;
/* if gssd is down, just skip upcalling altogether */
if (!gssd_running(net)) {
warn_gssd();
- return -EACCES;
+ err = -EACCES;
+ goto out;
}
gss_msg = gss_setup_upcall(gss_auth, cred);
if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -700,8 +695,8 @@ static void warn_gssd(void)
finish_wait(&gss_msg->waitqueue, &wait);
gss_release_msg(gss_msg);
out:
- dprintk("RPC: %s for uid %u result %d\n",
- __func__, from_kuid(&init_user_ns, cred->cr_cred->fsuid), err);
+ trace_rpcgss_upcall_result(from_kuid(&init_user_ns,
+ cred->cr_cred->fsuid), err);
return err;
}

@@ -794,7 +789,6 @@ static void warn_gssd(void)
err:
kfree(buf);
out:
- dprintk("RPC: %s returning %zd\n", __func__, err);
return err;
}

@@ -863,8 +857,6 @@ static int gss_pipe_open_v1(struct inode *inode)
struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);

if (msg->errno < 0) {
- dprintk("RPC: %s releasing msg %p\n",
- __func__, gss_msg);
refcount_inc(&gss_msg->count);
gss_unhash_msg(gss_msg);
if (msg->errno == -ETIMEDOUT)
@@ -1024,8 +1016,6 @@ static void gss_pipe_free(struct gss_pipe *p)
struct rpc_auth * auth;
int err = -ENOMEM; /* XXX? */

- dprintk("RPC: creating GSS authenticator for client %p\n", clnt);
-
if (!try_module_get(THIS_MODULE))
return ERR_PTR(err);
if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
@@ -1041,10 +1031,8 @@ static void gss_pipe_free(struct gss_pipe *p)
gss_auth->net = get_net(rpc_net_ns(clnt));
err = -EINVAL;
gss_auth->mech = gss_mech_get_by_pseudoflavor(flavor);
- if (!gss_auth->mech) {
- dprintk("RPC: Pseudoflavor %d not found!\n", flavor);
+ if (!gss_auth->mech)
goto err_put_net;
- }
gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor);
if (gss_auth->service == 0)
goto err_put_mech;
@@ -1099,6 +1087,7 @@ static void gss_pipe_free(struct gss_pipe *p)
kfree(gss_auth);
out_dec:
module_put(THIS_MODULE);
+ trace_rpcgss_createauth(flavor, err);
return ERR_PTR(err);
}

@@ -1135,9 +1124,6 @@ static void gss_pipe_free(struct gss_pipe *p)
struct gss_auth *gss_auth = container_of(auth,
struct gss_auth, rpc_auth);

- dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
- auth, auth->au_flavor);
-
if (hash_hashed(&gss_auth->hash)) {
spin_lock(&gss_auth_hash_lock);
hash_del(&gss_auth->hash);
@@ -1300,8 +1286,6 @@ static void gss_pipe_free(struct gss_pipe *p)
static void
gss_do_free_ctx(struct gss_cl_ctx *ctx)
{
- dprintk("RPC: %s\n", __func__);
-
gss_delete_sec_context(&ctx->gc_gss_ctx);
kfree(ctx->gc_wire_ctx.data);
kfree(ctx->gc_acceptor.data);
@@ -1324,7 +1308,6 @@ static void gss_pipe_free(struct gss_pipe *p)
static void
gss_free_cred(struct gss_cred *gss_cred)
{
- dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
kfree(gss_cred);
}

@@ -1381,10 +1364,6 @@ static void gss_pipe_free(struct gss_pipe *p)
struct gss_cred *cred = NULL;
int err = -ENOMEM;

- dprintk("RPC: %s for uid %d, flavor %d\n",
- __func__, from_kuid(&init_user_ns, acred->cred->fsuid),
- auth->au_flavor);
-
if (!(cred = kzalloc(sizeof(*cred), gfp)))
goto out_err;

@@ -1400,7 +1379,6 @@ static void gss_pipe_free(struct gss_pipe *p)
return &cred->gc_base;

out_err:
- dprintk("RPC: %s failed with error %d\n", __func__, err);
return ERR_PTR(err);
}

@@ -1544,15 +1522,14 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
struct xdr_netobj mic;
struct kvec iov;
struct xdr_buf verf_buf;
-
- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
+ int status;

/* Credential */

p = xdr_reserve_space(xdr, 7 * sizeof(*p) +
ctx->gc_wire_ctx.len);
if (!p)
- goto out_put_ctx;
+ goto marshal_failed;
*p++ = rpc_auth_gss;
cred_len = p++;

@@ -1560,7 +1537,8 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)
req->rq_seqno = (ctx->gc_seq < MAXSEQ) ? ctx->gc_seq++ : MAXSEQ;
spin_unlock(&ctx->gc_seq_lock);
if (req->rq_seqno == MAXSEQ)
- goto out_expired;
+ goto expired;
+ trace_rpcgss_seqno(task);

*p++ = cpu_to_be32(RPC_GSS_VERSION);
*p++ = cpu_to_be32(ctx->gc_proc);
@@ -1579,25 +1557,31 @@ static int gss_marshal(struct rpc_task *task, struct xdr_stream *xdr)

p = xdr_reserve_space(xdr, sizeof(*p));
if (!p)
- goto out_put_ctx;
+ goto marshal_failed;
*p++ = rpc_auth_gss;
mic.data = (u8 *)(p + 1);
maj_stat = gss_get_mic(ctx->gc_gss_ctx, &verf_buf, &mic);
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
- goto out_expired;
+ goto expired;
else if (maj_stat != 0)
- goto out_put_ctx;
+ goto bad_mic;
if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
- goto out_put_ctx;
- gss_put_ctx(ctx);
- return 0;
-out_expired:
+ goto marshal_failed;
+ status = 0;
+out:
gss_put_ctx(ctx);
+ return status;
+expired:
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
- return -EKEYEXPIRED;
-out_put_ctx:
- gss_put_ctx(ctx);
- return -EMSGSIZE;
+ status = -EKEYEXPIRED;
+ goto out;
+marshal_failed:
+ status = -EMSGSIZE;
+ goto out;
+bad_mic:
+ trace_rpcgss_get_mic(task, maj_stat);
+ status = -EIO;
+ goto out;
}

static int gss_renew_cred(struct rpc_task *task)
@@ -1723,8 +1707,7 @@ static int gss_cred_is_negative_entry(struct rpc_cred *cred)
status = -EIO;
goto out;
bad_mic:
- dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
- task->tk_pid, __func__, maj_stat);
+ trace_rpcgss_verify_mic(task, maj_stat);
status = -EACCES;
goto out;
}
@@ -1761,13 +1744,16 @@ static int gss_wrap_req_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- goto wrap_failed;
+ goto bad_mic;
/* Check that the trailing MIC fit in the buffer, after the fact */
if (xdr_stream_encode_opaque_inline(xdr, (void **)&p, mic.len) < 0)
goto wrap_failed;
return 0;
wrap_failed:
return -EMSGSIZE;
+bad_mic:
+ trace_rpcgss_get_mic(task, maj_stat);
+ return -EIO;
}

static void
@@ -1860,7 +1846,6 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
memcpy(tmp, snd_buf->tail[0].iov_base, snd_buf->tail[0].iov_len);
snd_buf->tail[0].iov_base = tmp;
}
- status = -EIO;
offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
maj_stat = gss_wrap(ctx->gc_gss_ctx, offset, snd_buf, inpages);
/* slack space should prevent this ever happening: */
@@ -1871,7 +1856,7 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
if (maj_stat == GSS_S_CONTEXT_EXPIRED)
clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
else if (maj_stat)
- goto wrap_failed;
+ goto bad_wrap;

*opaque_len = cpu_to_be32(snd_buf->len - offset);
/* guess whether the pad goes into the head or the tail: */
@@ -1888,6 +1873,9 @@ static int gss_wrap_req_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
return 0;
wrap_failed:
return status;
+bad_wrap:
+ trace_rpcgss_wrap(task, maj_stat);
+ return -EIO;
}

static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
@@ -1898,7 +1886,6 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
int status;

- dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
status = -EIO;
if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
/* The spec seems a little ambiguous here, but I think that not
@@ -1917,10 +1904,11 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
case RPC_GSS_SVC_PRIVACY:
status = gss_wrap_req_priv(cred, ctx, task, xdr);
break;
+ default:
+ status = -EIO;
}
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
return status;
}

@@ -1932,8 +1920,9 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
}

static int
-gss_unwrap_resp_integ(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+gss_unwrap_resp_integ(struct rpc_task *task, struct rpc_cred *cred,
+ struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr)
{
struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
u32 data_offset, mic_offset, integ_len, maj_stat;
@@ -1951,7 +1940,7 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
if (mic_offset > rcv_buf->len)
goto unwrap_failed;
if (be32_to_cpup(p) != rqstp->rq_seqno)
- goto unwrap_failed;
+ goto bad_seqno;

if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, integ_len))
goto unwrap_failed;
@@ -1967,16 +1956,20 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
1 + XDR_QUADLEN(mic.len);
return 0;
unwrap_failed:
+ trace_rpcgss_unwrap_failed(task);
+ return -EIO;
+bad_seqno:
+ trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(p));
return -EIO;
bad_mic:
- dprintk("RPC: %s: gss_verify_mic returned error 0x%08x\n",
- __func__, maj_stat);
+ trace_rpcgss_verify_mic(task, maj_stat);
return -EIO;
}

static int
-gss_unwrap_resp_priv(struct rpc_cred *cred, struct gss_cl_ctx *ctx,
- struct rpc_rqst *rqstp, struct xdr_stream *xdr)
+gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred,
+ struct gss_cl_ctx *ctx, struct rpc_rqst *rqstp,
+ struct xdr_stream *xdr)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
@@ -2000,7 +1993,7 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
goto bad_unwrap;
/* gss_unwrap decrypted the sequence number */
if (be32_to_cpup(p++) != rqstp->rq_seqno)
- goto unwrap_failed;
+ goto bad_seqno;

/* gss_unwrap redacts the opaque blob from the head iovec.
* rcv_buf has changed, thus the stream needs to be reset.
@@ -2011,10 +2004,13 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
XDR_QUADLEN(savedlen - rcv_buf->len);
return 0;
unwrap_failed:
+ trace_rpcgss_unwrap_failed(task);
+ return -EIO;
+bad_seqno:
+ trace_rpcgss_bad_seqno(task, rqstp->rq_seqno, be32_to_cpup(--p));
return -EIO;
bad_unwrap:
- dprintk("RPC: %s: gss_unwrap returned error 0x%08x\n",
- __func__, maj_stat);
+ trace_rpcgss_unwrap(task, maj_stat);
return -EIO;
}

@@ -2030,14 +2026,14 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_cred *cred = req->rq_cred;
struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
- u32 win, seq_xmit;
+ u32 win, seq_xmit = 0;
bool ret = true;

if (!ctx)
- return true;
+ goto out;

if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
- goto out;
+ goto out_ctx;

seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
@@ -2046,15 +2042,18 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
if (seq_xmit == tmp) {
ret = false;
- goto out;
+ goto out_ctx;
}
}

win = ctx->gc_win;
if (win > 0)
ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
-out:
+
+out_ctx:
gss_put_ctx(ctx);
+out:
+ trace_rpcgss_need_reencode(task, seq_xmit, ret);
return ret;
}

@@ -2075,10 +2074,10 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
status = gss_unwrap_resp_auth(cred);
break;
case RPC_GSS_SVC_INTEGRITY:
- status = gss_unwrap_resp_integ(cred, ctx, rqstp, xdr);
+ status = gss_unwrap_resp_integ(task, cred, ctx, rqstp, xdr);
break;
case RPC_GSS_SVC_PRIVACY:
- status = gss_unwrap_resp_priv(cred, ctx, rqstp, xdr);
+ status = gss_unwrap_resp_priv(task, cred, ctx, rqstp, xdr);
break;
}
if (status)
@@ -2088,8 +2087,6 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
status = rpcauth_unwrap_resp_decode(task, xdr);
out:
gss_put_ctx(ctx);
- dprintk("RPC: %5u %s returning %d\n",
- task->tk_pid, __func__, status);
return status;
}

diff --git a/net/sunrpc/auth_gss/trace.c b/net/sunrpc/auth_gss/trace.c
new file mode 100644
index 0000000..5576f1e
--- /dev/null
+++ b/net/sunrpc/auth_gss/trace.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018, 2019 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
+#include <linux/sunrpc/gss_err.h>
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/rpcgss.h>
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index f1ec211..bc7489f 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -1165,6 +1165,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 1);
goto out;
}
} else if (RPC_IS_SWAPPER(task)) {
@@ -1176,6 +1177,7 @@ void xprt_request_wait_receive(struct rpc_task *task)
/* Note: req is added _before_ pos */
list_add_tail(&req->rq_xmit, &pos->rq_xmit);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 2);
goto out;
}
} else if (!req->rq_seqno) {
@@ -1184,11 +1186,13 @@ void xprt_request_wait_receive(struct rpc_task *task)
continue;
list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
INIT_LIST_HEAD(&req->rq_xmit);
+ trace_xprt_enq_xmit(task, 3);
goto out;
}
}
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
+ trace_xprt_enq_xmit(task, 4);
out:
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
@@ -1313,8 +1317,6 @@ void xprt_end_transmit(struct rpc_task *task)
int is_retrans = RPC_WAS_SENT(task);
int status;

- dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
-
if (!req->rq_bytes_sent) {
if (xprt_request_data_received(task)) {
status = 0;
@@ -1336,9 +1338,9 @@ void xprt_end_transmit(struct rpc_task *task)

connect_cookie = xprt->connect_cookie;
status = xprt->ops->send_request(req);
- trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
req->rq_ntrans--;
+ trace_xprt_transmit(req, status);
return status;
}

@@ -1347,7 +1349,6 @@ void xprt_end_transmit(struct rpc_task *task)

xprt_inject_disconnect(xprt);

- dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
spin_lock_bh(&xprt->transport_lock);

@@ -1360,6 +1361,7 @@ void xprt_end_transmit(struct rpc_task *task)

req->rq_connect_cookie = connect_cookie;
out_dequeue:
+ trace_xprt_transmit(req, status);
xprt_request_dequeue_transmit(task);
rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
return status;


2019-02-11 16:25:13

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 17/23] SUNRPC: Remove xdr_buf_trim()

The key action of xdr_buf_trim() is that it shortens buf->len, the
length of the xdr_buf's content. The other actions -- shortening the
head, pages, and tail components -- are actually not necessary. In
particular, changing the size of those components can corrupt the
RPC message contained in the buffer. This is an accident waiting to
happen rather than a current bug, as far as we know.

Signed-off-by: Chuck Lever <[email protected]>
Acked-by: Bruce Fields <[email protected]>
---
include/linux/sunrpc/xdr.h | 1 -
net/sunrpc/auth_gss/gss_krb5_wrap.c | 8 ++++---
net/sunrpc/auth_gss/svcauth_gss.c | 2 +-
net/sunrpc/xdr.c | 41 -----------------------------------
4 files changed, 6 insertions(+), 46 deletions(-)

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 65af6a2..9ee3970 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -179,7 +179,6 @@ static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int le
extern void xdr_shift_buf(struct xdr_buf *, size_t);
extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
-extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 5cdde6c..14a0aff 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -570,14 +570,16 @@ static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
*/
movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
- BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
- buf->head[0].iov_len);
+ if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+ buf->head[0].iov_len)
+ return GSS_S_FAILURE;
memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;

/* Trim off the trailing "extra count" and checksum blob */
- xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
+ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip;
+
return GSS_S_COMPLETE;
}

diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 152790e..f1aabab 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -896,7 +896,7 @@ u32 svcauth_gss_flavor(struct auth_domain *dom)
if (svc_getnl(&buf->head[0]) != seq)
goto out;
/* trim off the mic and padding at the end before returning */
- xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+ buf->len -= 4 + round_up_to_quad(mic.len);
stat = 0;
out:
kfree(mic.data);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 5f0aa53..4bce619 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1139,47 +1139,6 @@ void xdr_enter_page(struct xdr_stream *xdr, unsigned int len)
}
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);

-/**
- * xdr_buf_trim - lop at most "len" bytes off the end of "buf"
- * @buf: buf to be trimmed
- * @len: number of bytes to reduce "buf" by
- *
- * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note
- * that it's possible that we'll trim less than that amount if the xdr_buf is
- * too small, or if (for instance) it's all in the head and the parser has
- * already read too far into it.
- */
-void xdr_buf_trim(struct xdr_buf *buf, unsigned int len)
-{
- size_t cur;
- unsigned int trim = len;
-
- if (buf->tail[0].iov_len) {
- cur = min_t(size_t, buf->tail[0].iov_len, trim);
- buf->tail[0].iov_len -= cur;
- trim -= cur;
- if (!trim)
- goto fix_len;
- }
-
- if (buf->page_len) {
- cur = min_t(unsigned int, buf->page_len, trim);
- buf->page_len -= cur;
- trim -= cur;
- if (!trim)
- goto fix_len;
- }
-
- if (buf->head[0].iov_len) {
- cur = min_t(size_t, buf->head[0].iov_len, trim);
- buf->head[0].iov_len -= cur;
- trim -= cur;
- }
-fix_len:
- buf->len -= (len - trim);
-}
-EXPORT_SYMBOL_GPL(xdr_buf_trim);
-
static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
{
unsigned int this_len;


2019-02-11 16:25:19

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 18/23] SUNRPC: Add SPDX IDs to some net/sunrpc/auth_gss/ files

Files under net/sunrpc/auth_gss/ do not yet have SPDX ID tags.
This directory is somewhat complicated because most of these files
have license boilerplate that is not strictly GPL 2.0.

In this patch I add ID tags where there is an obvious match. The
less recognizable licenses are still under research.

For reference, SPDX IDs added in this patch correspond to the
following license text:

GPL-2.0 https://spdx.org/licenses/GPL-2.0.html
GPL-2.0+ https://spdx.org/licenses/GPL-2.0+.html
BSD-3-Clause https://spdx.org/licenses/BSD-3-Clause.html

Cc: Simo Sorce <[email protected]>
Cc: Kate Stewart <[email protected]>
Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/auth_gss/auth_gss.c | 27 +--------------------------
net/sunrpc/auth_gss/gss_krb5_mech.c | 27 +--------------------------
net/sunrpc/auth_gss/gss_mech_switch.c | 27 +--------------------------
net/sunrpc/auth_gss/gss_rpc_upcall.c | 15 +--------------
net/sunrpc/auth_gss/gss_rpc_upcall.h | 16 ++--------------
net/sunrpc/auth_gss/gss_rpc_xdr.c | 15 +--------------
net/sunrpc/auth_gss/gss_rpc_xdr.h | 17 +----------------
net/sunrpc/auth_gss/svcauth_gss.c | 1 +
8 files changed, 9 insertions(+), 136 deletions(-)

diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 3d1fbd6..fda454c 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/auth_gss/auth_gss.c
*
@@ -8,34 +9,8 @@
*
* Dug Song <[email protected]>
* Andy Adamson <[email protected]>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

-
#include <linux/module.h>
#include <linux/init.h>
#include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index be31a58..56cc85c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_krb5_mech.c
*
@@ -6,32 +7,6 @@
*
* Andy Adamson <[email protected]>
* J. Bruce Fields <[email protected]>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/

#include <crypto/hash.h>
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 379318d..8206009 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: BSD-3-Clause
/*
* linux/net/sunrpc/gss_mech_switch.c
*
@@ -5,32 +6,6 @@
* All rights reserved.
*
* J. Bruce Fields <[email protected]>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 3. Neither the name of the University nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
- * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
*/

#include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index 73dcda0..0349f45 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* linux/net/sunrpc/gss_rpc_upcall.c
*
* Copyright (C) 2012 Simo Sorce <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/types.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
index 1e542ad..31e9634 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.h
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* linux/net/sunrpc/gss_rpc_upcall.h
*
* Copyright (C) 2012 Simo Sorce <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _GSS_RPC_UPCALL_H
@@ -45,4 +32,5 @@ int gssp_accept_sec_context_upcall(struct net *net,
void init_gssp_clnt(struct sunrpc_net *);
int set_gssp_clnt(struct net *);
void clear_gssp_clnt(struct sunrpc_net *);
+
#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 006062a..2ff7b70 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <linux/sunrpc/svcauth.h>
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
index 146c310..3f17411 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.h
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.h
@@ -1,21 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* GSS Proxy upcall module
*
* Copyright (C) 2012 Simo Sorce <[email protected]>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#ifndef _LINUX_GSS_RPC_XDR_H
@@ -262,6 +249,4 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
#define GSSX_ARG_wrap_size_limit_sz 0
#define GSSX_RES_wrap_size_limit_sz 0

-
-
#endif /* _LINUX_GSS_RPC_XDR_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index f1aabab..0c5d789 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Neil Brown <[email protected]>
* J. Bruce Fields <[email protected]>


2019-02-11 16:25:24

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 19/23] SUNRPC: Introduce rpc_prepare_reply_pages()

prepare_reply_buffer() and its NFSv4 equivalents expose the details
of the RPC header and the auth slack values to upper layer
consumers, creating a layering violation, and duplicating code.

Remedy these issues by adding a new RPC client API that hides those
details from upper layers in a common helper function.

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/nfs2xdr.c | 27 +++++-----------------
fs/nfs/nfs3xdr.c | 29 ++++++-----------------
fs/nfs/nfs4xdr.c | 51 +++++++++++++++++------------------------
include/linux/sunrpc/clnt.h | 3 ++
include/trace/events/sunrpc.h | 37 ++++++++++++++++++++++++++++++
net/sunrpc/clnt.c | 19 +++++++++++++++
net/sunrpc/xdr.c | 9 +++++++
7 files changed, 102 insertions(+), 73 deletions(-)

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index bac3a4e..1dcd0fe 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -66,21 +66,6 @@
static int nfs_stat_to_errno(enum nfs_stat);

/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
- unsigned int base, unsigned int len,
- unsigned int bufsize)
-{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
-
- replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
- xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
-/*
* Encode/decode NFSv2 basic data types
*
* Basic NFSv2 data types are defined in section 2.3 of RFC 1094:
@@ -593,8 +578,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
const struct nfs_readlinkargs *args = data;

encode_fhandle(xdr, args->fh);
- prepare_reply_buffer(req, args->pages, args->pgbase,
- args->pglen, NFS_readlinkres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->pglen, NFS_readlinkres_sz);
}

/*
@@ -629,8 +614,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
const struct nfs_pgio_args *args = data;

encode_readargs(xdr, args);
- prepare_reply_buffer(req, args->pages, args->pgbase,
- args->count, NFS_readres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, NFS_readres_sz);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}

@@ -787,8 +772,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
const struct nfs_readdirargs *args = data;

encode_readdirargs(xdr, args);
- prepare_reply_buffer(req, args->pages, 0,
- args->count, NFS_readdirres_sz);
+ rpc_prepare_reply_pages(req, args->pages, 0,
+ args->count, NFS_readdirres_sz);
}

/*
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 4aa3ffe..a54dcf4 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -105,21 +105,6 @@
};

/*
- * While encoding arguments, set up the reply buffer in advance to
- * receive reply data directly into the page cache.
- */
-static void prepare_reply_buffer(struct rpc_rqst *req, struct page **pages,
- unsigned int base, unsigned int len,
- unsigned int bufsize)
-{
- struct rpc_auth *auth = req->rq_cred->cr_auth;
- unsigned int replen;
-
- replen = RPC_REPHDRSIZE + auth->au_rslack + bufsize;
- xdr_inline_pages(&req->rq_rcv_buf, replen << 2, pages, base, len);
-}
-
-/*
* Encode/decode NFSv3 basic data types
*
* Basic NFSv3 data types are defined in section 2.5 of RFC 1813:
@@ -910,8 +895,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
const struct nfs3_readlinkargs *args = data;

encode_nfs_fh3(xdr, args->fh);
- prepare_reply_buffer(req, args->pages, args->pgbase,
- args->pglen, NFS3_readlinkres_sz);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->pglen, NFS3_readlinkres_sz);
}

/*
@@ -943,8 +928,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;

encode_read3args(xdr, args);
- prepare_reply_buffer(req, args->pages, args->pgbase,
- args->count, replen);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, replen);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}

@@ -1236,7 +1221,7 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;

encode_readdir3args(xdr, args);
- prepare_reply_buffer(req, args->pages, 0,
+ rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
}

@@ -1278,7 +1263,7 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
const struct nfs3_readdirargs *args = data;

encode_readdirplus3args(xdr, args);
- prepare_reply_buffer(req, args->pages, 0,
+ rpc_prepare_reply_pages(req, args->pages, 0,
args->count, NFS3_readdirres_sz);
}

@@ -1323,7 +1308,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
encode_nfs_fh3(xdr, args->fh);
encode_uint32(xdr, args->mask);
if (args->mask & (NFS_ACL | NFS_DFACL)) {
- prepare_reply_buffer(req, args->pages, 0,
+ rpc_prepare_reply_pages(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
ACL3_getaclres_sz);
req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38a4cbc..d0fa18d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1016,12 +1016,11 @@ static void encode_compound_hdr(struct xdr_stream *xdr,
struct compound_hdr *hdr)
{
__be32 *p;
- struct rpc_auth *auth = req->rq_cred->cr_auth;

/* initialize running count of expected bytes in reply.
* NOTE: the replied tag SHOULD be the same is the one sent,
* but this is not required as a MUST for the server to do so. */
- hdr->replen = RPC_REPHDRSIZE + auth->au_rslack + 3 + hdr->taglen;
+ hdr->replen = 3 + hdr->taglen;

WARN_ON_ONCE(hdr->taglen > NFS4_MAXTAGLEN);
encode_string(xdr, hdr->taglen, hdr->tag);
@@ -2341,9 +2340,9 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
if (args->lg_args) {
encode_layoutget(xdr, args->lg_args, &hdr);
- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
- args->lg_args->layout.pages,
- 0, args->lg_args->layout.pglen);
+ rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+ args->lg_args->layout.pglen,
+ hdr.replen);
}
encode_nops(&hdr);
}
@@ -2387,9 +2386,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
if (args->lg_args) {
encode_layoutget(xdr, args->lg_args, &hdr);
- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
- args->lg_args->layout.pages,
- 0, args->lg_args->layout.pglen);
+ rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
+ args->lg_args->layout.pglen,
+ hdr.replen);
}
encode_nops(&hdr);
}
@@ -2499,8 +2498,8 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_readlink(xdr, args, req, &hdr);

- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
- args->pgbase, args->pglen);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->pglen, hdr.replen);
encode_nops(&hdr);
}

@@ -2520,11 +2519,8 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_readdir(xdr, args, req, &hdr);

- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2, args->pages,
- args->pgbase, args->count);
- dprintk("%s: inlined page args = (%u, %p, %u, %u)\n",
- __func__, hdr.replen << 2, args->pages,
- args->pgbase, args->count);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, hdr.replen);
encode_nops(&hdr);
}

@@ -2544,8 +2540,8 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_putfh(xdr, args->fh, &hdr);
encode_read(xdr, args, &hdr);

- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
- args->pages, args->pgbase, args->count);
+ rpc_prepare_reply_pages(req, args->pages, args->pgbase,
+ args->count, hdr.replen);
req->rq_rcv_buf.flags |= XDRBUF_READ;
encode_nops(&hdr);
}
@@ -2591,9 +2587,8 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
encode_getattr(xdr, nfs4_acl_bitmap, NULL,
ARRAY_SIZE(nfs4_acl_bitmap), &hdr);

- xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
- args->acl_pages, 0, args->acl_len);
-
+ rpc_prepare_reply_pages(req, args->acl_pages, 0,
+ args->acl_len, replen);
encode_nops(&hdr);
}

@@ -2814,9 +2809,8 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
encode_fs_locations(xdr, args->bitmask, &hdr);
}

- /* Set up reply kvec to capture returned fs_locations array. */
- xdr_inline_pages(&req->rq_rcv_buf, replen << 2,
- (struct page **)&args->page, 0, PAGE_SIZE);
+ rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
+ PAGE_SIZE, replen);
encode_nops(&hdr);
}

@@ -3018,10 +3012,8 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,

/* set up reply kvec. Subtract notification bitmap max size (2)
* so that notification bitmap is put in xdr_buf tail */
- xdr_inline_pages(&req->rq_rcv_buf, (hdr.replen - 2) << 2,
- args->pdev->pages, args->pdev->pgbase,
- args->pdev->pglen);
-
+ rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
+ args->pdev->pglen, hdr.replen - 2);
encode_nops(&hdr);
}

@@ -3042,9 +3034,8 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
encode_putfh(xdr, NFS_FH(args->inode), &hdr);
encode_layoutget(xdr, args, &hdr);

- xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
- args->layout.pages, 0, args->layout.pglen);
-
+ rpc_prepare_reply_pages(req, args->layout.pages, 0,
+ args->layout.pglen, hdr.replen);
encode_nops(&hdr);
}

diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 1c44171..98bc988 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -169,6 +169,9 @@ int rpcb_v4_register(struct net *net, const u32 program,
const char *netid);
void rpcb_getport_async(struct rpc_task *);

+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int hdrsize);
void rpc_call_start(struct rpc_task *);
int rpc_call_async(struct rpc_clnt *clnt,
const struct rpc_message *msg, int flags,
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index e58dda8..8451f30 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -461,6 +461,43 @@
)
);

+TRACE_EVENT(rpc_reply_pages,
+ TP_PROTO(
+ const struct rpc_rqst *req
+ ),
+
+ TP_ARGS(req),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(const void *, head_base)
+ __field(size_t, head_len)
+ __field(const void *, tail_base)
+ __field(size_t, tail_len)
+ __field(unsigned int, page_len)
+ ),
+
+ TP_fast_assign(
+ __entry->task_id = req->rq_task->tk_pid;
+ __entry->client_id = req->rq_task->tk_client->cl_clid;
+
+ __entry->head_base = req->rq_rcv_buf.head[0].iov_base;
+ __entry->head_len = req->rq_rcv_buf.head[0].iov_len;
+ __entry->page_len = req->rq_rcv_buf.page_len;
+ __entry->tail_base = req->rq_rcv_buf.tail[0].iov_base;
+ __entry->tail_len = req->rq_rcv_buf.tail[0].iov_len;
+ ),
+
+ TP_printk(
+ "task:%u@%u xdr=[%p,%zu]/%u/[%p,%zu]\n",
+ __entry->task_id, __entry->client_id,
+ __entry->head_base, __entry->head_len,
+ __entry->page_len,
+ __entry->tail_base, __entry->tail_len
+ )
+);
+
/*
* First define the enums in the below macros to be exported to userspace
* via TRACE_DEFINE_ENUM().
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 803e931..f780605 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1164,6 +1164,25 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */

+/**
+ * rpc_prepare_reply_pages - Prepare to receive a reply data payload into pages
+ * @req: RPC request to prepare
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ * @hdrsize: expected size of upper layer reply header, in XDR words
+ *
+ */
+void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
+ unsigned int base, unsigned int len,
+ unsigned int hdrsize)
+{
+ hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
+ xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
+ trace_rpc_reply_pages(req);
+}
+EXPORT_SYMBOL_GPL(rpc_prepare_reply_pages);
+
void
rpc_call_start(struct rpc_task *task)
{
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4bce619..7cca515 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -163,6 +163,15 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
buf->bvec = NULL;
}

+/**
+ * xdr_inline_pages - Prepare receive buffer for a large reply
+ * @xdr: xdr_buf into which reply will be placed
+ * @offset: expected offset where data payload will start, in bytes
+ * @pages: vector of struct page pointers
+ * @base: offset in first page where receive should start, in bytes
+ * @len: expected size of the upper layer data payload, in bytes
+ *
+ */
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)


2019-02-11 16:25:30

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

Certain NFS results (e.g. READLINK) might expect a data payload that
is not an exact multiple of 4 bytes. In this case, XDR encoding
is required to pad that payload so its length on the wire is a
multiple of 4 bytes. The constants that define the maximum size of
each NFS result do not appear to account for this extra word.

In each case where the data payload is to be received into pages:

- 1 word is added to the size of the receive buffer allocated by
call_allocate

- rpc_prepare_reply_pages subtracts 1 word from @hdrsize so that the
extra buffer space falls into the rcv_buf's tail iovec

- If buf->page_len is word-aligned, an XDR pad is not needed and
is thus removed from the tail

Signed-off-by: Chuck Lever <[email protected]>
---
fs/nfs/nfs2xdr.c | 6 +++---
fs/nfs/nfs3xdr.c | 10 +++++-----
fs/nfs/nfs4xdr.c | 15 ++++++++-------
net/sunrpc/clnt.c | 6 +++++-
net/sunrpc/xdr.c | 2 ++
5 files changed, 23 insertions(+), 16 deletions(-)

diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 1dcd0fe..a7ed29d 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -56,11 +56,11 @@

#define NFS_attrstat_sz (1+NFS_fattr_sz)
#define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
-#define NFS_readlinkres_sz (2)
-#define NFS_readres_sz (1+NFS_fattr_sz+1)
+#define NFS_readlinkres_sz (2+1)
+#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
#define NFS_writeres_sz (NFS_attrstat_sz)
#define NFS_stat_sz (1)
-#define NFS_readdirres_sz (1)
+#define NFS_readdirres_sz (1+1)
#define NFS_statfsres_sz (1+NFS_info_sz)

static int nfs_stat_to_errno(enum nfs_stat);
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index a54dcf4..110358f 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -69,13 +69,13 @@
#define NFS3_removeres_sz (NFS3_setattrres_sz)
#define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
#define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
-#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
-#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
+#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
+#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
#define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
#define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
#define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
#define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
-#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
+#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
#define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
#define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
#define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
@@ -85,7 +85,7 @@
#define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
#define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
- XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
+ XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
#define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)

static int nfs3_stat_to_errno(enum nfs_stat);
@@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
- result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
+ result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d0fa18d..6d9d5e2 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
nfs4_fattr_bitmap_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
-#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
+#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
#define encode_readdir_maxsz (op_encode_hdr_maxsz + \
2 + encode_verifier_maxsz + 5 + \
nfs4_label_maxsz)
#define decode_readdir_maxsz (op_decode_hdr_maxsz + \
- decode_verifier_maxsz)
+ decode_verifier_maxsz + 1)
#define encode_readlink_maxsz (op_encode_hdr_maxsz)
-#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
+#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
#define encode_write_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 4)
#define decode_write_maxsz (op_decode_hdr_maxsz + \
@@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
#define encode_getacl_maxsz (encode_getattr_maxsz)
#define decode_getacl_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 1)
+ nfs4_fattr_bitmap_maxsz + 1 + 1)
#define encode_setacl_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_setacl_maxsz (decode_setattr_maxsz)
#define encode_fs_locations_maxsz \
(encode_getattr_maxsz)
#define decode_fs_locations_maxsz \
- (0)
+ (1)
#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))

@@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
1 /* opaque devaddr4 length */ + \
/* devaddr4 payload is read into page */ \
1 /* notification bitmap length */ + \
- 1 /* notification bitmap, word 0 */)
+ 1 /* notification bitmap, word 0 */ + \
+ 1 /* possible XDR padding */)
#define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
encode_stateid_maxsz)
#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
decode_stateid_maxsz + \
- XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
+ XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
2 /* offset */ + \
2 /* length */ + \
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index f780605..4ea38b0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
unsigned int base, unsigned int len,
unsigned int hdrsize)
{
- hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
+ /* Subtract one to force an extra word of buffer space for the
+ * payload's XDR pad to fall into the rcv_buf's tail iovec.
+ */
+ hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
+
xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
trace_rpc_reply_pages(req);
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 7cca515..aa8177d 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)

tail->iov_base = buf + offset;
tail->iov_len = buflen - offset;
+ if ((xdr->page_len & 3) == 0)
+ tail->iov_len -= sizeof(__be32);

xdr->buflen += len;
}


2019-02-11 16:25:35

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 21/23] SUNRPC: Make AUTH_SYS and AUTH_NULL set au_verfsize

au_verfsize will be needed for a non-flavor-specific computation
in a subsequent patch.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/auth.h | 3 +--
net/sunrpc/auth_gss/auth_gss.c | 1 +
net/sunrpc/auth_null.c | 1 +
net/sunrpc/auth_unix.c | 5 ++++-
4 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index c51e189..359dfdd 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -77,8 +77,7 @@ struct rpc_auth {
/* guess at number of u32's auth adds before
* reply data; normally the verifier size: */
unsigned int au_rslack;
- /* for gss, used to calculate au_rslack: */
- unsigned int au_verfsize;
+ unsigned int au_verfsize; /* size of reply verifier */

unsigned int au_flags; /* various flags */
const struct rpc_authops *au_ops; /* operations */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index fda454c..731e7a4 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1016,6 +1016,7 @@ static void gss_pipe_free(struct gss_pipe *p)
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
+ auth->au_verfsize = GSS_VERF_SLACK >> 2;
auth->au_flags = 0;
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index bf96975..9ae0824 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -114,6 +114,7 @@
struct rpc_auth null_auth = {
.au_cslack = NUL_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = REFCOUNT_INIT(1),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index 5ea84a9..a93c564 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -163,6 +163,7 @@
static int
unx_validate(struct rpc_task *task, struct xdr_stream *xdr)
{
+ struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
__be32 *p;
u32 size;

@@ -184,7 +185,8 @@
if (!p)
return -EIO;

- task->tk_rqstp->rq_cred->cr_auth->au_rslack = (size >> 2) + 2;
+ auth->au_verfsize = XDR_QUADLEN(size) + 2;
+ auth->au_rslack = XDR_QUADLEN(size) + 2;
return 0;
}

@@ -212,6 +214,7 @@ void rpc_destroy_authunix(void)
struct rpc_auth unix_auth = {
.au_cslack = UNX_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
+ .au_verfsize = NUL_REPLYSLACK,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
.au_count = REFCOUNT_INIT(1),


2019-02-11 16:25:40

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 22/23] SUNRPC: Add rpc_auth::au_ralign field

Currently rpc_prepare_reply_pages() uses au_rslack to estimate the
size of the upper layer reply header. This is fine for auth flavors
where au_verfsize == au_rslack.

However, some auth flavors have more going on. krb5i for example has
two more words after the verifier, and another blob following the
RPC message. The calculation involving au_rslack pushes the upper
layer reply header too far into the rcv_buf.

au_rslack is still valuable: it's the amount of buffer space needed
for the reply, and is used when allocating the reply buffer. We'll
keep that.

But, add a new field that can be used to properly estimate the
location of the upper layer header in each RPC reply, based on the
auth flavor in use.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/auth.h | 9 ++++-----
net/sunrpc/auth_gss/auth_gss.c | 18 +++++++++++++-----
net/sunrpc/auth_null.c | 1 +
net/sunrpc/auth_unix.c | 1 +
net/sunrpc/clnt.c | 2 +-
5 files changed, 20 insertions(+), 11 deletions(-)

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 359dfdd..5f9076f 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -74,13 +74,12 @@ struct rpc_cred {
struct rpc_authops;
struct rpc_auth {
unsigned int au_cslack; /* call cred size estimate */
- /* guess at number of u32's auth adds before
- * reply data; normally the verifier size: */
- unsigned int au_rslack;
+ unsigned int au_rslack; /* reply cred size estimate */
unsigned int au_verfsize; /* size of reply verifier */
+ unsigned int au_ralign; /* words before UL header */

- unsigned int au_flags; /* various flags */
- const struct rpc_authops *au_ops; /* operations */
+ unsigned int au_flags;
+ const struct rpc_authops *au_ops;
rpc_authflavor_t au_flavor; /* pseudoflavor (note may
* differ from the flavor in
* au_ops->au_flavor in gss
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 731e7a4..c67e2ad 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -1017,6 +1017,7 @@ static void gss_pipe_free(struct gss_pipe *p)
auth->au_cslack = GSS_CRED_SLACK >> 2;
auth->au_rslack = GSS_VERF_SLACK >> 2;
auth->au_verfsize = GSS_VERF_SLACK >> 2;
+ auth->au_ralign = GSS_VERF_SLACK >> 2;
auth->au_flags = 0;
auth->au_ops = &authgss_ops;
auth->au_flavor = flavor;
@@ -1891,7 +1892,10 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
static int
gss_unwrap_resp_auth(struct rpc_cred *cred)
{
- cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize;
+ struct rpc_auth *auth = cred->cr_auth;
+
+ auth->au_rslack = auth->au_verfsize;
+ auth->au_ralign = auth->au_verfsize;
return 0;
}

@@ -1902,6 +1906,7 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct xdr_buf integ_buf, *rcv_buf = &rqstp->rq_rcv_buf;
u32 data_offset, mic_offset, integ_len, maj_stat;
+ struct rpc_auth *auth = cred->cr_auth;
struct xdr_netobj mic;
__be32 *p;

@@ -1928,8 +1933,8 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
if (maj_stat != GSS_S_COMPLETE)
goto bad_mic;

- cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
- 1 + XDR_QUADLEN(mic.len);
+ auth->au_rslack = auth->au_verfsize + 2 + 1 + XDR_QUADLEN(mic.len);
+ auth->au_ralign = auth->au_verfsize + 2;
return 0;
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
@@ -1949,6 +1954,7 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
{
struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf;
struct kvec *head = rqstp->rq_rcv_buf.head;
+ struct rpc_auth *auth = cred->cr_auth;
unsigned int savedlen = rcv_buf->len;
u32 offset, opaque_len, maj_stat;
__be32 *p;
@@ -1976,8 +1982,10 @@ static int gss_wrap_req(struct rpc_task *task, struct xdr_stream *xdr)
*/
xdr_init_decode(xdr, rcv_buf, p, rqstp);

- cred->cr_auth->au_rslack = cred->cr_auth->au_verfsize + 2 +
- XDR_QUADLEN(savedlen - rcv_buf->len);
+ auth->au_rslack = auth->au_verfsize + 2 +
+ XDR_QUADLEN(savedlen - rcv_buf->len);
+ auth->au_ralign = auth->au_verfsize + 2 +
+ XDR_QUADLEN(savedlen - rcv_buf->len);
return 0;
unwrap_failed:
trace_rpcgss_unwrap_failed(task);
diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c
index 9ae0824..41a633a 100644
--- a/net/sunrpc/auth_null.c
+++ b/net/sunrpc/auth_null.c
@@ -115,6 +115,7 @@ struct rpc_auth null_auth = {
.au_cslack = NUL_CALLSLACK,
.au_rslack = NUL_REPLYSLACK,
.au_verfsize = NUL_REPLYSLACK,
+ .au_ralign = NUL_REPLYSLACK,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
.au_count = REFCOUNT_INIT(1),
diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c
index a93c564..c048eb6 100644
--- a/net/sunrpc/auth_unix.c
+++ b/net/sunrpc/auth_unix.c
@@ -187,6 +187,7 @@

auth->au_verfsize = XDR_QUADLEN(size) + 2;
auth->au_rslack = XDR_QUADLEN(size) + 2;
+ auth->au_ralign = XDR_QUADLEN(size) + 2;
return 0;
}

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 4ea38b0..99bfeb1 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1180,7 +1180,7 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
/* Subtract one to force an extra word of buffer space for the
* payload's XDR pad to fall into the rcv_buf's tail iovec.
*/
- hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
+ hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_ralign - 1;

xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
trace_rpc_reply_pages(req);


2019-02-11 16:25:45

by Chuck Lever III

[permalink] [raw]
Subject: [PATCH v1 23/23] SUNRPC: Use au_rslack when computing reply buffer size

au_rslack is significantly smaller than (au_cslack << 2). Using
that value results in smaller receive buffers. In some cases this
eliminates an extra segment in Reply chunks (RPC/RDMA).

Signed-off-by: Chuck Lever <[email protected]>
---
net/sunrpc/clnt.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 99bfeb1..241e842 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1690,7 +1690,7 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
static void
call_allocate(struct rpc_task *task)
{
- unsigned int slack = task->tk_rqstp->rq_cred->cr_auth->au_cslack;
+ const struct rpc_auth *auth = task->tk_rqstp->rq_cred->cr_auth;
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
const struct rpc_procinfo *proc = task->tk_msg.rpc_proc;
@@ -1715,9 +1715,10 @@ void rpc_force_rebind(struct rpc_clnt *clnt)
* and reply headers, and convert both values
* to byte sizes.
*/
- req->rq_callsize = RPC_CALLHDRSIZE + (slack << 1) + proc->p_arglen;
+ req->rq_callsize = RPC_CALLHDRSIZE + (auth->au_cslack << 1) +
+ proc->p_arglen;
req->rq_callsize <<= 2;
- req->rq_rcvsize = RPC_REPHDRSIZE + slack + proc->p_replen;
+ req->rq_rcvsize = RPC_REPHDRSIZE + auth->au_rslack + proc->p_replen;
req->rq_rcvsize <<= 2;

status = xprt->ops->buf_alloc(task);


2019-04-05 17:36:48

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

Hi Chuck,

This patch breaks ACLs. After applying this patch nfs4_getfacl fails
(it fails within xdr and returns ENOTSUPP). Any ideas why?

On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>
> Certain NFS results (eg. READLINK) might expect a data payload that
> is not an exact multiple of 4 bytes. In this case, XDR encoding
> is required to pad that payload so its length on the wire is a
> multiple of 4 bytes. The constants that define the maximum size of
> each NFS result do not appear to account for this extra word.
>
> In each case where the data payload is to be received into pages:
>
> - 1 word is added to the size of the receive buffer allocated by
> call_allocate
>
> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> extra buffer space falls into the rcv_buf's tail iovec
>
> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> is thus removed from the tail
>
> Signed-off-by: Chuck Lever <[email protected]>
> ---
> fs/nfs/nfs2xdr.c | 6 +++---
> fs/nfs/nfs3xdr.c | 10 +++++-----
> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> net/sunrpc/clnt.c | 6 +++++-
> net/sunrpc/xdr.c | 2 ++
> 5 files changed, 23 insertions(+), 16 deletions(-)
>
> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> index 1dcd0fe..a7ed29d 100644
> --- a/fs/nfs/nfs2xdr.c
> +++ b/fs/nfs/nfs2xdr.c
> @@ -56,11 +56,11 @@
>
> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> -#define NFS_readlinkres_sz (2)
> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> +#define NFS_readlinkres_sz (2+1)
> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> #define NFS_writeres_sz (NFS_attrstat_sz)
> #define NFS_stat_sz (1)
> -#define NFS_readdirres_sz (1)
> +#define NFS_readdirres_sz (1+1)
> #define NFS_statfsres_sz (1+NFS_info_sz)
>
> static int nfs_stat_to_errno(enum nfs_stat);
> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> index a54dcf4..110358f 100644
> --- a/fs/nfs/nfs3xdr.c
> +++ b/fs/nfs/nfs3xdr.c
> @@ -69,13 +69,13 @@
> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> @@ -85,7 +85,7 @@
> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>
> static int nfs3_stat_to_errno(enum nfs_stat);
> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> result->op_status = status;
> if (status != NFS3_OK)
> goto out_status;
> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> error = decode_read3resok(xdr, result);
> out:
> return error;
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index d0fa18d..6d9d5e2 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> nfs4_fattr_bitmap_maxsz)
> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> encode_stateid_maxsz + 3)
> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> 2 + encode_verifier_maxsz + 5 + \
> nfs4_label_maxsz)
> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> - decode_verifier_maxsz)
> + decode_verifier_maxsz + 1)
> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> encode_stateid_maxsz + 4)
> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> #define encode_getacl_maxsz (encode_getattr_maxsz)
> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> - nfs4_fattr_bitmap_maxsz + 1)
> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> encode_stateid_maxsz + 3)
> #define decode_setacl_maxsz (decode_setattr_maxsz)
> #define encode_fs_locations_maxsz \
> (encode_getattr_maxsz)
> #define decode_fs_locations_maxsz \
> - (0)
> + (1)
> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>
> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> 1 /* opaque devaddr4 length */ + \
> /* devaddr4 payload is read into page */ \
> 1 /* notification bitmap length */ + \
> - 1 /* notification bitmap, word 0 */)
> + 1 /* notification bitmap, word 0 */ + \
> + 1 /* possible XDR padding */)
> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> encode_stateid_maxsz)
> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> decode_stateid_maxsz + \
> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> 2 /* offset */ + \
> 2 /* length */ + \
> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> index f780605..4ea38b0 100644
> --- a/net/sunrpc/clnt.c
> +++ b/net/sunrpc/clnt.c
> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> unsigned int base, unsigned int len,
> unsigned int hdrsize)
> {
> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> + /* Subtract one to force an extra word of buffer space for the
> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> + */
> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> +
> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> trace_rpc_reply_pages(req);
> }
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index 7cca515..aa8177d 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>
> tail->iov_base = buf + offset;
> tail->iov_len = buflen - offset;
> + if ((xdr->page_len & 3) == 0)
> + tail->iov_len -= sizeof(__be32);
>
> xdr->buflen += len;
> }
>

2019-04-05 17:51:40

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages



> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
>
> Hi Chuck,
>
> This patch breaks ACLs. After applying this patch nfs4_getfacl fails
> (it fails within xdr and returns ENOTSUPP). Any ideas why?

Possibly the macro that defines the maximum size of the reply
is incorrect.


> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>>
>> Certain NFS results (eg. READLINK) might expect a data payload that
>> is not an exact multiple of 4 bytes. In this case, XDR encoding
>> is required to pad that payload so its length on the wire is a
>> multiple of 4 bytes. The constants that define the maximum size of
>> each NFS result do not appear to account for this extra word.
>>
>> In each case where the data payload is to be received into pages:
>>
>> - 1 word is added to the size of the receive buffer allocated by
>> call_allocate
>>
>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
>> extra buffer space falls into the rcv_buf's tail iovec
>>
>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
>> is thus removed from the tail
>>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> fs/nfs/nfs2xdr.c | 6 +++---
>> fs/nfs/nfs3xdr.c | 10 +++++-----
>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
>> net/sunrpc/clnt.c | 6 +++++-
>> net/sunrpc/xdr.c | 2 ++
>> 5 files changed, 23 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
>> index 1dcd0fe..a7ed29d 100644
>> --- a/fs/nfs/nfs2xdr.c
>> +++ b/fs/nfs/nfs2xdr.c
>> @@ -56,11 +56,11 @@
>>
>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
>> -#define NFS_readlinkres_sz (2)
>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
>> +#define NFS_readlinkres_sz (2+1)
>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
>> #define NFS_writeres_sz (NFS_attrstat_sz)
>> #define NFS_stat_sz (1)
>> -#define NFS_readdirres_sz (1)
>> +#define NFS_readdirres_sz (1+1)
>> #define NFS_statfsres_sz (1+NFS_info_sz)
>>
>> static int nfs_stat_to_errno(enum nfs_stat);
>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
>> index a54dcf4..110358f 100644
>> --- a/fs/nfs/nfs3xdr.c
>> +++ b/fs/nfs/nfs3xdr.c
>> @@ -69,13 +69,13 @@
>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
>> @@ -85,7 +85,7 @@
>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>>
>> static int nfs3_stat_to_errno(enum nfs_stat);
>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
>> result->op_status = status;
>> if (status != NFS3_OK)
>> goto out_status;
>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
>> error = decode_read3resok(xdr, result);
>> out:
>> return error;
>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index d0fa18d..6d9d5e2 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>> nfs4_fattr_bitmap_maxsz)
>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
>> encode_stateid_maxsz + 3)
>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
>> 2 + encode_verifier_maxsz + 5 + \
>> nfs4_label_maxsz)
>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
>> - decode_verifier_maxsz)
>> + decode_verifier_maxsz + 1)
>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
>> encode_stateid_maxsz + 4)
>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>> - nfs4_fattr_bitmap_maxsz + 1)
>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>> encode_stateid_maxsz + 3)
>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>> #define encode_fs_locations_maxsz \
>> (encode_getattr_maxsz)
>> #define decode_fs_locations_maxsz \
>> - (0)
>> + (1)
>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>
>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>> 1 /* opaque devaddr4 length */ + \
>> /* devaddr4 payload is read into page */ \
>> 1 /* notification bitmap length */ + \
>> - 1 /* notification bitmap, word 0 */)
>> + 1 /* notification bitmap, word 0 */ + \
>> + 1 /* possible XDR padding */)
>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
>> encode_stateid_maxsz)
>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
>> decode_stateid_maxsz + \
>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
>> 2 /* offset */ + \
>> 2 /* length */ + \
>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>> index f780605..4ea38b0 100644
>> --- a/net/sunrpc/clnt.c
>> +++ b/net/sunrpc/clnt.c
>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
>> unsigned int base, unsigned int len,
>> unsigned int hdrsize)
>> {
>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
>> + /* Subtract one to force an extra word of buffer space for the
>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
>> + */
>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
>> +
>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
>> trace_rpc_reply_pages(req);
>> }
>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>> index 7cca515..aa8177d 100644
>> --- a/net/sunrpc/xdr.c
>> +++ b/net/sunrpc/xdr.c
>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>>
>> tail->iov_base = buf + offset;
>> tail->iov_len = buflen - offset;
>> + if ((xdr->page_len & 3) == 0)
>> + tail->iov_len -= sizeof(__be32);
>>
>> xdr->buflen += len;
>> }
>>

--
Chuck Lever




2019-04-05 19:17:38

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
>
>
>
> > On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> >
> > Hi Chuck,
> >
> > This patch breaks ACLs. After applying this patch nfs4_getfacl fails
> > (it fails within xdr and returns ENOTSUPP). Any ideas why?
>
> Possibly the macro that defines the maximum size of the reply
> is incorrect.
>

This also breaks FS_LOCATION. I'm going to go out on a limb here and say
that it probably breaks whatever else it modified. The question is:
can't we just revert it??

>
> > On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> >>
> >> Certain NFS results (eg. READLINK) might expect a data payload that
> >> is not an exact multiple of 4 bytes. In this case, XDR encoding
> >> is required to pad that payload so its length on the wire is a
> >> multiple of 4 bytes. The constants that define the maximum size of
> >> each NFS result do not appear to account for this extra word.
> >>
> >> In each case where the data payload is to be received into pages:
> >>
> >> - 1 word is added to the size of the receive buffer allocated by
> >> call_allocate
> >>
> >> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> >> extra buffer space falls into the rcv_buf's tail iovec
> >>
> >> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> >> is thus removed from the tail
> >>
> >> Signed-off-by: Chuck Lever <[email protected]>
> >> ---
> >> fs/nfs/nfs2xdr.c | 6 +++---
> >> fs/nfs/nfs3xdr.c | 10 +++++-----
> >> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> >> net/sunrpc/clnt.c | 6 +++++-
> >> net/sunrpc/xdr.c | 2 ++
> >> 5 files changed, 23 insertions(+), 16 deletions(-)
> >>
> >> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> >> index 1dcd0fe..a7ed29d 100644
> >> --- a/fs/nfs/nfs2xdr.c
> >> +++ b/fs/nfs/nfs2xdr.c
> >> @@ -56,11 +56,11 @@
> >>
> >> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> >> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> >> -#define NFS_readlinkres_sz (2)
> >> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> >> +#define NFS_readlinkres_sz (2+1)
> >> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> >> #define NFS_writeres_sz (NFS_attrstat_sz)
> >> #define NFS_stat_sz (1)
> >> -#define NFS_readdirres_sz (1)
> >> +#define NFS_readdirres_sz (1+1)
> >> #define NFS_statfsres_sz (1+NFS_info_sz)
> >>
> >> static int nfs_stat_to_errno(enum nfs_stat);
> >> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> >> index a54dcf4..110358f 100644
> >> --- a/fs/nfs/nfs3xdr.c
> >> +++ b/fs/nfs/nfs3xdr.c
> >> @@ -69,13 +69,13 @@
> >> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> >> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> >> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> >> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> >> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> >> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> >> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> >> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> >> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> >> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> >> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> >> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> >> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> >> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> >> @@ -85,7 +85,7 @@
> >> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> >> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> >> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> >> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> >>
> >> static int nfs3_stat_to_errno(enum nfs_stat);
> >> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> >> result->op_status = status;
> >> if (status != NFS3_OK)
> >> goto out_status;
> >> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >> error = decode_read3resok(xdr, result);
> >> out:
> >> return error;
> >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >> index d0fa18d..6d9d5e2 100644
> >> --- a/fs/nfs/nfs4xdr.c
> >> +++ b/fs/nfs/nfs4xdr.c
> >> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >> nfs4_fattr_bitmap_maxsz)
> >> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> >> encode_stateid_maxsz + 3)
> >> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> >> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> >> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> >> 2 + encode_verifier_maxsz + 5 + \
> >> nfs4_label_maxsz)
> >> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> >> - decode_verifier_maxsz)
> >> + decode_verifier_maxsz + 1)
> >> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> >> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> >> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> >> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> >> encode_stateid_maxsz + 4)
> >> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> >> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> >> #define encode_getacl_maxsz (encode_getattr_maxsz)
> >> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> >> - nfs4_fattr_bitmap_maxsz + 1)
> >> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> >> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> >> encode_stateid_maxsz + 3)
> >> #define decode_setacl_maxsz (decode_setattr_maxsz)
> >> #define encode_fs_locations_maxsz \
> >> (encode_getattr_maxsz)
> >> #define decode_fs_locations_maxsz \
> >> - (0)
> >> + (1)
> >> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> >> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> >>
> >> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >> 1 /* opaque devaddr4 length */ + \
> >> /* devaddr4 payload is read into page */ \
> >> 1 /* notification bitmap length */ + \
> >> - 1 /* notification bitmap, word 0 */)
> >> + 1 /* notification bitmap, word 0 */ + \
> >> + 1 /* possible XDR padding */)
> >> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> >> encode_stateid_maxsz)
> >> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> >> decode_stateid_maxsz + \
> >> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> >> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> >> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> >> 2 /* offset */ + \
> >> 2 /* length */ + \
> >> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> >> index f780605..4ea38b0 100644
> >> --- a/net/sunrpc/clnt.c
> >> +++ b/net/sunrpc/clnt.c
> >> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> >> unsigned int base, unsigned int len,
> >> unsigned int hdrsize)
> >> {
> >> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> >> + /* Subtract one to force an extra word of buffer space for the
> >> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> >> + */
> >> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> >> +
> >> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> >> trace_rpc_reply_pages(req);
> >> }
> >> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> >> index 7cca515..aa8177d 100644
> >> --- a/net/sunrpc/xdr.c
> >> +++ b/net/sunrpc/xdr.c
> >> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> >>
> >> tail->iov_base = buf + offset;
> >> tail->iov_len = buflen - offset;
> >> + if ((xdr->page_len & 3) == 0)
> >> + tail->iov_len -= sizeof(__be32);
> >>
> >> xdr->buflen += len;
> >> }
> >>
>
> --
> Chuck Lever
>
>
>

2019-04-05 19:23:33

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages



> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
>
> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
>>
>>
>>
>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
>>>
>>> Hi Chuck,
>>>
>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
>>
>> Possibly the macro that defines the maximum size of the reply
>> is incorrect.
>>
>
> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> that it probably breaks whatever else it modified.

It modifies READ, READDIR, and READLINK. Are those broken?


> The question is: can't we just revert it??

Why not "root cause" it first?


>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>>>>
>>>> Certain NFS results (eg. READLINK) might expect a data payload that
>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
>>>> is required to pad that payload so its length on the wire is a
>>>> multiple of 4 bytes. The constants that define the maximum size of
>>>> each NFS result do not appear to account for this extra word.
>>>>
>>>> In each case where the data payload is to be received into pages:
>>>>
>>>> - 1 word is added to the size of the receive buffer allocated by
>>>> call_allocate
>>>>
>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
>>>> extra buffer space falls into the rcv_buf's tail iovec
>>>>
>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
>>>> is thus removed from the tail
>>>>
>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>> ---
>>>> fs/nfs/nfs2xdr.c | 6 +++---
>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
>>>> net/sunrpc/clnt.c | 6 +++++-
>>>> net/sunrpc/xdr.c | 2 ++
>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
>>>>
>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
>>>> index 1dcd0fe..a7ed29d 100644
>>>> --- a/fs/nfs/nfs2xdr.c
>>>> +++ b/fs/nfs/nfs2xdr.c
>>>> @@ -56,11 +56,11 @@
>>>>
>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
>>>> -#define NFS_readlinkres_sz (2)
>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
>>>> +#define NFS_readlinkres_sz (2+1)
>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
>>>> #define NFS_stat_sz (1)
>>>> -#define NFS_readdirres_sz (1)
>>>> +#define NFS_readdirres_sz (1+1)
>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
>>>>
>>>> static int nfs_stat_to_errno(enum nfs_stat);
>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
>>>> index a54dcf4..110358f 100644
>>>> --- a/fs/nfs/nfs3xdr.c
>>>> +++ b/fs/nfs/nfs3xdr.c
>>>> @@ -69,13 +69,13 @@
>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
>>>> @@ -85,7 +85,7 @@
>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>>>>
>>>> static int nfs3_stat_to_errno(enum nfs_stat);
>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>> result->op_status = status;
>>>> if (status != NFS3_OK)
>>>> goto out_status;
>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>> error = decode_read3resok(xdr, result);
>>>> out:
>>>> return error;
>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>> index d0fa18d..6d9d5e2 100644
>>>> --- a/fs/nfs/nfs4xdr.c
>>>> +++ b/fs/nfs/nfs4xdr.c
>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>> nfs4_fattr_bitmap_maxsz)
>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
>>>> encode_stateid_maxsz + 3)
>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
>>>> 2 + encode_verifier_maxsz + 5 + \
>>>> nfs4_label_maxsz)
>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
>>>> - decode_verifier_maxsz)
>>>> + decode_verifier_maxsz + 1)
>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
>>>> encode_stateid_maxsz + 4)
>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>>>> - nfs4_fattr_bitmap_maxsz + 1)
>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>>>> encode_stateid_maxsz + 3)
>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>>>> #define encode_fs_locations_maxsz \
>>>> (encode_getattr_maxsz)
>>>> #define decode_fs_locations_maxsz \
>>>> - (0)
>>>> + (1)
>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>>>
>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>> 1 /* opaque devaddr4 length */ + \
>>>> /* devaddr4 payload is read into page */ \
>>>> 1 /* notification bitmap length */ + \
>>>> - 1 /* notification bitmap, word 0 */)
>>>> + 1 /* notification bitmap, word 0 */ + \
>>>> + 1 /* possible XDR padding */)
>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
>>>> encode_stateid_maxsz)
>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
>>>> decode_stateid_maxsz + \
>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
>>>> 2 /* offset */ + \
>>>> 2 /* length */ + \
>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>> index f780605..4ea38b0 100644
>>>> --- a/net/sunrpc/clnt.c
>>>> +++ b/net/sunrpc/clnt.c
>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
>>>> unsigned int base, unsigned int len,
>>>> unsigned int hdrsize)
>>>> {
>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
>>>> + /* Subtract one to force an extra word of buffer space for the
>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
>>>> + */
>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
>>>> +
>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
>>>> trace_rpc_reply_pages(req);
>>>> }
>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>>>> index 7cca515..aa8177d 100644
>>>> --- a/net/sunrpc/xdr.c
>>>> +++ b/net/sunrpc/xdr.c
>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>>>>
>>>> tail->iov_base = buf + offset;
>>>> tail->iov_len = buflen - offset;
>>>> + if ((xdr->page_len & 3) == 0)
>>>> + tail->iov_len -= sizeof(__be32);
>>>>
>>>> xdr->buflen += len;
>>>> }
>>>>
>>
>> --
>> Chuck Lever

--
Chuck Lever




2019-04-05 19:27:16

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
>
>
>
> > On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> >
> > On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> >>
> >>
> >>
> >>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>
> >>> Hi Chuck,
> >>>
> >>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> >>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> >>
> >> Possibly the macro that defines the maximum size of the reply
> >> is incorrect.
> >>
> >
> > This also breaks FS_LOCATION. I'm going to go on the limb here and say
> > that it probably breaks whatever else it modified.
>
> It modifies READ, READDIR, and READLINK. Are those broken?

I don't know how to test READLINK... but I think READ/READDIR work OK;
otherwise folks would have noticed it (I gather ACL and FS_LOCATION
testing doesn't happen frequently).

> > The question is: can't we just revert it??
>
> Why not "root cause" it first?

I'm trying :-/ I was just fishing to see how important the change was.

>
>
> >>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> >>>>
> >>>> Certain NFS results (eg. READLINK) might expect a data payload that
> >>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> >>>> is required to pad that payload so its length on the wire is a
> >>>> multiple of 4 bytes. The constants that define the maximum size of
> >>>> each NFS result do not appear to account for this extra word.
> >>>>
> >>>> In each case where the data payload is to be received into pages:
> >>>>
> >>>> - 1 word is added to the size of the receive buffer allocated by
> >>>> call_allocate
> >>>>
> >>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> >>>> extra buffer space falls into the rcv_buf's tail iovec
> >>>>
> >>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> >>>> is thus removed from the tail
> >>>>
> >>>> Signed-off-by: Chuck Lever <[email protected]>
> >>>> ---
> >>>> fs/nfs/nfs2xdr.c | 6 +++---
> >>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> >>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> >>>> net/sunrpc/clnt.c | 6 +++++-
> >>>> net/sunrpc/xdr.c | 2 ++
> >>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> >>>>
> >>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> >>>> index 1dcd0fe..a7ed29d 100644
> >>>> --- a/fs/nfs/nfs2xdr.c
> >>>> +++ b/fs/nfs/nfs2xdr.c
> >>>> @@ -56,11 +56,11 @@
> >>>>
> >>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> >>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> >>>> -#define NFS_readlinkres_sz (2)
> >>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> >>>> +#define NFS_readlinkres_sz (2+1)
> >>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> >>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> >>>> #define NFS_stat_sz (1)
> >>>> -#define NFS_readdirres_sz (1)
> >>>> +#define NFS_readdirres_sz (1+1)
> >>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> >>>>
> >>>> static int nfs_stat_to_errno(enum nfs_stat);
> >>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> >>>> index a54dcf4..110358f 100644
> >>>> --- a/fs/nfs/nfs3xdr.c
> >>>> +++ b/fs/nfs/nfs3xdr.c
> >>>> @@ -69,13 +69,13 @@
> >>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> >>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> >>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> >>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> >>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> >>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> >>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> >>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> >>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> >>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> >>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> >>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> >>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> >>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> >>>> @@ -85,7 +85,7 @@
> >>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> >>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> >>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> >>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> >>>>
> >>>> static int nfs3_stat_to_errno(enum nfs_stat);
> >>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> >>>> result->op_status = status;
> >>>> if (status != NFS3_OK)
> >>>> goto out_status;
> >>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>> error = decode_read3resok(xdr, result);
> >>>> out:
> >>>> return error;
> >>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >>>> index d0fa18d..6d9d5e2 100644
> >>>> --- a/fs/nfs/nfs4xdr.c
> >>>> +++ b/fs/nfs/nfs4xdr.c
> >>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>> nfs4_fattr_bitmap_maxsz)
> >>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> >>>> encode_stateid_maxsz + 3)
> >>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> >>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> >>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> >>>> 2 + encode_verifier_maxsz + 5 + \
> >>>> nfs4_label_maxsz)
> >>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> >>>> - decode_verifier_maxsz)
> >>>> + decode_verifier_maxsz + 1)
> >>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> >>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> >>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> >>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> >>>> encode_stateid_maxsz + 4)
> >>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> >>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> >>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> >>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> >>>> - nfs4_fattr_bitmap_maxsz + 1)
> >>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> >>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> >>>> encode_stateid_maxsz + 3)
> >>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> >>>> #define encode_fs_locations_maxsz \
> >>>> (encode_getattr_maxsz)
> >>>> #define decode_fs_locations_maxsz \
> >>>> - (0)
> >>>> + (1)
> >>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> >>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> >>>>
> >>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>> 1 /* opaque devaddr4 length */ + \
> >>>> /* devaddr4 payload is read into page */ \
> >>>> 1 /* notification bitmap length */ + \
> >>>> - 1 /* notification bitmap, word 0 */)
> >>>> + 1 /* notification bitmap, word 0 */ + \
> >>>> + 1 /* possible XDR padding */)
> >>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> >>>> encode_stateid_maxsz)
> >>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> >>>> decode_stateid_maxsz + \
> >>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> >>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> >>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> >>>> 2 /* offset */ + \
> >>>> 2 /* length */ + \
> >>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> >>>> index f780605..4ea38b0 100644
> >>>> --- a/net/sunrpc/clnt.c
> >>>> +++ b/net/sunrpc/clnt.c
> >>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> >>>> unsigned int base, unsigned int len,
> >>>> unsigned int hdrsize)
> >>>> {
> >>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> >>>> + /* Subtract one to force an extra word of buffer space for the
> >>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> >>>> + */
> >>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> >>>> +
> >>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> >>>> trace_rpc_reply_pages(req);
> >>>> }
> >>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> >>>> index 7cca515..aa8177d 100644
> >>>> --- a/net/sunrpc/xdr.c
> >>>> +++ b/net/sunrpc/xdr.c
> >>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> >>>>
> >>>> tail->iov_base = buf + offset;
> >>>> tail->iov_len = buflen - offset;
> >>>> + if ((xdr->page_len & 3) == 0)
> >>>> + tail->iov_len -= sizeof(__be32);
> >>>>
> >>>> xdr->buflen += len;
> >>>> }
> >>>>
> >>
> >> --
> >> Chuck Lever
>
> --
> Chuck Lever
>
>
>

2019-04-05 19:42:26

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages



> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
>
> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
>>
>>
>>
>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
>>>
>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
>>>>
>>>>
>>>>
>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>
>>>>> Hi Chuck,
>>>>>
>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
>>>>
>>>> Possibly the macro that defines the maximum size of the reply
>>>> is incorrect.
>>>>
>>>
>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
>>> that it probably breaks whatever else it modified.
>>
>> It modifies READ, READDIR, and READLINK. Are those broken?
>
> I don't know how to test READLINK.. but I think READ/READDIR work OK
> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> testing doesn't happen frequently).

I guess I don't have any NFSv4 ACL or FS_LOCATIONS regression
tests in my automated unit tests.


>>> The question is: can't we just revert it??
>>
>> Why not "root cause" it first?
>
> I'm trying :-/ I was just fishing to see how important the change was.

Try reverting just this hunk:

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d0fa18d..6d9d5e2 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
#define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
#define encode_getacl_maxsz (encode_getattr_maxsz)
#define decode_getacl_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 1)
+ nfs4_fattr_bitmap_maxsz + 1 + 1)
#define encode_setacl_maxsz (op_encode_hdr_maxsz + \
encode_stateid_maxsz + 3)
#define decode_setacl_maxsz (decode_setattr_maxsz)
#define encode_fs_locations_maxsz \
(encode_getattr_maxsz)
#define decode_fs_locations_maxsz \
- (0)
+ (1)
#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))


>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>>>>>>
>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
>>>>>> is required to pad that payload so its length on the wire is a
>>>>>> multiple of 4 bytes. The constants that define the maximum size of
>>>>>> each NFS result do not appear to account for this extra word.
>>>>>>
>>>>>> In each case where the data payload is to be received into pages:
>>>>>>
>>>>>> - 1 word is added to the size of the receive buffer allocated by
>>>>>> call_allocate
>>>>>>
>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
>>>>>> extra buffer space falls into the rcv_buf's tail iovec
>>>>>>
>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
>>>>>> is thus removed from the tail
>>>>>>
>>>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>>>> ---
>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
>>>>>> net/sunrpc/clnt.c | 6 +++++-
>>>>>> net/sunrpc/xdr.c | 2 ++
>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
>>>>>>
>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
>>>>>> index 1dcd0fe..a7ed29d 100644
>>>>>> --- a/fs/nfs/nfs2xdr.c
>>>>>> +++ b/fs/nfs/nfs2xdr.c
>>>>>> @@ -56,11 +56,11 @@
>>>>>>
>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
>>>>>> -#define NFS_readlinkres_sz (2)
>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
>>>>>> +#define NFS_readlinkres_sz (2+1)
>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
>>>>>> #define NFS_stat_sz (1)
>>>>>> -#define NFS_readdirres_sz (1)
>>>>>> +#define NFS_readdirres_sz (1+1)
>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
>>>>>>
>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
>>>>>> index a54dcf4..110358f 100644
>>>>>> --- a/fs/nfs/nfs3xdr.c
>>>>>> +++ b/fs/nfs/nfs3xdr.c
>>>>>> @@ -69,13 +69,13 @@
>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
>>>>>> @@ -85,7 +85,7 @@
>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>>>>>>
>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>>> result->op_status = status;
>>>>>> if (status != NFS3_OK)
>>>>>> goto out_status;
>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>> error = decode_read3resok(xdr, result);
>>>>>> out:
>>>>>> return error;
>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>>> index d0fa18d..6d9d5e2 100644
>>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>> nfs4_fattr_bitmap_maxsz)
>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
>>>>>> encode_stateid_maxsz + 3)
>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
>>>>>> 2 + encode_verifier_maxsz + 5 + \
>>>>>> nfs4_label_maxsz)
>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
>>>>>> - decode_verifier_maxsz)
>>>>>> + decode_verifier_maxsz + 1)
>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
>>>>>> encode_stateid_maxsz + 4)
>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>>>>>> encode_stateid_maxsz + 3)
>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>>>>>> #define encode_fs_locations_maxsz \
>>>>>> (encode_getattr_maxsz)
>>>>>> #define decode_fs_locations_maxsz \
>>>>>> - (0)
>>>>>> + (1)
>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>>>>>
>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>> 1 /* opaque devaddr4 length */ + \
>>>>>> /* devaddr4 payload is read into page */ \
>>>>>> 1 /* notification bitmap length */ + \
>>>>>> - 1 /* notification bitmap, word 0 */)
>>>>>> + 1 /* notification bitmap, word 0 */ + \
>>>>>> + 1 /* possible XDR padding */)
>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
>>>>>> encode_stateid_maxsz)
>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
>>>>>> decode_stateid_maxsz + \
>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
>>>>>> 2 /* offset */ + \
>>>>>> 2 /* length */ + \
>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>> index f780605..4ea38b0 100644
>>>>>> --- a/net/sunrpc/clnt.c
>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
>>>>>> unsigned int base, unsigned int len,
>>>>>> unsigned int hdrsize)
>>>>>> {
>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
>>>>>> + /* Subtract one to force an extra word of buffer space for the
>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
>>>>>> + */
>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
>>>>>> +
>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
>>>>>> trace_rpc_reply_pages(req);
>>>>>> }
>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>>>>>> index 7cca515..aa8177d 100644
>>>>>> --- a/net/sunrpc/xdr.c
>>>>>> +++ b/net/sunrpc/xdr.c
>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>>>>>>
>>>>>> tail->iov_base = buf + offset;
>>>>>> tail->iov_len = buflen - offset;
>>>>>> + if ((xdr->page_len & 3) == 0)
>>>>>> + tail->iov_len -= sizeof(__be32);
>>>>>>
>>>>>> xdr->buflen += len;
>>>>>> }
>>>>>>
>>>>
>>>> --
>>>> Chuck Lever
>>
>> --
>> Chuck Lever

--
Chuck Lever




2019-04-08 14:36:49

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
>
>
>
> > On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
> >
> > On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
> >>
> >>
> >>
> >>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>
> >>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> >>>>
> >>>>
> >>>>
> >>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>>>
> >>>>> Hi Chuck,
> >>>>>
> >>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> >>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> >>>>
> >>>> Possibly the macro that defines the maximum size of the reply
> >>>> is incorrect.
> >>>>
> >>>
> >>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> >>> that it probably breaks whatever else it modified.
> >>
> >> It modifies READ, READDIR, and READLINK. Are those broken?
> >
> > I don't know how to test READLINK.. but I think READ/READDIR work OK
> > otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> > testing doesn't happen frequently).
>
> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
> tests in my automated unit tests.
>
>
> >>> The question is: can't we just revert it??
> >>
> >> Why not "root cause" it first?
> >
> > I'm trying :-/ I was just fishing to see how important the change was.
>
> Try reverting just this hunk:

That doesn't help. It seems to be this piece that's causing issues:
hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1

With this, there is an extra byte (in front) in the buffer when the (ACL)
operation is decoded.

>
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index d0fa18d..6d9d5e2 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> #define encode_getacl_maxsz (encode_getattr_maxsz)
> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> - nfs4_fattr_bitmap_maxsz + 1)
> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> encode_stateid_maxsz + 3)
> #define decode_setacl_maxsz (decode_setattr_maxsz)
> #define encode_fs_locations_maxsz \
> (encode_getattr_maxsz)
> #define decode_fs_locations_maxsz \
> - (0)
> + (1)
> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>
>
> >>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> >>>>>>
> >>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
> >>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> >>>>>> is required to pad that payload so its length on the wire is a
> >>>>>> multiple of 4 bytes. The constants that define the maximum size of
> >>>>>> each NFS result do not appear to account for this extra word.
> >>>>>>
> >>>>>> In each case where the data payload is to be received into pages:
> >>>>>>
> >>>>>> - 1 word is added to the size of the receive buffer allocated by
> >>>>>> call_allocate
> >>>>>>
> >>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> >>>>>> extra buffer space falls into the rcv_buf's tail iovec
> >>>>>>
> >>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> >>>>>> is thus removed from the tail
> >>>>>>
> >>>>>> Signed-off-by: Chuck Lever <[email protected]>
> >>>>>> ---
> >>>>>> fs/nfs/nfs2xdr.c | 6 +++---
> >>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> >>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> >>>>>> net/sunrpc/clnt.c | 6 +++++-
> >>>>>> net/sunrpc/xdr.c | 2 ++
> >>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> >>>>>>
> >>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> >>>>>> index 1dcd0fe..a7ed29d 100644
> >>>>>> --- a/fs/nfs/nfs2xdr.c
> >>>>>> +++ b/fs/nfs/nfs2xdr.c
> >>>>>> @@ -56,11 +56,11 @@
> >>>>>>
> >>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> >>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> >>>>>> -#define NFS_readlinkres_sz (2)
> >>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> >>>>>> +#define NFS_readlinkres_sz (2+1)
> >>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> >>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> >>>>>> #define NFS_stat_sz (1)
> >>>>>> -#define NFS_readdirres_sz (1)
> >>>>>> +#define NFS_readdirres_sz (1+1)
> >>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> >>>>>>
> >>>>>> static int nfs_stat_to_errno(enum nfs_stat);
> >>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> >>>>>> index a54dcf4..110358f 100644
> >>>>>> --- a/fs/nfs/nfs3xdr.c
> >>>>>> +++ b/fs/nfs/nfs3xdr.c
> >>>>>> @@ -69,13 +69,13 @@
> >>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> >>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> >>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> >>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> >>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> >>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> >>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> >>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> >>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> >>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> >>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> >>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> >>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> >>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> >>>>>> @@ -85,7 +85,7 @@
> >>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> >>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> >>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> >>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> >>>>>>
> >>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
> >>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> >>>>>> result->op_status = status;
> >>>>>> if (status != NFS3_OK)
> >>>>>> goto out_status;
> >>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>>>> error = decode_read3resok(xdr, result);
> >>>>>> out:
> >>>>>> return error;
> >>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >>>>>> index d0fa18d..6d9d5e2 100644
> >>>>>> --- a/fs/nfs/nfs4xdr.c
> >>>>>> +++ b/fs/nfs/nfs4xdr.c
> >>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>> nfs4_fattr_bitmap_maxsz)
> >>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> >>>>>> encode_stateid_maxsz + 3)
> >>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> >>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> >>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> >>>>>> 2 + encode_verifier_maxsz + 5 + \
> >>>>>> nfs4_label_maxsz)
> >>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> >>>>>> - decode_verifier_maxsz)
> >>>>>> + decode_verifier_maxsz + 1)
> >>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> >>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> >>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> >>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> >>>>>> encode_stateid_maxsz + 4)
> >>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> >>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> >>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> >>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> >>>>>> - nfs4_fattr_bitmap_maxsz + 1)
> >>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> >>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> >>>>>> encode_stateid_maxsz + 3)
> >>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> >>>>>> #define encode_fs_locations_maxsz \
> >>>>>> (encode_getattr_maxsz)
> >>>>>> #define decode_fs_locations_maxsz \
> >>>>>> - (0)
> >>>>>> + (1)
> >>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> >>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> >>>>>>
> >>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>> 1 /* opaque devaddr4 length */ + \
> >>>>>> /* devaddr4 payload is read into page */ \
> >>>>>> 1 /* notification bitmap length */ + \
> >>>>>> - 1 /* notification bitmap, word 0 */)
> >>>>>> + 1 /* notification bitmap, word 0 */ + \
> >>>>>> + 1 /* possible XDR padding */)
> >>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> >>>>>> encode_stateid_maxsz)
> >>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> >>>>>> decode_stateid_maxsz + \
> >>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> >>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> >>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> >>>>>> 2 /* offset */ + \
> >>>>>> 2 /* length */ + \
> >>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> >>>>>> index f780605..4ea38b0 100644
> >>>>>> --- a/net/sunrpc/clnt.c
> >>>>>> +++ b/net/sunrpc/clnt.c
> >>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> >>>>>> unsigned int base, unsigned int len,
> >>>>>> unsigned int hdrsize)
> >>>>>> {
> >>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> >>>>>> + /* Subtract one to force an extra word of buffer space for the
> >>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> >>>>>> + */
> >>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> >>>>>> +
> >>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> >>>>>> trace_rpc_reply_pages(req);
> >>>>>> }
> >>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> >>>>>> index 7cca515..aa8177d 100644
> >>>>>> --- a/net/sunrpc/xdr.c
> >>>>>> +++ b/net/sunrpc/xdr.c
> >>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> >>>>>>
> >>>>>> tail->iov_base = buf + offset;
> >>>>>> tail->iov_len = buflen - offset;
> >>>>>> + if ((xdr->page_len & 3) == 0)
> >>>>>> + tail->iov_len -= sizeof(__be32);
> >>>>>>
> >>>>>> xdr->buflen += len;
> >>>>>> }
> >>>>>>
> >>>>
> >>>> --
> >>>> Chuck Lever
> >>
> >> --
> >> Chuck Lever
>
> --
> Chuck Lever
>
>
>

2019-04-08 14:43:19

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages



> On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
>
> On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
>>
>>
>>
>>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
>>>
>>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
>>>>
>>>>
>>>>
>>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>
>>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>>>
>>>>>>> Hi Chuck,
>>>>>>>
>>>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
>>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
>>>>>>
>>>>>> Possibly the macro that defines the maximum size of the reply
>>>>>> is incorrect.
>>>>>>
>>>>>
>>>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
>>>>> that it probably breaks whatever else it modified.
>>>>
>>>> It modifies READ, READDIR, and READLINK. Are those broken?
>>>
>>> I don't know how to test READLINK.. but I think READ/READDIR work OK
>>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
>>> testing doesn't happen frequently).
>>
>> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
>> tests in my automated unit tests.
>>
>>
>>>>> The question is: can't we just revert it??
>>>>
>>>> Why not "root cause" it first?
>>>
>>> I'm trying :-/ I was just fishing to see how important the change was.
>>
>> Try reverting just this hunk:
>
> That doesn't help. It seems to be this piece that's causing issues
> hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
>
> With this there is an extra byte (in front) in the buffer when (ACL)
> operation is decoded.

How do you know there isn't a latent bug in the getfacl decoder?

How are you reproducing this issue? I can try it here later today.


>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>> index d0fa18d..6d9d5e2 100644
>> --- a/fs/nfs/nfs4xdr.c
>> +++ b/fs/nfs/nfs4xdr.c
>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>> - nfs4_fattr_bitmap_maxsz + 1)
>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>> encode_stateid_maxsz + 3)
>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>> #define encode_fs_locations_maxsz \
>> (encode_getattr_maxsz)
>> #define decode_fs_locations_maxsz \
>> - (0)
>> + (1)
>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>
>>
>>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>>>>>>>>
>>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
>>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
>>>>>>>> is required to pad that payload so its length on the wire is a
>>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
>>>>>>>> each NFS result do not appear to account for this extra word.
>>>>>>>>
>>>>>>>> In each case where the data payload is to be received into pages:
>>>>>>>>
>>>>>>>> - 1 word is added to the size of the receive buffer allocated by
>>>>>>>> call_allocate
>>>>>>>>
>>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
>>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
>>>>>>>>
>>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
>>>>>>>> is thus removed from the tail
>>>>>>>>
>>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>>>>>> ---
>>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
>>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
>>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
>>>>>>>> net/sunrpc/clnt.c | 6 +++++-
>>>>>>>> net/sunrpc/xdr.c | 2 ++
>>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
>>>>>>>>
>>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
>>>>>>>> index 1dcd0fe..a7ed29d 100644
>>>>>>>> --- a/fs/nfs/nfs2xdr.c
>>>>>>>> +++ b/fs/nfs/nfs2xdr.c
>>>>>>>> @@ -56,11 +56,11 @@
>>>>>>>>
>>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
>>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
>>>>>>>> -#define NFS_readlinkres_sz (2)
>>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
>>>>>>>> +#define NFS_readlinkres_sz (2+1)
>>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
>>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
>>>>>>>> #define NFS_stat_sz (1)
>>>>>>>> -#define NFS_readdirres_sz (1)
>>>>>>>> +#define NFS_readdirres_sz (1+1)
>>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
>>>>>>>>
>>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
>>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
>>>>>>>> index a54dcf4..110358f 100644
>>>>>>>> --- a/fs/nfs/nfs3xdr.c
>>>>>>>> +++ b/fs/nfs/nfs3xdr.c
>>>>>>>> @@ -69,13 +69,13 @@
>>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
>>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
>>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
>>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
>>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
>>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
>>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
>>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
>>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
>>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
>>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
>>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
>>>>>>>> @@ -85,7 +85,7 @@
>>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
>>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
>>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
>>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>>>>>>>>
>>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
>>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>>>>> result->op_status = status;
>>>>>>>> if (status != NFS3_OK)
>>>>>>>> goto out_status;
>>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>>>> error = decode_read3resok(xdr, result);
>>>>>>>> out:
>>>>>>>> return error;
>>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>>>>> index d0fa18d..6d9d5e2 100644
>>>>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>> nfs4_fattr_bitmap_maxsz)
>>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
>>>>>>>> encode_stateid_maxsz + 3)
>>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
>>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
>>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
>>>>>>>> 2 + encode_verifier_maxsz + 5 + \
>>>>>>>> nfs4_label_maxsz)
>>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
>>>>>>>> - decode_verifier_maxsz)
>>>>>>>> + decode_verifier_maxsz + 1)
>>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
>>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
>>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
>>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
>>>>>>>> encode_stateid_maxsz + 4)
>>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
>>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
>>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>>>>>>>> encode_stateid_maxsz + 3)
>>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>>>>>>>> #define encode_fs_locations_maxsz \
>>>>>>>> (encode_getattr_maxsz)
>>>>>>>> #define decode_fs_locations_maxsz \
>>>>>>>> - (0)
>>>>>>>> + (1)
>>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>>>>>>>
>>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>> 1 /* opaque devaddr4 length */ + \
>>>>>>>> /* devaddr4 payload is read into page */ \
>>>>>>>> 1 /* notification bitmap length */ + \
>>>>>>>> - 1 /* notification bitmap, word 0 */)
>>>>>>>> + 1 /* notification bitmap, word 0 */ + \
>>>>>>>> + 1 /* possible XDR padding */)
>>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
>>>>>>>> encode_stateid_maxsz)
>>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
>>>>>>>> decode_stateid_maxsz + \
>>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
>>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
>>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
>>>>>>>> 2 /* offset */ + \
>>>>>>>> 2 /* length */ + \
>>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>>>> index f780605..4ea38b0 100644
>>>>>>>> --- a/net/sunrpc/clnt.c
>>>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
>>>>>>>> unsigned int base, unsigned int len,
>>>>>>>> unsigned int hdrsize)
>>>>>>>> {
>>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
>>>>>>>> + /* Subtract one to force an extra word of buffer space for the
>>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
>>>>>>>> + */
>>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
>>>>>>>> +
>>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
>>>>>>>> trace_rpc_reply_pages(req);
>>>>>>>> }
>>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>>>>>>>> index 7cca515..aa8177d 100644
>>>>>>>> --- a/net/sunrpc/xdr.c
>>>>>>>> +++ b/net/sunrpc/xdr.c
>>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>>>>>>>>
>>>>>>>> tail->iov_base = buf + offset;
>>>>>>>> tail->iov_len = buflen - offset;
>>>>>>>> + if ((xdr->page_len & 3) == 0)
>>>>>>>> + tail->iov_len -= sizeof(__be32);
>>>>>>>>
>>>>>>>> xdr->buflen += len;
>>>>>>>> }
>>>>>>>>
>>>>>>
>>>>>> --
>>>>>> Chuck Lever
>>>>
>>>> --
>>>> Chuck Lever
>>
>> --
>> Chuck Lever

--
Chuck Lever




2019-04-08 15:22:07

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Mon, Apr 8, 2019 at 10:43 AM Chuck Lever <[email protected]> wrote:
>
>
>
> > On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
> >
> > On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
> >>
> >>
> >>
> >>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>
> >>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
> >>>>
> >>>>
> >>>>
> >>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>>>
> >>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> >>>>>>
> >>>>>>
> >>>>>>
> >>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> >>>>>>>
> >>>>>>> Hi Chuck,
> >>>>>>>
> >>>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> >>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> >>>>>>
> >>>>>> Possibly the macro that defines the maximum size of the reply
> >>>>>> is incorrect.
> >>>>>>
> >>>>>
> >>>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> >>>>> that it probably breaks whatever else it modified.
> >>>>
> >>>> It modifies READ, READDIR, and READLINK. Are those broken?
> >>>
> >>> I don't know how to test READLINK.. but I think READ/READDIR work OK
> >>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> >>> testing doesn't happen frequently).
> >>
> >> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
> >> tests in my automated unit tests.
> >>
> >>
> >>>>> The question is: can't we just revert it??
> >>>>
> >>>> Why not "root cause" it first?
> >>>
> >>> I'm trying :-/ I was just fishing to see how important the change was.
> >>
> >> Try reverting just this hunk:
> >
> > That doesn't help. It seems to be this piece that's causing issues
> > hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
> >
> > With this there is an extra byte (in front) in the buffer when (ACL)
> > operation is decoded.
>
> How do you know there isn't a latent bug in the getfacl decoder?

I don't. All I know is that it passed tests before and now it doesn't.

> How are you reproducing this issue? I can try it here later today.

The issue was found running xfstests nfs/001. However, you don't need
that: (1) mount (2) nfs4_getfacl <file>

To understand the patch: does it fix a problem with READLINK, or is this
an optimization?

>
>
> >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >> index d0fa18d..6d9d5e2 100644
> >> --- a/fs/nfs/nfs4xdr.c
> >> +++ b/fs/nfs/nfs4xdr.c
> >> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> >> #define encode_getacl_maxsz (encode_getattr_maxsz)
> >> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> >> - nfs4_fattr_bitmap_maxsz + 1)
> >> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> >> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> >> encode_stateid_maxsz + 3)
> >> #define decode_setacl_maxsz (decode_setattr_maxsz)
> >> #define encode_fs_locations_maxsz \
> >> (encode_getattr_maxsz)
> >> #define decode_fs_locations_maxsz \
> >> - (0)
> >> + (1)
> >> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> >> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> >>
> >>
> >>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> >>>>>>>>
> >>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
> >>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> >>>>>>>> is required to pad that payload so its length on the wire is a
> >>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
> >>>>>>>> each NFS result do not appear to account for this extra word.
> >>>>>>>>
> >>>>>>>> In each case where the data payload is to be received into pages:
> >>>>>>>>
> >>>>>>>> - 1 word is added to the size of the receive buffer allocated by
> >>>>>>>> call_allocate
> >>>>>>>>
> >>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> >>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
> >>>>>>>>
> >>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> >>>>>>>> is thus removed from the tail
> >>>>>>>>
> >>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
> >>>>>>>> ---
> >>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
> >>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> >>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> >>>>>>>> net/sunrpc/clnt.c | 6 +++++-
> >>>>>>>> net/sunrpc/xdr.c | 2 ++
> >>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> >>>>>>>>
> >>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> >>>>>>>> index 1dcd0fe..a7ed29d 100644
> >>>>>>>> --- a/fs/nfs/nfs2xdr.c
> >>>>>>>> +++ b/fs/nfs/nfs2xdr.c
> >>>>>>>> @@ -56,11 +56,11 @@
> >>>>>>>>
> >>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> >>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> >>>>>>>> -#define NFS_readlinkres_sz (2)
> >>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> >>>>>>>> +#define NFS_readlinkres_sz (2+1)
> >>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> >>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> >>>>>>>> #define NFS_stat_sz (1)
> >>>>>>>> -#define NFS_readdirres_sz (1)
> >>>>>>>> +#define NFS_readdirres_sz (1+1)
> >>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> >>>>>>>>
> >>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
> >>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> >>>>>>>> index a54dcf4..110358f 100644
> >>>>>>>> --- a/fs/nfs/nfs3xdr.c
> >>>>>>>> +++ b/fs/nfs/nfs3xdr.c
> >>>>>>>> @@ -69,13 +69,13 @@
> >>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> >>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> >>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> >>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> >>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> >>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> >>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> >>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> >>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> >>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> >>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> >>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> >>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> >>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> >>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> >>>>>>>> @@ -85,7 +85,7 @@
> >>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> >>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> >>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> >>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> >>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> >>>>>>>>
> >>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
> >>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> >>>>>>>> result->op_status = status;
> >>>>>>>> if (status != NFS3_OK)
> >>>>>>>> goto out_status;
> >>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> >>>>>>>> error = decode_read3resok(xdr, result);
> >>>>>>>> out:
> >>>>>>>> return error;
> >>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> >>>>>>>> index d0fa18d..6d9d5e2 100644
> >>>>>>>> --- a/fs/nfs/nfs4xdr.c
> >>>>>>>> +++ b/fs/nfs/nfs4xdr.c
> >>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>>>> nfs4_fattr_bitmap_maxsz)
> >>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> >>>>>>>> encode_stateid_maxsz + 3)
> >>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> >>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> >>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> >>>>>>>> 2 + encode_verifier_maxsz + 5 + \
> >>>>>>>> nfs4_label_maxsz)
> >>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> >>>>>>>> - decode_verifier_maxsz)
> >>>>>>>> + decode_verifier_maxsz + 1)
> >>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> >>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> >>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> >>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> >>>>>>>> encode_stateid_maxsz + 4)
> >>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> >>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> >>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> >>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> >>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
> >>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> >>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> >>>>>>>> encode_stateid_maxsz + 3)
> >>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> >>>>>>>> #define encode_fs_locations_maxsz \
> >>>>>>>> (encode_getattr_maxsz)
> >>>>>>>> #define decode_fs_locations_maxsz \
> >>>>>>>> - (0)
> >>>>>>>> + (1)
> >>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> >>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> >>>>>>>>
> >>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> >>>>>>>> 1 /* opaque devaddr4 length */ + \
> >>>>>>>> /* devaddr4 payload is read into page */ \
> >>>>>>>> 1 /* notification bitmap length */ + \
> >>>>>>>> - 1 /* notification bitmap, word 0 */)
> >>>>>>>> + 1 /* notification bitmap, word 0 */ + \
> >>>>>>>> + 1 /* possible XDR padding */)
> >>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> >>>>>>>> encode_stateid_maxsz)
> >>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> >>>>>>>> decode_stateid_maxsz + \
> >>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> >>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> >>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> >>>>>>>> 2 /* offset */ + \
> >>>>>>>> 2 /* length */ + \
> >>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> >>>>>>>> index f780605..4ea38b0 100644
> >>>>>>>> --- a/net/sunrpc/clnt.c
> >>>>>>>> +++ b/net/sunrpc/clnt.c
> >>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> >>>>>>>> unsigned int base, unsigned int len,
> >>>>>>>> unsigned int hdrsize)
> >>>>>>>> {
> >>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> >>>>>>>> + /* Subtract one to force an extra word of buffer space for the
> >>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> >>>>>>>> + */
> >>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> >>>>>>>> +
> >>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> >>>>>>>> trace_rpc_reply_pages(req);
> >>>>>>>> }
> >>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> >>>>>>>> index 7cca515..aa8177d 100644
> >>>>>>>> --- a/net/sunrpc/xdr.c
> >>>>>>>> +++ b/net/sunrpc/xdr.c
> >>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> >>>>>>>>
> >>>>>>>> tail->iov_base = buf + offset;
> >>>>>>>> tail->iov_len = buflen - offset;
> >>>>>>>> + if ((xdr->page_len & 3) == 0)
> >>>>>>>> + tail->iov_len -= sizeof(__be32);
> >>>>>>>>
> >>>>>>>> xdr->buflen += len;
> >>>>>>>> }
> >>>>>>>>
> >>>>>>
> >>>>>> --
> >>>>>> Chuck Lever
> >>>>
> >>>> --
> >>>> Chuck Lever
> >>
> >> --
> >> Chuck Lever
>
> --
> Chuck Lever
>
>
>

2019-04-08 15:26:43

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Mon, Apr 8, 2019 at 11:21 AM Olga Kornievskaia <[email protected]> wrote:
>
> On Mon, Apr 8, 2019 at 10:43 AM Chuck Lever <[email protected]> wrote:
> >
> >
> >
> > > On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
> > >
> > > On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
> > >>
> > >>
> > >>
> > >>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
> > >>>
> > >>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
> > >>>>
> > >>>>
> > >>>>
> > >>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> > >>>>>
> > >>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> > >>>>>>
> > >>>>>>
> > >>>>>>
> > >>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> > >>>>>>>
> > >>>>>>> Hi Chuck,
> > >>>>>>>
> > >>>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> > >>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> > >>>>>>
> > >>>>>> Possibly the macro that defines the maximum size of the reply
> > >>>>>> is incorrect.
> > >>>>>>
> > >>>>>
> > >>>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> > >>>>> that it probably breaks whatever else it modified.
> > >>>>
> > >>>> It modifies READ, READDIR, and READLINK. Are those broken?
> > >>>
> > >>> I don't know how to test READLINK.. but I think READ/READDIR work OK
> > >>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> > >>> testing doesn't happen frequently).
> > >>
> > >> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
> > >> tests in my automated unit tests.
> > >>
> > >>
> > >>>>> The question is: can't we just revert it??
> > >>>>
> > >>>> Why not "root cause" it first?
> > >>>
> > >>> I'm trying :-/ I was just fishing to see how important the change was.
> > >>
> > >> Try reverting just this hunk:
> > >
> > > That doesn't help. It seems to be this piece that's causing issues
> > > hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
> > >
> > > With this there is an extra byte (in front) in the buffer when (ACL)
> > > operation is decoded.
> >
> > How do you know there isn't a latent bug in the getfacl decoder?
>
> I don't. All I know is that it passed tests before and now it doesn't.

Also, this bug would have to be in both the getfacl and fs_location code.
What they both share is the xdr_enter_page() code, which with the new
semantics now makes the buffer point to the wrong place.

> > How are you reproducing this issue? I can try it here later today.
>
> The issue was found running xfstest nfs/001. However, you don't need
> that: (1) mount (2) nfs4_getfacl <file>
>
> To understand a patch, does it fix a problem with READLINK or is the
> an optimization?
>
> >
> >
> > >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > >> index d0fa18d..6d9d5e2 100644
> > >> --- a/fs/nfs/nfs4xdr.c
> > >> +++ b/fs/nfs/nfs4xdr.c
> > >> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > >> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > >> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > >> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > >> - nfs4_fattr_bitmap_maxsz + 1)
> > >> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > >> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > >> encode_stateid_maxsz + 3)
> > >> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > >> #define encode_fs_locations_maxsz \
> > >> (encode_getattr_maxsz)
> > >> #define decode_fs_locations_maxsz \
> > >> - (0)
> > >> + (1)
> > >> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > >> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > >>
> > >>
> > >>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> > >>>>>>>>
> > >>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
> > >>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> > >>>>>>>> is required to pad that payload so its length on the wire is a
> > >>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
> > >>>>>>>> each NFS result do not appear to account for this extra word.
> > >>>>>>>>
> > >>>>>>>> In each case where the data payload is to be received into pages:
> > >>>>>>>>
> > >>>>>>>> - 1 word is added to the size of the receive buffer allocated by
> > >>>>>>>> call_allocate
> > >>>>>>>>
> > >>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> > >>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
> > >>>>>>>>
> > >>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> > >>>>>>>> is thus removed from the tail
> > >>>>>>>>
> > >>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
> > >>>>>>>> ---
> > >>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
> > >>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> > >>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> > >>>>>>>> net/sunrpc/clnt.c | 6 +++++-
> > >>>>>>>> net/sunrpc/xdr.c | 2 ++
> > >>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> > >>>>>>>>
> > >>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> > >>>>>>>> index 1dcd0fe..a7ed29d 100644
> > >>>>>>>> --- a/fs/nfs/nfs2xdr.c
> > >>>>>>>> +++ b/fs/nfs/nfs2xdr.c
> > >>>>>>>> @@ -56,11 +56,11 @@
> > >>>>>>>>
> > >>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> > >>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> > >>>>>>>> -#define NFS_readlinkres_sz (2)
> > >>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> > >>>>>>>> +#define NFS_readlinkres_sz (2+1)
> > >>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> > >>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> > >>>>>>>> #define NFS_stat_sz (1)
> > >>>>>>>> -#define NFS_readdirres_sz (1)
> > >>>>>>>> +#define NFS_readdirres_sz (1+1)
> > >>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> > >>>>>>>>
> > >>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
> > >>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> > >>>>>>>> index a54dcf4..110358f 100644
> > >>>>>>>> --- a/fs/nfs/nfs3xdr.c
> > >>>>>>>> +++ b/fs/nfs/nfs3xdr.c
> > >>>>>>>> @@ -69,13 +69,13 @@
> > >>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> > >>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> > >>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> > >>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> > >>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> > >>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> > >>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> > >>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> > >>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > >>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> > >>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > >>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> > >>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> > >>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> > >>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> > >>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> > >>>>>>>> @@ -85,7 +85,7 @@
> > >>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> > >>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > >>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> > >>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > >>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> > >>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> > >>>>>>>>
> > >>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
> > >>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> > >>>>>>>> result->op_status = status;
> > >>>>>>>> if (status != NFS3_OK)
> > >>>>>>>> goto out_status;
> > >>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > >>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > >>>>>>>> error = decode_read3resok(xdr, result);
> > >>>>>>>> out:
> > >>>>>>>> return error;
> > >>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > >>>>>>>> index d0fa18d..6d9d5e2 100644
> > >>>>>>>> --- a/fs/nfs/nfs4xdr.c
> > >>>>>>>> +++ b/fs/nfs/nfs4xdr.c
> > >>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > >>>>>>>> nfs4_fattr_bitmap_maxsz)
> > >>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> > >>>>>>>> encode_stateid_maxsz + 3)
> > >>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> > >>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> > >>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> > >>>>>>>> 2 + encode_verifier_maxsz + 5 + \
> > >>>>>>>> nfs4_label_maxsz)
> > >>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> > >>>>>>>> - decode_verifier_maxsz)
> > >>>>>>>> + decode_verifier_maxsz + 1)
> > >>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> > >>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> > >>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> > >>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> > >>>>>>>> encode_stateid_maxsz + 4)
> > >>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> > >>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > >>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > >>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > >>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > >>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
> > >>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > >>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > >>>>>>>> encode_stateid_maxsz + 3)
> > >>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > >>>>>>>> #define encode_fs_locations_maxsz \
> > >>>>>>>> (encode_getattr_maxsz)
> > >>>>>>>> #define decode_fs_locations_maxsz \
> > >>>>>>>> - (0)
> > >>>>>>>> + (1)
> > >>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > >>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > >>>>>>>>
> > >>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > >>>>>>>> 1 /* opaque devaddr4 length */ + \
> > >>>>>>>> /* devaddr4 payload is read into page */ \
> > >>>>>>>> 1 /* notification bitmap length */ + \
> > >>>>>>>> - 1 /* notification bitmap, word 0 */)
> > >>>>>>>> + 1 /* notification bitmap, word 0 */ + \
> > >>>>>>>> + 1 /* possible XDR padding */)
> > >>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> > >>>>>>>> encode_stateid_maxsz)
> > >>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> > >>>>>>>> decode_stateid_maxsz + \
> > >>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> > >>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> > >>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> > >>>>>>>> 2 /* offset */ + \
> > >>>>>>>> 2 /* length */ + \
> > >>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > >>>>>>>> index f780605..4ea38b0 100644
> > >>>>>>>> --- a/net/sunrpc/clnt.c
> > >>>>>>>> +++ b/net/sunrpc/clnt.c
> > >>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> > >>>>>>>> unsigned int base, unsigned int len,
> > >>>>>>>> unsigned int hdrsize)
> > >>>>>>>> {
> > >>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> > >>>>>>>> + /* Subtract one to force an extra word of buffer space for the
> > >>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> > >>>>>>>> + */
> > >>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> > >>>>>>>> +
> > >>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> > >>>>>>>> trace_rpc_reply_pages(req);
> > >>>>>>>> }
> > >>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> > >>>>>>>> index 7cca515..aa8177d 100644
> > >>>>>>>> --- a/net/sunrpc/xdr.c
> > >>>>>>>> +++ b/net/sunrpc/xdr.c
> > >>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> > >>>>>>>>
> > >>>>>>>> tail->iov_base = buf + offset;
> > >>>>>>>> tail->iov_len = buflen - offset;
> > >>>>>>>> + if ((xdr->page_len & 3) == 0)
> > >>>>>>>> + tail->iov_len -= sizeof(__be32);
> > >>>>>>>>
> > >>>>>>>> xdr->buflen += len;
> > >>>>>>>> }
> > >>>>>>>>
> > >>>>>>
> > >>>>>> --
> > >>>>>> Chuck Lever
> > >>>>
> > >>>> --
> > >>>> Chuck Lever
> > >>
> > >> --
> > >> Chuck Lever
> >
> > --
> > Chuck Lever
> >
> >
> >

2019-04-08 15:51:06

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Mon, Apr 8, 2019 at 11:26 AM Olga Kornievskaia <[email protected]> wrote:
>
> On Mon, Apr 8, 2019 at 11:21 AM Olga Kornievskaia <[email protected]> wrote:
> >
> > On Mon, Apr 8, 2019 at 10:43 AM Chuck Lever <[email protected]> wrote:
> > >
> > >
> > >
> > > > On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
> > > >
> > > > On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
> > > >>
> > > >>
> > > >>
> > > >>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
> > > >>>
> > > >>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
> > > >>>>
> > > >>>>
> > > >>>>
> > > >>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> > > >>>>>
> > > >>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> > > >>>>>>
> > > >>>>>>
> > > >>>>>>
> > > >>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> > > >>>>>>>
> > > >>>>>>> Hi Chuck,
> > > >>>>>>>
> > > >>>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> > > >>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> > > >>>>>>
> > > >>>>>> Possibly the macro that defines the maximum size of the reply
> > > >>>>>> is incorrect.
> > > >>>>>>
> > > >>>>>
> > > >>>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> > > >>>>> that it probably breaks whatever else it modified.
> > > >>>>
> > > >>>> It modifies READ, READDIR, and READLINK. Are those broken?
> > > >>>
> > > >>> I don't know how to test READLINK.. but I think READ/READDIR work OK
> > > >>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> > > >>> testing doesn't happen frequently).
> > > >>
> > > >> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
> > > >> tests in my automated unit tests.
> > > >>
> > > >>
> > > >>>>> The question is: can't we just revert it??
> > > >>>>
> > > >>>> Why not "root cause" it first?
> > > >>>
> > > >>> I'm trying :-/ I was just fishing to see how important the change was.
> > > >>
> > > >> Try reverting just this hunk:
> > > >
> > > > That doesn't help. It seems to be this piece that's causing issues
> > > > hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
> > > >
> > > > With this there is an extra byte (in front) in the buffer when (ACL)
> > > > operation is decoded.
> > >
> > > How do you know there isn't a latent bug in the getfacl decoder?
> >
> > I don't. All I know is that it passed tests before and now it doesn't.
>
> Also this bug will have to be in both getfacl and fs_location code.
> What they both share is xd_enter_page() code that now with new
> semantics makes the buffer point to the wrong place.
>
> > > How are you reproducing this issue? I can try it here later today.
> >
> > The issue was found running xfstest nfs/001. However, you don't need
> > that: (1) mount (2) nfs4_getfacl <file>
> >
> > To understand a patch, does it fix a problem with READLINK or is the
> > an optimization?

So this "fixes" it, but it doesn't look very good.

diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index cfcabc3..f2a5553 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c

@@ -5280,6 +5280,7 @@ static int decode_getacl(struct xdr_stream *xdr,
struct rpc_rqst *req,
goto out;
xdr_enter_page(xdr, xdr->buf->page_len);
+ xdr->p++;

/* Calculate the offset of the page data */
pg_offset = xdr->buf->head[0].iov_len;
@@ -6949,6 +6950,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
goto out;
if (res->migration) {
xdr_enter_page(xdr, PAGE_SIZE);
+ xdr->p++;
status = decode_getfattr_generic(xdr,
&res->fs_locations->fattr,
NULL, res->fs_locations,
@@ -6962,6 +6964,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
if (status)
goto out;
xdr_enter_page(xdr, PAGE_SIZE);
+ xdr->p++;
status = decode_getfattr_generic(xdr,
&res->fs_locations->fattr,
NULL, res->fs_locations,


> >
> > >
> > >
> > > >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > >> index d0fa18d..6d9d5e2 100644
> > > >> --- a/fs/nfs/nfs4xdr.c
> > > >> +++ b/fs/nfs/nfs4xdr.c
> > > >> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > >> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > > >> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > > >> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > > >> - nfs4_fattr_bitmap_maxsz + 1)
> > > >> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > > >> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > > >> encode_stateid_maxsz + 3)
> > > >> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > > >> #define encode_fs_locations_maxsz \
> > > >> (encode_getattr_maxsz)
> > > >> #define decode_fs_locations_maxsz \
> > > >> - (0)
> > > >> + (1)
> > > >> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > > >> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > > >>
> > > >>
> > > >>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> > > >>>>>>>>
> > > >>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
> > > >>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> > > >>>>>>>> is required to pad that payload so its length on the wire is a
> > > >>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
> > > >>>>>>>> each NFS result do not appear to account for this extra word.
> > > >>>>>>>>
> > > >>>>>>>> In each case where the data payload is to be received into pages:
> > > >>>>>>>>
> > > >>>>>>>> - 1 word is added to the size of the receive buffer allocated by
> > > >>>>>>>> call_allocate
> > > >>>>>>>>
> > > >>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> > > >>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
> > > >>>>>>>>
> > > >>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> > > >>>>>>>> is thus removed from the tail
> > > >>>>>>>>
> > > >>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
> > > >>>>>>>> ---
> > > >>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
> > > >>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> > > >>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> > > >>>>>>>> net/sunrpc/clnt.c | 6 +++++-
> > > >>>>>>>> net/sunrpc/xdr.c | 2 ++
> > > >>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> > > >>>>>>>>
> > > >>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> > > >>>>>>>> index 1dcd0fe..a7ed29d 100644
> > > >>>>>>>> --- a/fs/nfs/nfs2xdr.c
> > > >>>>>>>> +++ b/fs/nfs/nfs2xdr.c
> > > >>>>>>>> @@ -56,11 +56,11 @@
> > > >>>>>>>>
> > > >>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> > > >>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> > > >>>>>>>> -#define NFS_readlinkres_sz (2)
> > > >>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> > > >>>>>>>> +#define NFS_readlinkres_sz (2+1)
> > > >>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> > > >>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> > > >>>>>>>> #define NFS_stat_sz (1)
> > > >>>>>>>> -#define NFS_readdirres_sz (1)
> > > >>>>>>>> +#define NFS_readdirres_sz (1+1)
> > > >>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> > > >>>>>>>>
> > > >>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
> > > >>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> > > >>>>>>>> index a54dcf4..110358f 100644
> > > >>>>>>>> --- a/fs/nfs/nfs3xdr.c
> > > >>>>>>>> +++ b/fs/nfs/nfs3xdr.c
> > > >>>>>>>> @@ -69,13 +69,13 @@
> > > >>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> > > >>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> > > >>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> > > >>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> > > >>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> > > >>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> > > >>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> > > >>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> > > >>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > > >>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> > > >>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > > >>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> > > >>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> > > >>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> > > >>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> > > >>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> > > >>>>>>>> @@ -85,7 +85,7 @@
> > > >>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> > > >>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > > >>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> > > >>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > > >>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> > > >>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> > > >>>>>>>>
> > > >>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
> > > >>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> > > >>>>>>>> result->op_status = status;
> > > >>>>>>>> if (status != NFS3_OK)
> > > >>>>>>>> goto out_status;
> > > >>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > > >>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > > >>>>>>>> error = decode_read3resok(xdr, result);
> > > >>>>>>>> out:
> > > >>>>>>>> return error;
> > > >>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > >>>>>>>> index d0fa18d..6d9d5e2 100644
> > > >>>>>>>> --- a/fs/nfs/nfs4xdr.c
> > > >>>>>>>> +++ b/fs/nfs/nfs4xdr.c
> > > >>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > >>>>>>>> nfs4_fattr_bitmap_maxsz)
> > > >>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> > > >>>>>>>> encode_stateid_maxsz + 3)
> > > >>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> > > >>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> > > >>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> > > >>>>>>>> 2 + encode_verifier_maxsz + 5 + \
> > > >>>>>>>> nfs4_label_maxsz)
> > > >>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> > > >>>>>>>> - decode_verifier_maxsz)
> > > >>>>>>>> + decode_verifier_maxsz + 1)
> > > >>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> > > >>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> > > >>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> > > >>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> > > >>>>>>>> encode_stateid_maxsz + 4)
> > > >>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> > > >>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > >>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > > >>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > > >>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > > >>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
> > > >>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > > >>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > > >>>>>>>> encode_stateid_maxsz + 3)
> > > >>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > > >>>>>>>> #define encode_fs_locations_maxsz \
> > > >>>>>>>> (encode_getattr_maxsz)
> > > >>>>>>>> #define decode_fs_locations_maxsz \
> > > >>>>>>>> - (0)
> > > >>>>>>>> + (1)
> > > >>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > > >>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > > >>>>>>>>
> > > >>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > >>>>>>>> 1 /* opaque devaddr4 length */ + \
> > > >>>>>>>> /* devaddr4 payload is read into page */ \
> > > >>>>>>>> 1 /* notification bitmap length */ + \
> > > >>>>>>>> - 1 /* notification bitmap, word 0 */)
> > > >>>>>>>> + 1 /* notification bitmap, word 0 */ + \
> > > >>>>>>>> + 1 /* possible XDR padding */)
> > > >>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> > > >>>>>>>> encode_stateid_maxsz)
> > > >>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> > > >>>>>>>> decode_stateid_maxsz + \
> > > >>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> > > >>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> > > >>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> > > >>>>>>>> 2 /* offset */ + \
> > > >>>>>>>> 2 /* length */ + \
> > > >>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > >>>>>>>> index f780605..4ea38b0 100644
> > > >>>>>>>> --- a/net/sunrpc/clnt.c
> > > >>>>>>>> +++ b/net/sunrpc/clnt.c
> > > >>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> > > >>>>>>>> unsigned int base, unsigned int len,
> > > >>>>>>>> unsigned int hdrsize)
> > > >>>>>>>> {
> > > >>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> > > >>>>>>>> + /* Subtract one to force an extra word of buffer space for the
> > > >>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> > > >>>>>>>> + */
> > > >>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> > > >>>>>>>> +
> > > >>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> > > >>>>>>>> trace_rpc_reply_pages(req);
> > > >>>>>>>> }
> > > >>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> > > >>>>>>>> index 7cca515..aa8177d 100644
> > > >>>>>>>> --- a/net/sunrpc/xdr.c
> > > >>>>>>>> +++ b/net/sunrpc/xdr.c
> > > >>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> > > >>>>>>>>
> > > >>>>>>>> tail->iov_base = buf + offset;
> > > >>>>>>>> tail->iov_len = buflen - offset;
> > > >>>>>>>> + if ((xdr->page_len & 3) == 0)
> > > >>>>>>>> + tail->iov_len -= sizeof(__be32);
> > > >>>>>>>>
> > > >>>>>>>> xdr->buflen += len;
> > > >>>>>>>> }
> > > >>>>>>>>
> > > >>>>>>
> > > >>>>>> --
> > > >>>>>> Chuck Lever
> > > >>>>
> > > >>>> --
> > > >>>> Chuck Lever
> > > >>
> > > >> --
> > > >> Chuck Lever
> > >
> > > --
> > > Chuck Lever
> > >
> > >
> > >

2019-04-08 16:02:47

by Olga Kornievskaia

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages

On Mon, Apr 8, 2019 at 11:50 AM Olga Kornievskaia <[email protected]> wrote:
>
> On Mon, Apr 8, 2019 at 11:26 AM Olga Kornievskaia <[email protected]> wrote:
> >
> > On Mon, Apr 8, 2019 at 11:21 AM Olga Kornievskaia <[email protected]> wrote:
> > >
> > > On Mon, Apr 8, 2019 at 10:43 AM Chuck Lever <[email protected]> wrote:
> > > >
> > > >
> > > >
> > > > > On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
> > > > >
> > > > > On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
> > > > >>
> > > > >>
> > > > >>
> > > > >>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
> > > > >>>
> > > > >>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
> > > > >>>>
> > > > >>>>
> > > > >>>>
> > > > >>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
> > > > >>>>>
> > > > >>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
> > > > >>>>>>
> > > > >>>>>>
> > > > >>>>>>
> > > > >>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
> > > > >>>>>>>
> > > > >>>>>>> Hi Chuck,
> > > > >>>>>>>
> > > > >>>>>>> This patch break ACLs. After applying this patch nfs4_getfacl fails
> > > > >>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
> > > > >>>>>>
> > > > >>>>>> Possibly the macro that defines the maximum size of the reply
> > > > >>>>>> is incorrect.
> > > > >>>>>>
> > > > >>>>>
> > > > >>>>> This also breaks FS_LOCATION. I'm going to go on the limb here and say
> > > > >>>>> that it probably breaks whatever else it modified.
> > > > >>>>
> > > > >>>> It modifies READ, READDIR, and READLINK. Are those broken?
> > > > >>>
> > > > >>> I don't know how to test READLINK.. but I think READ/READDIR work OK
> > > > >>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
> > > > >>> testing doesn't happen frequently).
> > > > >>
> > > > >> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
> > > > >> tests in my automated unit tests.
> > > > >>
> > > > >>
> > > > >>>>> The question is: can't we just revert it??
> > > > >>>>
> > > > >>>> Why not "root cause" it first?
> > > > >>>
> > > > >>> I'm trying :-/ I was just fishing to see how important the change was.
> > > > >>
> > > > >> Try reverting just this hunk:
> > > > >
> > > > > That doesn't help. It seems to be this piece that's causing issues
> > > > > hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
> > > > >
> > > > > With this there is an extra byte (in front) in the buffer when (ACL)
> > > > > operation is decoded.
> > > >
> > > > How do you know there isn't a latent bug in the getfacl decoder?
> > >
> > > I don't. All I know is that it passed tests before and now it doesn't.
> >
> > Also, this bug would have to be in both the getfacl and fs_location code.
> > What they both share is the xdr_enter_page() code, which with the new
> > semantics now makes the buffer point to the wrong place.

READLINK uses the xdr_read_pages() function, which sets xdr->p and
accounts for padding; xdr_enter_page() does not.

> >
> > > > How are you reproducing this issue? I can try it here later today.
> > >
> > > The issue was found running xfstest nfs/001. However, you don't need
> > > that: (1) mount (2) nfs4_getfacl <file>
> > >
> > > To understand the patch: does it fix a problem with READLINK, or is it
> > > an optimization?
>
> So this "fixes" it, but it doesn't look very good.
>
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index cfcabc3..f2a5553 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
>
> @@ -5280,6 +5280,7 @@ static int decode_getacl(struct xdr_stream *xdr,
> struct rpc_rqst *req,
> goto out;
> xdr_enter_page(xdr, xdr->buf->page_len);
> + xdr->p++;
>
> /* Calculate the offset of the page data */
> pg_offset = xdr->buf->head[0].iov_len;
> @@ -6949,6 +6950,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
> goto out;
> if (res->migration) {
> xdr_enter_page(xdr, PAGE_SIZE);
> + xdr->p++;
> status = decode_getfattr_generic(xdr,
> &res->fs_locations->fattr,
> NULL, res->fs_locations,
> @@ -6962,6 +6964,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
> if (status)
> goto out;
> xdr_enter_page(xdr, PAGE_SIZE);
> + xdr->p++;
> status = decode_getfattr_generic(xdr,
> &res->fs_locations->fattr,
> NULL, res->fs_locations,
>
>
> > >
> > > >
> > > >
> > > > >> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > >> index d0fa18d..6d9d5e2 100644
> > > > >> --- a/fs/nfs/nfs4xdr.c
> > > > >> +++ b/fs/nfs/nfs4xdr.c
> > > > >> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > > >> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > > > >> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > > > >> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > > > >> - nfs4_fattr_bitmap_maxsz + 1)
> > > > >> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > > > >> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > > > >> encode_stateid_maxsz + 3)
> > > > >> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > > > >> #define encode_fs_locations_maxsz \
> > > > >> (encode_getattr_maxsz)
> > > > >> #define decode_fs_locations_maxsz \
> > > > >> - (0)
> > > > >> + (1)
> > > > >> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > > > >> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > > > >>
> > > > >>
> > > > >>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
> > > > >>>>>>>>
> > > > >>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
> > > > >>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
> > > > >>>>>>>> is required to pad that payload so its length on the wire is a
> > > > >>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
> > > > >>>>>>>> each NFS result do not appear to account for this extra word.
> > > > >>>>>>>>
> > > > >>>>>>>> In each case where the data payload is to be received into pages:
> > > > >>>>>>>>
> > > > >>>>>>>> - 1 word is added to the size of the receive buffer allocated by
> > > > >>>>>>>> call_allocate
> > > > >>>>>>>>
> > > > >>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
> > > > >>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
> > > > >>>>>>>>
> > > > >>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
> > > > >>>>>>>> is thus removed from the tail
> > > > >>>>>>>>
> > > > >>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
> > > > >>>>>>>> ---
> > > > >>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
> > > > >>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
> > > > >>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
> > > > >>>>>>>> net/sunrpc/clnt.c | 6 +++++-
> > > > >>>>>>>> net/sunrpc/xdr.c | 2 ++
> > > > >>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
> > > > >>>>>>>>
> > > > >>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
> > > > >>>>>>>> index 1dcd0fe..a7ed29d 100644
> > > > >>>>>>>> --- a/fs/nfs/nfs2xdr.c
> > > > >>>>>>>> +++ b/fs/nfs/nfs2xdr.c
> > > > >>>>>>>> @@ -56,11 +56,11 @@
> > > > >>>>>>>>
> > > > >>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
> > > > >>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
> > > > >>>>>>>> -#define NFS_readlinkres_sz (2)
> > > > >>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
> > > > >>>>>>>> +#define NFS_readlinkres_sz (2+1)
> > > > >>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
> > > > >>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
> > > > >>>>>>>> #define NFS_stat_sz (1)
> > > > >>>>>>>> -#define NFS_readdirres_sz (1)
> > > > >>>>>>>> +#define NFS_readdirres_sz (1+1)
> > > > >>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
> > > > >>>>>>>>
> > > > >>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
> > > > >>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
> > > > >>>>>>>> index a54dcf4..110358f 100644
> > > > >>>>>>>> --- a/fs/nfs/nfs3xdr.c
> > > > >>>>>>>> +++ b/fs/nfs/nfs3xdr.c
> > > > >>>>>>>> @@ -69,13 +69,13 @@
> > > > >>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
> > > > >>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
> > > > >>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
> > > > >>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
> > > > >>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
> > > > >>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
> > > > >>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
> > > > >>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
> > > > >>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > > > >>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
> > > > >>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
> > > > >>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
> > > > >>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
> > > > >>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
> > > > >>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
> > > > >>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
> > > > >>>>>>>> @@ -85,7 +85,7 @@
> > > > >>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
> > > > >>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > > > >>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
> > > > >>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
> > > > >>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
> > > > >>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
> > > > >>>>>>>>
> > > > >>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
> > > > >>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
> > > > >>>>>>>> result->op_status = status;
> > > > >>>>>>>> if (status != NFS3_OK)
> > > > >>>>>>>> goto out_status;
> > > > >>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > > > >>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
> > > > >>>>>>>> error = decode_read3resok(xdr, result);
> > > > >>>>>>>> out:
> > > > >>>>>>>> return error;
> > > > >>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> > > > >>>>>>>> index d0fa18d..6d9d5e2 100644
> > > > >>>>>>>> --- a/fs/nfs/nfs4xdr.c
> > > > >>>>>>>> +++ b/fs/nfs/nfs4xdr.c
> > > > >>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > > >>>>>>>> nfs4_fattr_bitmap_maxsz)
> > > > >>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
> > > > >>>>>>>> encode_stateid_maxsz + 3)
> > > > >>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
> > > > >>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
> > > > >>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
> > > > >>>>>>>> 2 + encode_verifier_maxsz + 5 + \
> > > > >>>>>>>> nfs4_label_maxsz)
> > > > >>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
> > > > >>>>>>>> - decode_verifier_maxsz)
> > > > >>>>>>>> + decode_verifier_maxsz + 1)
> > > > >>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
> > > > >>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
> > > > >>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
> > > > >>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
> > > > >>>>>>>> encode_stateid_maxsz + 4)
> > > > >>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
> > > > >>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > > >>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
> > > > >>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
> > > > >>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
> > > > >>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
> > > > >>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
> > > > >>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
> > > > >>>>>>>> encode_stateid_maxsz + 3)
> > > > >>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
> > > > >>>>>>>> #define encode_fs_locations_maxsz \
> > > > >>>>>>>> (encode_getattr_maxsz)
> > > > >>>>>>>> #define decode_fs_locations_maxsz \
> > > > >>>>>>>> - (0)
> > > > >>>>>>>> + (1)
> > > > >>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
> > > > >>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
> > > > >>>>>>>>
> > > > >>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
> > > > >>>>>>>> 1 /* opaque devaddr4 length */ + \
> > > > >>>>>>>> /* devaddr4 payload is read into page */ \
> > > > >>>>>>>> 1 /* notification bitmap length */ + \
> > > > >>>>>>>> - 1 /* notification bitmap, word 0 */)
> > > > >>>>>>>> + 1 /* notification bitmap, word 0 */ + \
> > > > >>>>>>>> + 1 /* possible XDR padding */)
> > > > >>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
> > > > >>>>>>>> encode_stateid_maxsz)
> > > > >>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
> > > > >>>>>>>> decode_stateid_maxsz + \
> > > > >>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
> > > > >>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
> > > > >>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
> > > > >>>>>>>> 2 /* offset */ + \
> > > > >>>>>>>> 2 /* length */ + \
> > > > >>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
> > > > >>>>>>>> index f780605..4ea38b0 100644
> > > > >>>>>>>> --- a/net/sunrpc/clnt.c
> > > > >>>>>>>> +++ b/net/sunrpc/clnt.c
> > > > >>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
> > > > >>>>>>>> unsigned int base, unsigned int len,
> > > > >>>>>>>> unsigned int hdrsize)
> > > > >>>>>>>> {
> > > > >>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
> > > > >>>>>>>> + /* Subtract one to force an extra word of buffer space for the
> > > > >>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
> > > > >>>>>>>> + */
> > > > >>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
> > > > >>>>>>>> +
> > > > >>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
> > > > >>>>>>>> trace_rpc_reply_pages(req);
> > > > >>>>>>>> }
> > > > >>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> > > > >>>>>>>> index 7cca515..aa8177d 100644
> > > > >>>>>>>> --- a/net/sunrpc/xdr.c
> > > > >>>>>>>> +++ b/net/sunrpc/xdr.c
> > > > >>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
> > > > >>>>>>>>
> > > > >>>>>>>> tail->iov_base = buf + offset;
> > > > >>>>>>>> tail->iov_len = buflen - offset;
> > > > >>>>>>>> + if ((xdr->page_len & 3) == 0)
> > > > >>>>>>>> + tail->iov_len -= sizeof(__be32);
> > > > >>>>>>>>
> > > > >>>>>>>> xdr->buflen += len;
> > > > >>>>>>>> }
> > > > >>>>>>>>
> > > > >>>>>>
> > > > >>>>>> --
> > > > >>>>>> Chuck Lever
> > > > >>>>
> > > > >>>> --
> > > > >>>> Chuck Lever
> > > > >>
> > > > >> --
> > > > >> Chuck Lever
> > > >
> > > > --
> > > > Chuck Lever
> > > >
> > > >
> > > >

2019-04-08 16:30:10

by Chuck Lever III

[permalink] [raw]
Subject: Re: [PATCH v1 20/23] NFS: Account for XDR pad of buf->pages



> On Apr 8, 2019, at 11:50 AM, Olga Kornievskaia <[email protected]> wrote:
>
> On Mon, Apr 8, 2019 at 11:26 AM Olga Kornievskaia <[email protected]> wrote:
>>
>> On Mon, Apr 8, 2019 at 11:21 AM Olga Kornievskaia <[email protected]> wrote:
>>>
>>> On Mon, Apr 8, 2019 at 10:43 AM Chuck Lever <[email protected]> wrote:
>>>>
>>>>
>>>>
>>>>> On Apr 8, 2019, at 10:36 AM, Olga Kornievskaia <[email protected]> wrote:
>>>>>
>>>>> On Fri, Apr 5, 2019 at 3:42 PM Chuck Lever <[email protected]> wrote:
>>>>>>
>>>>>>
>>>>>>
>>>>>>> On Apr 5, 2019, at 3:27 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>>>
>>>>>>> On Fri, Apr 5, 2019 at 3:23 PM Chuck Lever <[email protected]> wrote:
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>> On Apr 5, 2019, at 3:17 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>>>>>
>>>>>>>>> On Fri, Apr 5, 2019 at 1:51 PM Chuck Lever <[email protected]> wrote:
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>> On Apr 5, 2019, at 1:36 PM, Olga Kornievskaia <[email protected]> wrote:
>>>>>>>>>>>
>>>>>>>>>>> Hi Chuck,
>>>>>>>>>>>
>>>>>>>>>>> This patch breaks ACLs. After applying this patch, nfs4_getfacl fails
>>>>>>>>>>> (it fails within xdr and returns ENOTSUPP). Any ideas why?
>>>>>>>>>>
>>>>>>>>>> Possibly the macro that defines the maximum size of the reply
>>>>>>>>>> is incorrect.
>>>>>>>>>>
>>>>>>>>>
>>>>>>>>> This also breaks FS_LOCATION. I'm going to go out on a limb here and say
>>>>>>>>> that it probably breaks whatever else it modified.
>>>>>>>>
>>>>>>>> It modifies READ, READDIR, and READLINK. Are those broken?
>>>>>>>
>>>>>>> I don't know how to test READLINK.. but I think READ/READDIR work OK
>>>>>>> otherwise folks would have noticed it (I gather ACL and FS_LOCATION
>>>>>>> testing doesn't happen frequently).
>>>>>>
>>>>>> I guess I don't have any NFSv4 ACL or FS_LOCATIONS regressions
>>>>>> tests in my automated unit tests.
>>>>>>
>>>>>>
>>>>>>>>> The question is: can't we just revert it??
>>>>>>>>
>>>>>>>> Why not "root cause" it first?
>>>>>>>
>>>>>>> I'm trying :-/ I was just fishing to see how important the change was.
>>>>>>
>>>>>> Try reverting just this hunk:
>>>>>
>>>>> That doesn't help. It seems to be this piece that's causing issues
>>>>> hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1
>>>>>
>>>>> With this there is an extra byte (in front) in the buffer when (ACL)
>>>>> operation is decoded.
>>>>
>>>> How do you know there isn't a latent bug in the getfacl decoder?
>>>
>>> I don't. All I know is that it passed tests before and now it doesn't.
>>
>> Also, this bug would have to be in both the getfacl and fs_location code.
>> What they both share is the xdr_enter_page() code, which with the new
>> semantics now makes the buffer point to the wrong place.
>>
>>>> How are you reproducing this issue? I can try it here later today.
>>>
>>> The issue was found running xfstest nfs/001. However, you don't need
>>> that: (1) mount (2) nfs4_getfacl <file>
>>>
>>> To understand the patch: does it fix a problem with READLINK, or is it
>>> an optimization?
>
> So this "fixes" it, but it doesn't look very good.
>
> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
> index cfcabc3..f2a5553 100644
> --- a/fs/nfs/nfs4xdr.c
> +++ b/fs/nfs/nfs4xdr.c
>
> @@ -5280,6 +5280,7 @@ static int decode_getacl(struct xdr_stream *xdr,
> struct rpc_rqst *req,
> goto out;
> xdr_enter_page(xdr, xdr->buf->page_len);
> + xdr->p++;
>
> /* Calculate the offset of the page data */
> pg_offset = xdr->buf->head[0].iov_len;
> @@ -6949,6 +6950,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
> goto out;
> if (res->migration) {
> xdr_enter_page(xdr, PAGE_SIZE);
> + xdr->p++;
> status = decode_getfattr_generic(xdr,
> &res->fs_locations->fattr,
> NULL, res->fs_locations,
> @@ -6962,6 +6964,7 @@ static int nfs4_xdr_dec_fs_locations(struct rpc_rqst *req,
> if (status)
> goto out;
> xdr_enter_page(xdr, PAGE_SIZE);
> + xdr->p++;
> status = decode_getfattr_generic(xdr,
> &res->fs_locations->fattr,
> NULL, res->fs_locations,

Your workaround appears to advance xdr->p one quad into the first page in
the xdr_buf. We really want that XDR data item to fall right at offset 0
of that page.

I'm looking into it.


>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>>> index d0fa18d..6d9d5e2 100644
>>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>>>>>> encode_stateid_maxsz + 3)
>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>>>>>> #define encode_fs_locations_maxsz \
>>>>>> (encode_getattr_maxsz)
>>>>>> #define decode_fs_locations_maxsz \
>>>>>> - (0)
>>>>>> + (1)
>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>>>>>
>>>>>>
>>>>>>>>>>> On Mon, Feb 11, 2019 at 11:25 AM Chuck Lever <[email protected]> wrote:
>>>>>>>>>>>>
>>>>>>>>>>>> Certain NFS results (eg. READLINK) might expect a data payload that
>>>>>>>>>>>> is not an exact multiple of 4 bytes. In this case, XDR encoding
>>>>>>>>>>>> is required to pad that payload so its length on the wire is a
>>>>>>>>>>>> multiple of 4 bytes. The constants that define the maximum size of
>>>>>>>>>>>> each NFS result do not appear to account for this extra word.
>>>>>>>>>>>>
>>>>>>>>>>>> In each case where the data payload is to be received into pages:
>>>>>>>>>>>>
>>>>>>>>>>>> - 1 word is added to the size of the receive buffer allocated by
>>>>>>>>>>>> call_allocate
>>>>>>>>>>>>
>>>>>>>>>>>> - rpc_inline_rcv_pages subtracts 1 word from @hdrsize so that the
>>>>>>>>>>>> extra buffer space falls into the rcv_buf's tail iovec
>>>>>>>>>>>>
>>>>>>>>>>>> - If buf->pagelen is word-aligned, an XDR pad is not needed and
>>>>>>>>>>>> is thus removed from the tail
>>>>>>>>>>>>
>>>>>>>>>>>> Signed-off-by: Chuck Lever <[email protected]>
>>>>>>>>>>>> ---
>>>>>>>>>>>> fs/nfs/nfs2xdr.c | 6 +++---
>>>>>>>>>>>> fs/nfs/nfs3xdr.c | 10 +++++-----
>>>>>>>>>>>> fs/nfs/nfs4xdr.c | 15 ++++++++-------
>>>>>>>>>>>> net/sunrpc/clnt.c | 6 +++++-
>>>>>>>>>>>> net/sunrpc/xdr.c | 2 ++
>>>>>>>>>>>> 5 files changed, 23 insertions(+), 16 deletions(-)
>>>>>>>>>>>>
>>>>>>>>>>>> diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
>>>>>>>>>>>> index 1dcd0fe..a7ed29d 100644
>>>>>>>>>>>> --- a/fs/nfs/nfs2xdr.c
>>>>>>>>>>>> +++ b/fs/nfs/nfs2xdr.c
>>>>>>>>>>>> @@ -56,11 +56,11 @@
>>>>>>>>>>>>
>>>>>>>>>>>> #define NFS_attrstat_sz (1+NFS_fattr_sz)
>>>>>>>>>>>> #define NFS_diropres_sz (1+NFS_fhandle_sz+NFS_fattr_sz)
>>>>>>>>>>>> -#define NFS_readlinkres_sz (2)
>>>>>>>>>>>> -#define NFS_readres_sz (1+NFS_fattr_sz+1)
>>>>>>>>>>>> +#define NFS_readlinkres_sz (2+1)
>>>>>>>>>>>> +#define NFS_readres_sz (1+NFS_fattr_sz+1+1)
>>>>>>>>>>>> #define NFS_writeres_sz (NFS_attrstat_sz)
>>>>>>>>>>>> #define NFS_stat_sz (1)
>>>>>>>>>>>> -#define NFS_readdirres_sz (1)
>>>>>>>>>>>> +#define NFS_readdirres_sz (1+1)
>>>>>>>>>>>> #define NFS_statfsres_sz (1+NFS_info_sz)
>>>>>>>>>>>>
>>>>>>>>>>>> static int nfs_stat_to_errno(enum nfs_stat);
>>>>>>>>>>>> diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
>>>>>>>>>>>> index a54dcf4..110358f 100644
>>>>>>>>>>>> --- a/fs/nfs/nfs3xdr.c
>>>>>>>>>>>> +++ b/fs/nfs/nfs3xdr.c
>>>>>>>>>>>> @@ -69,13 +69,13 @@
>>>>>>>>>>>> #define NFS3_removeres_sz (NFS3_setattrres_sz)
>>>>>>>>>>>> #define NFS3_lookupres_sz (1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
>>>>>>>>>>>> #define NFS3_accessres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>>>>>>>> -#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1)
>>>>>>>>>>>> -#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3)
>>>>>>>>>>>> +#define NFS3_readlinkres_sz (1+NFS3_post_op_attr_sz+1+1)
>>>>>>>>>>>> +#define NFS3_readres_sz (1+NFS3_post_op_attr_sz+3+1)
>>>>>>>>>>>> #define NFS3_writeres_sz (1+NFS3_wcc_data_sz+4)
>>>>>>>>>>>> #define NFS3_createres_sz (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>>>>>>>> #define NFS3_renameres_sz (1+(2 * NFS3_wcc_data_sz))
>>>>>>>>>>>> #define NFS3_linkres_sz (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
>>>>>>>>>>>> -#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2)
>>>>>>>>>>>> +#define NFS3_readdirres_sz (1+NFS3_post_op_attr_sz+2+1)
>>>>>>>>>>>> #define NFS3_fsstatres_sz (1+NFS3_post_op_attr_sz+13)
>>>>>>>>>>>> #define NFS3_fsinfores_sz (1+NFS3_post_op_attr_sz+12)
>>>>>>>>>>>> #define NFS3_pathconfres_sz (1+NFS3_post_op_attr_sz+6)
>>>>>>>>>>>> @@ -85,7 +85,7 @@
>>>>>>>>>>>> #define ACL3_setaclargs_sz (NFS3_fh_sz+1+ \
>>>>>>>>>>>> XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>>>>>>>> #define ACL3_getaclres_sz (1+NFS3_post_op_attr_sz+1+ \
>>>>>>>>>>>> - XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
>>>>>>>>>>>> + XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
>>>>>>>>>>>> #define ACL3_setaclres_sz (1+NFS3_post_op_attr_sz)
>>>>>>>>>>>>
>>>>>>>>>>>> static int nfs3_stat_to_errno(enum nfs_stat);
>>>>>>>>>>>> @@ -1629,7 +1629,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
>>>>>>>>>>>> result->op_status = status;
>>>>>>>>>>>> if (status != NFS3_OK)
>>>>>>>>>>>> goto out_status;
>>>>>>>>>>>> - result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>>>>>>>> + result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
>>>>>>>>>>>> error = decode_read3resok(xdr, result);
>>>>>>>>>>>> out:
>>>>>>>>>>>> return error;
>>>>>>>>>>>> diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
>>>>>>>>>>>> index d0fa18d..6d9d5e2 100644
>>>>>>>>>>>> --- a/fs/nfs/nfs4xdr.c
>>>>>>>>>>>> +++ b/fs/nfs/nfs4xdr.c
>>>>>>>>>>>> @@ -215,14 +215,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>>>>>> nfs4_fattr_bitmap_maxsz)
>>>>>>>>>>>> #define encode_read_maxsz (op_encode_hdr_maxsz + \
>>>>>>>>>>>> encode_stateid_maxsz + 3)
>>>>>>>>>>>> -#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
>>>>>>>>>>>> +#define decode_read_maxsz (op_decode_hdr_maxsz + 2 + 1)
>>>>>>>>>>>> #define encode_readdir_maxsz (op_encode_hdr_maxsz + \
>>>>>>>>>>>> 2 + encode_verifier_maxsz + 5 + \
>>>>>>>>>>>> nfs4_label_maxsz)
>>>>>>>>>>>> #define decode_readdir_maxsz (op_decode_hdr_maxsz + \
>>>>>>>>>>>> - decode_verifier_maxsz)
>>>>>>>>>>>> + decode_verifier_maxsz + 1)
>>>>>>>>>>>> #define encode_readlink_maxsz (op_encode_hdr_maxsz)
>>>>>>>>>>>> -#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1)
>>>>>>>>>>>> +#define decode_readlink_maxsz (op_decode_hdr_maxsz + 1 + 1)
>>>>>>>>>>>> #define encode_write_maxsz (op_encode_hdr_maxsz + \
>>>>>>>>>>>> encode_stateid_maxsz + 4)
>>>>>>>>>>>> #define decode_write_maxsz (op_decode_hdr_maxsz + \
>>>>>>>>>>>> @@ -284,14 +284,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>>>>>> #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
>>>>>>>>>>>> #define encode_getacl_maxsz (encode_getattr_maxsz)
>>>>>>>>>>>> #define decode_getacl_maxsz (op_decode_hdr_maxsz + \
>>>>>>>>>>>> - nfs4_fattr_bitmap_maxsz + 1)
>>>>>>>>>>>> + nfs4_fattr_bitmap_maxsz + 1 + 1)
>>>>>>>>>>>> #define encode_setacl_maxsz (op_encode_hdr_maxsz + \
>>>>>>>>>>>> encode_stateid_maxsz + 3)
>>>>>>>>>>>> #define decode_setacl_maxsz (decode_setattr_maxsz)
>>>>>>>>>>>> #define encode_fs_locations_maxsz \
>>>>>>>>>>>> (encode_getattr_maxsz)
>>>>>>>>>>>> #define decode_fs_locations_maxsz \
>>>>>>>>>>>> - (0)
>>>>>>>>>>>> + (1)
>>>>>>>>>>>> #define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
>>>>>>>>>>>> #define decode_secinfo_maxsz (op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
>>>>>>>>>>>>
>>>>>>>>>>>> @@ -392,12 +392,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
>>>>>>>>>>>> 1 /* opaque devaddr4 length */ + \
>>>>>>>>>>>> /* devaddr4 payload is read into page */ \
>>>>>>>>>>>> 1 /* notification bitmap length */ + \
>>>>>>>>>>>> - 1 /* notification bitmap, word 0 */)
>>>>>>>>>>>> + 1 /* notification bitmap, word 0 */ + \
>>>>>>>>>>>> + 1 /* possible XDR padding */)
>>>>>>>>>>>> #define encode_layoutget_maxsz (op_encode_hdr_maxsz + 10 + \
>>>>>>>>>>>> encode_stateid_maxsz)
>>>>>>>>>>>> #define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
>>>>>>>>>>>> decode_stateid_maxsz + \
>>>>>>>>>>>> - XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
>>>>>>>>>>>> + XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
>>>>>>>>>>>> #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
>>>>>>>>>>>> 2 /* offset */ + \
>>>>>>>>>>>> 2 /* length */ + \
>>>>>>>>>>>> diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
>>>>>>>>>>>> index f780605..4ea38b0 100644
>>>>>>>>>>>> --- a/net/sunrpc/clnt.c
>>>>>>>>>>>> +++ b/net/sunrpc/clnt.c
>>>>>>>>>>>> @@ -1177,7 +1177,11 @@ void rpc_prepare_reply_pages(struct rpc_rqst *req, struct page **pages,
>>>>>>>>>>>> unsigned int base, unsigned int len,
>>>>>>>>>>>> unsigned int hdrsize)
>>>>>>>>>>>> {
>>>>>>>>>>>> - hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack;
>>>>>>>>>>>> + /* Subtract one to force an extra word of buffer space for the
>>>>>>>>>>>> + * payload's XDR pad to fall into the rcv_buf's tail iovec.
>>>>>>>>>>>> + */
>>>>>>>>>>>> + hdrsize += RPC_REPHDRSIZE + req->rq_cred->cr_auth->au_rslack - 1;
>>>>>>>>>>>> +
>>>>>>>>>>>> xdr_inline_pages(&req->rq_rcv_buf, hdrsize << 2, pages, base, len);
>>>>>>>>>>>> trace_rpc_reply_pages(req);
>>>>>>>>>>>> }
>>>>>>>>>>>> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
>>>>>>>>>>>> index 7cca515..aa8177d 100644
>>>>>>>>>>>> --- a/net/sunrpc/xdr.c
>>>>>>>>>>>> +++ b/net/sunrpc/xdr.c
>>>>>>>>>>>> @@ -189,6 +189,8 @@ __be32 *xdr_encode_opaque(__be32 *p, const void *ptr, unsigned int nbytes)
>>>>>>>>>>>>
>>>>>>>>>>>> tail->iov_base = buf + offset;
>>>>>>>>>>>> tail->iov_len = buflen - offset;
>>>>>>>>>>>> + if ((xdr->page_len & 3) == 0)
>>>>>>>>>>>> + tail->iov_len -= sizeof(__be32);
>>>>>>>>>>>>
>>>>>>>>>>>> xdr->buflen += len;
>>>>>>>>>>>> }
>>>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>>>>> Chuck Lever
>>>>>>>>
>>>>>>>> --
>>>>>>>> Chuck Lever
>>>>>>
>>>>>> --
>>>>>> Chuck Lever
>>>>
>>>> --
>>>> Chuck Lever

--
Chuck Lever