2023-01-08 16:41:30

by Chuck Lever

[permalink] [raw]
Subject: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

From: Chuck Lever <[email protected]>

To navigate around the space that svcauth_gss_accept() reserves
for the RPC payload body length and sequence number fields,
svcauth_gss_release() does a little dance with the reply's
accept_stat, moving the accept_stat value in the response buffer
down by two words.

Instead, let's have the ->accept() methods each set the proper
final location of the accept_stat to avoid having to move
things.

Signed-off-by: Chuck Lever <[email protected]>
---
include/linux/sunrpc/svc.h | 19 +++++++++++++++++++
net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++++++++-----------
net/sunrpc/svc.c | 2 --
net/sunrpc/svcauth_unix.c | 6 ++++++
4 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index f40a90ca5de6..392d2d2620fa 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -544,4 +544,23 @@ static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack)
WARN_ON(xdr->p > xdr->end);
}

+/**
+ * svcxdr_set_accept_stat - Reserve space for the accept_stat field
+ * @rqstp: RPC transaction context
+ *
+ * Return values:
+ * %true: Success
+ * %false: No response buffer space was available
+ */
+static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp)
+{
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
+
+ rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT);
+ if (unlikely(!rqstp->rq_accept_statp))
+ return false;
+ *rqstp->rq_accept_statp = rpc_success;
+ return true;
+}
+
#endif /* SUNRPC_SVC_H */
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 560fb8a2803d..333873abb7d9 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1220,7 +1220,7 @@ svcauth_gss_legacy_init(struct svc_rqst *rqstp,
if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle,
&rsip->major_status, GSS_SEQ_WIN))
goto out;
- if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
+ if (!svcxdr_set_accept_stat(rqstp))
goto out;
if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle,
&rsip->out_token, rsip->major_status,
@@ -1348,7 +1348,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle,
&ud.major_status, GSS_SEQ_WIN))
goto out;
- if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
+ if (!svcxdr_set_accept_stat(rqstp))
goto out;
if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle,
&ud.out_token, ud.major_status,
@@ -1640,16 +1640,18 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
case RPC_GSS_PROC_DESTROY:
if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
+ goto auth_err;
/* Delete the entry from the cache_list and call cache_put */
sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
- if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
- goto auth_err;
goto complete;
case RPC_GSS_PROC_DATA:
rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
svcdata->verf_start = xdr_reserve_space(&rqstp->rq_res_stream, 0);
if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
+ goto auth_err;
rqstp->rq_cred = rsci->cred;
get_group_info(rsci->cred.cr_group_info);
rqstp->rq_auth_stat = rpc_autherr_badcred;
@@ -1706,7 +1708,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
static __be32 *
svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
{
- struct xdr_buf *resbuf = &rqstp->rq_res;
__be32 *p;
u32 verf_len;

@@ -1721,13 +1722,11 @@ svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
p += 1;
verf_len = ntohl(*p++);
p += XDR_QUADLEN(verf_len);
- /* move accept_stat to right place: */
- memcpy(p, p + 2, 4);
- /* Also don't wrap if the accept stat is nonzero: */
- if (*p != rpc_success) {
- resbuf->head[0].iov_len -= 2 * 4;
+
+ /* Also don't wrap if the accept_stat is nonzero: */
+ if (*rqstp->rq_accept_statp != rpc_success)
return NULL;
- }
+
p++;
return p;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 3c194e6f8f5e..c2ed8b06fadb 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1314,8 +1314,6 @@ svc_process_common(struct svc_rqst *rqstp)
trace_svc_process(rqstp, progp->pg_name);

aoffset = xdr_stream_pos(xdr);
- rqstp->rq_accept_statp = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT);
- *rqstp->rq_accept_statp = rpc_success;

/* un-reserve some of the out-queue now that we have a
* better idea of reply size
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index b101700d155c..62dfc8cdf8c5 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -775,6 +775,8 @@ svcauth_null_accept(struct svc_rqst *rqstp)
if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
RPC_AUTH_NULL, NULL, 0) < 0)
return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;

rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
@@ -866,6 +868,8 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
RPC_AUTH_NULL, NULL, 0) < 0)
return SVC_CLOSE;
}
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;

rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
return SVC_OK;
@@ -960,6 +964,8 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
RPC_AUTH_NULL, NULL, 0) < 0)
return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;

rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;



2023-05-02 11:04:19

by Jiri Slaby

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

On 08. 01. 23, 17:31, Chuck Lever wrote:
> From: Chuck Lever <[email protected]>
>
> To navigate around the space that svcauth_gss_accept() reserves
> for the RPC payload body length and sequence number fields,
> svcauth_gss_release() does a little dance with the reply's
> accept_stat, moving the accept_stat value in the response buffer
> down by two words.
>
> Instead, let's have the ->accept() methods each set the proper
> final location of the accept_stat to avoid having to move
> things.

Hi,

I bisected to this (4bcf0343e8) as it breaks nfs3-only servers in 6.3.
I.e. /etc/nfs.conf containing:
[nfsd]
vers4=no

The client sees:
mount("10.0.2.15:/tmp", "/mnt", "nfs", 0,
"vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
write(2, "mount.nfs: mount system call fai"..., 45
mount.nfs: mount system call failed for /mnt

And the kernel says:
nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO

I reported in downstream as:
https://bugzilla.suse.com/show_bug.cgi?id=1210995

It cannot be reverted cleanly on the top of 6.3.

Any ideas?

Thanks.

> Signed-off-by: Chuck Lever <[email protected]>
> ---
> include/linux/sunrpc/svc.h | 19 +++++++++++++++++++
> net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++++++++-----------
> net/sunrpc/svc.c | 2 --
> net/sunrpc/svcauth_unix.c | 6 ++++++
> 4 files changed, 35 insertions(+), 13 deletions(-)
>
> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
> index f40a90ca5de6..392d2d2620fa 100644
> --- a/include/linux/sunrpc/svc.h
> +++ b/include/linux/sunrpc/svc.h
> @@ -544,4 +544,23 @@ static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack)
> WARN_ON(xdr->p > xdr->end);
> }
>
> +/**
> + * svcxdr_set_accept_stat - Reserve space for the accept_stat field
> + * @rqstp: RPC transaction context
> + *
> + * Return values:
> + * %true: Success
> + * %false: No response buffer space was available
> + */
> +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp)
> +{
> + struct xdr_stream *xdr = &rqstp->rq_res_stream;
> +
> + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT);
> + if (unlikely(!rqstp->rq_accept_statp))
> + return false;
> + *rqstp->rq_accept_statp = rpc_success;
> + return true;
> +}
> +
> #endif /* SUNRPC_SVC_H */
> diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
> index 560fb8a2803d..333873abb7d9 100644
> --- a/net/sunrpc/auth_gss/svcauth_gss.c
> +++ b/net/sunrpc/auth_gss/svcauth_gss.c
> @@ -1220,7 +1220,7 @@ svcauth_gss_legacy_init(struct svc_rqst *rqstp,
> if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle,
> &rsip->major_status, GSS_SEQ_WIN))
> goto out;
> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
> + if (!svcxdr_set_accept_stat(rqstp))
> goto out;
> if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle,
> &rsip->out_token, rsip->major_status,
> @@ -1348,7 +1348,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
> if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle,
> &ud.major_status, GSS_SEQ_WIN))
> goto out;
> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
> + if (!svcxdr_set_accept_stat(rqstp))
> goto out;
> if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle,
> &ud.out_token, ud.major_status,
> @@ -1640,16 +1640,18 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
> case RPC_GSS_PROC_DESTROY:
> if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
> goto auth_err;
> + if (!svcxdr_set_accept_stat(rqstp))
> + goto auth_err;
> /* Delete the entry from the cache_list and call cache_put */
> sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
> - goto auth_err;
> goto complete;
> case RPC_GSS_PROC_DATA:
> rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
> svcdata->verf_start = xdr_reserve_space(&rqstp->rq_res_stream, 0);
> if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
> goto auth_err;
> + if (!svcxdr_set_accept_stat(rqstp))
> + goto auth_err;
> rqstp->rq_cred = rsci->cred;
> get_group_info(rsci->cred.cr_group_info);
> rqstp->rq_auth_stat = rpc_autherr_badcred;
> @@ -1706,7 +1708,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
> static __be32 *
> svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
> {
> - struct xdr_buf *resbuf = &rqstp->rq_res;
> __be32 *p;
> u32 verf_len;
>
> @@ -1721,13 +1722,11 @@ svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
> p += 1;
> verf_len = ntohl(*p++);
> p += XDR_QUADLEN(verf_len);
> - /* move accept_stat to right place: */
> - memcpy(p, p + 2, 4);
> - /* Also don't wrap if the accept stat is nonzero: */
> - if (*p != rpc_success) {
> - resbuf->head[0].iov_len -= 2 * 4;
> +
> + /* Also don't wrap if the accept_stat is nonzero: */
> + if (*rqstp->rq_accept_statp != rpc_success)
> return NULL;
> - }
> +
> p++;
> return p;
> }
> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
> index 3c194e6f8f5e..c2ed8b06fadb 100644
> --- a/net/sunrpc/svc.c
> +++ b/net/sunrpc/svc.c
> @@ -1314,8 +1314,6 @@ svc_process_common(struct svc_rqst *rqstp)
> trace_svc_process(rqstp, progp->pg_name);
>
> aoffset = xdr_stream_pos(xdr);
> - rqstp->rq_accept_statp = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT);
> - *rqstp->rq_accept_statp = rpc_success;
>
> /* un-reserve some of the out-queue now that we have a
> * better idea of reply size
> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
> index b101700d155c..62dfc8cdf8c5 100644
> --- a/net/sunrpc/svcauth_unix.c
> +++ b/net/sunrpc/svcauth_unix.c
> @@ -775,6 +775,8 @@ svcauth_null_accept(struct svc_rqst *rqstp)
> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
> RPC_AUTH_NULL, NULL, 0) < 0)
> return SVC_CLOSE;
> + if (!svcxdr_set_accept_stat(rqstp))
> + return SVC_CLOSE;
>
> rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
> return SVC_OK;
> @@ -866,6 +868,8 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
> RPC_AUTH_NULL, NULL, 0) < 0)
> return SVC_CLOSE;
> }
> + if (!svcxdr_set_accept_stat(rqstp))
> + return SVC_CLOSE;
>
> rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
> return SVC_OK;
> @@ -960,6 +964,8 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
> RPC_AUTH_NULL, NULL, 0) < 0)
> return SVC_CLOSE;
> + if (!svcxdr_set_accept_stat(rqstp))
> + return SVC_CLOSE;
>
> rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
> return SVC_OK;
>
>
>

--
js
suse labs

2023-05-02 14:17:04

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods



> On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
>
> On 08. 01. 23, 17:31, Chuck Lever wrote:
>> From: Chuck Lever <[email protected]>
>> To navigate around the space that svcauth_gss_accept() reserves
>> for the RPC payload body length and sequence number fields,
>> svcauth_gss_release() does a little dance with the reply's
>> accept_stat, moving the accept_stat value in the response buffer
>> down by two words.
>> Instead, let's have the ->accept() methods each set the proper
>> final location of the accept_stat to avoid having to move
>> things.
>
> Hi,
>
> I bisected to this (4bcf0343e8)

Assuming you did the bisect on the NFS server's kernel?


> as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
> [nfsd]
> vers4=no

Note: Changing the settings in /etc/nfs.conf had no effect
on my server, so I effected the change by stopping the
server and poking values into /proc/fs/nfsd/versions by
hand.

Steve?


> The client sees:
> mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
> write(2, "mount.nfs: mount system call fai"..., 45
> mount.nfs: mount system call failed for /mnt
>
> And the kernel says:
> nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
>
> I reported in downstream as:
> https://bugzilla.suse.com/show_bug.cgi?id=1210995
>
> It cannot be reverted cleanly on the top of 6.3.
>
> Any ideas?

I can reproduce a similar problem. Network capture shows
that the server is responding with NFS4ERR_NOENT to the
EXCHANGE_ID operation, and the client kernel log says:

> nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO

That's not the failure mode I expected given the commit
you bisected to, so it might not be the same problem you've
hit. I'll troubleshoot this and send a fix for testing.


> Thanks.
>
>> Signed-off-by: Chuck Lever <[email protected]>
>> ---
>> include/linux/sunrpc/svc.h | 19 +++++++++++++++++++
>> net/sunrpc/auth_gss/svcauth_gss.c | 21 ++++++++++-----------
>> net/sunrpc/svc.c | 2 --
>> net/sunrpc/svcauth_unix.c | 6 ++++++
>> 4 files changed, 35 insertions(+), 13 deletions(-)
>> diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
>> index f40a90ca5de6..392d2d2620fa 100644
>> --- a/include/linux/sunrpc/svc.h
>> +++ b/include/linux/sunrpc/svc.h
>> @@ -544,4 +544,23 @@ static inline void svcxdr_set_auth_slack(struct svc_rqst *rqstp, int slack)
>> WARN_ON(xdr->p > xdr->end);
>> }
>> +/**
>> + * svcxdr_set_accept_stat - Reserve space for the accept_stat field
>> + * @rqstp: RPC transaction context
>> + *
>> + * Return values:
>> + * %true: Success
>> + * %false: No response buffer space was available
>> + */
>> +static inline bool svcxdr_set_accept_stat(struct svc_rqst *rqstp)
>> +{
>> + struct xdr_stream *xdr = &rqstp->rq_res_stream;
>> +
>> + rqstp->rq_accept_statp = xdr_reserve_space(xdr, XDR_UNIT);
>> + if (unlikely(!rqstp->rq_accept_statp))
>> + return false;
>> + *rqstp->rq_accept_statp = rpc_success;
>> + return true;
>> +}
>> +
>> #endif /* SUNRPC_SVC_H */
>> diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
>> index 560fb8a2803d..333873abb7d9 100644
>> --- a/net/sunrpc/auth_gss/svcauth_gss.c
>> +++ b/net/sunrpc/auth_gss/svcauth_gss.c
>> @@ -1220,7 +1220,7 @@ svcauth_gss_legacy_init(struct svc_rqst *rqstp,
>> if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle,
>> &rsip->major_status, GSS_SEQ_WIN))
>> goto out;
>> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
>> + if (!svcxdr_set_accept_stat(rqstp))
>> goto out;
>> if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle,
>> &rsip->out_token, rsip->major_status,
>> @@ -1348,7 +1348,7 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
>> if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle,
>> &ud.major_status, GSS_SEQ_WIN))
>> goto out;
>> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
>> + if (!svcxdr_set_accept_stat(rqstp))
>> goto out;
>> if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle,
>> &ud.out_token, ud.major_status,
>> @@ -1640,16 +1640,18 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
>> case RPC_GSS_PROC_DESTROY:
>> if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
>> goto auth_err;
>> + if (!svcxdr_set_accept_stat(rqstp))
>> + goto auth_err;
>> /* Delete the entry from the cache_list and call cache_put */
>> sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
>> - if (xdr_stream_encode_u32(&rqstp->rq_res_stream, RPC_SUCCESS) < 0)
>> - goto auth_err;
>> goto complete;
>> case RPC_GSS_PROC_DATA:
>> rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
>> svcdata->verf_start = xdr_reserve_space(&rqstp->rq_res_stream, 0);
>> if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
>> goto auth_err;
>> + if (!svcxdr_set_accept_stat(rqstp))
>> + goto auth_err;
>> rqstp->rq_cred = rsci->cred;
>> get_group_info(rsci->cred.cr_group_info);
>> rqstp->rq_auth_stat = rpc_autherr_badcred;
>> @@ -1706,7 +1708,6 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
>> static __be32 *
>> svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
>> {
>> - struct xdr_buf *resbuf = &rqstp->rq_res;
>> __be32 *p;
>> u32 verf_len;
>> @@ -1721,13 +1722,11 @@ svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
>> p += 1;
>> verf_len = ntohl(*p++);
>> p += XDR_QUADLEN(verf_len);
>> - /* move accept_stat to right place: */
>> - memcpy(p, p + 2, 4);
>> - /* Also don't wrap if the accept stat is nonzero: */
>> - if (*p != rpc_success) {
>> - resbuf->head[0].iov_len -= 2 * 4;
>> +
>> + /* Also don't wrap if the accept_stat is nonzero: */
>> + if (*rqstp->rq_accept_statp != rpc_success)
>> return NULL;
>> - }
>> +
>> p++;
>> return p;
>> }
>> diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
>> index 3c194e6f8f5e..c2ed8b06fadb 100644
>> --- a/net/sunrpc/svc.c
>> +++ b/net/sunrpc/svc.c
>> @@ -1314,8 +1314,6 @@ svc_process_common(struct svc_rqst *rqstp)
>> trace_svc_process(rqstp, progp->pg_name);
>> aoffset = xdr_stream_pos(xdr);
>> - rqstp->rq_accept_statp = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT);
>> - *rqstp->rq_accept_statp = rpc_success;
>> /* un-reserve some of the out-queue now that we have a
>> * better idea of reply size
>> diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
>> index b101700d155c..62dfc8cdf8c5 100644
>> --- a/net/sunrpc/svcauth_unix.c
>> +++ b/net/sunrpc/svcauth_unix.c
>> @@ -775,6 +775,8 @@ svcauth_null_accept(struct svc_rqst *rqstp)
>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
>> RPC_AUTH_NULL, NULL, 0) < 0)
>> return SVC_CLOSE;
>> + if (!svcxdr_set_accept_stat(rqstp))
>> + return SVC_CLOSE;
>> rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
>> return SVC_OK;
>> @@ -866,6 +868,8 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
>> RPC_AUTH_NULL, NULL, 0) < 0)
>> return SVC_CLOSE;
>> }
>> + if (!svcxdr_set_accept_stat(rqstp))
>> + return SVC_CLOSE;
>> rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
>> return SVC_OK;
>> @@ -960,6 +964,8 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
>> if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
>> RPC_AUTH_NULL, NULL, 0) < 0)
>> return SVC_CLOSE;
>> + if (!svcxdr_set_accept_stat(rqstp))
>> + return SVC_CLOSE;
>> rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
>> return SVC_OK;
>
> --
> js
> suse labs


--
Chuck Lever


2023-05-02 21:41:26

by NeilBrown

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

On Wed, 03 May 2023, Chuck Lever III wrote:
>
> > On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
>
> > as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
> > [nfsd]
> > vers4=no
>
> Note: Changing the settings in /etc/nfs.conf had no effect
> on my server, so I effected the change by stopping the
> server and poking values into /proc/fs/nfsd/versions by
> hand.
>
> Steve?

Fixed in nfs-utils-2-3-4-rc1~7
Commit: d68be5d6ae50 ("nfs.conf: fail to disable major NFS version 4 using "vers4=n" in /etc/nfs.conf")

NeilBrown

2023-05-16 19:31:05

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

On Tue, 2023-05-02 at 14:14 +0000, Chuck Lever III wrote:
>
> > On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
> >
> > On 08. 01. 23, 17:31, Chuck Lever wrote:
> > > From: Chuck Lever <[email protected]>
> > > To navigate around the space that svcauth_gss_accept() reserves
> > > for the RPC payload body length and sequence number fields,
> > > svcauth_gss_release() does a little dance with the reply's
> > > accept_stat, moving the accept_stat value in the response buffer
> > > down by two words.
> > > Instead, let's have the ->accept() methods each set the proper
> > > final location of the accept_stat to avoid having to move
> > > things.
> >
> > Hi,
> >
> > I bisected to this (4bcf0343e8)
>
> Assuming you did the bisect on the NFS server's kernel?
>
>
> > as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
> > [nfsd]
> > vers4=no
>
> Note: Changing the settings in /etc/nfs.conf had no effect
> on my server, so I effected the change by stopping the
> server and poking values into /proc/fs/nfsd/versions by
> hand.
>
> Steve?
>
>
> > The client sees:
> > mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
> > write(2, "mount.nfs: mount system call fai"..., 45
> > mount.nfs: mount system call failed for /mnt
> >
> > And the kernel says:
> > nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
> >
> > I reported in downstream as:
> > https://bugzilla.suse.com/show_bug.cgi?id=1210995
> >
> > It cannot be reverted cleanly on the top of 6.3.
> >
> > Any ideas?
>
> I can reproduce a similar problem. Network capture shows
> that the server is responding with NFS4ERR_NOENT to the
> EXCHANGE_ID operation, and the client kernel log says:
>
> > nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO
>
> That's not the failure mode I expected given the commit
> you bisected to, so it might not be the same problem you've
> hit. I'll troubleshoot this and send a fix for testing.
>

Alex hit this problem in testing too, and I took a quick look.

In the attached capture, the client should have gotten back a
RPC_PROG_MISMATCH error, but the server has recorded an extra successful
accept state before encoding the RPC_PROG_MISMATCH error, leading to a
malformed reply.

I think that the problem is that encoding the accept status too early
means that we can't properly handle failures from the pg_init_request
call.

Chuck, any thoughts on how you'd like to handle this?
--
Jeff Layton <[email protected]>


Attachments:
bad-fallback.pcapng.gz (1.95 kB)

2023-05-16 19:31:33

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods



> On May 16, 2023, at 3:23 PM, Jeff Layton <[email protected]> wrote:
>
> On Tue, 2023-05-02 at 14:14 +0000, Chuck Lever III wrote:
>>
>>> On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
>>>
>>> On 08. 01. 23, 17:31, Chuck Lever wrote:
>>>> From: Chuck Lever <[email protected]>
>>>> To navigate around the space that svcauth_gss_accept() reserves
>>>> for the RPC payload body length and sequence number fields,
>>>> svcauth_gss_release() does a little dance with the reply's
>>>> accept_stat, moving the accept_stat value in the response buffer
>>>> down by two words.
>>>> Instead, let's have the ->accept() methods each set the proper
>>>> final location of the accept_stat to avoid having to move
>>>> things.
>>>
>>> Hi,
>>>
>>> I bisected to this (4bcf0343e8)
>>
>> Assuming you did the bisect on the NFS server's kernel?
>>
>>
>>> as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
>>> [nfsd]
>>> vers4=no
>>
>> Note: Changing the settings in /etc/nfs.conf had no effect
>> on my server, so I effected the change by stopping the
>> server and poking values into /proc/fs/nfsd/versions by
>> hand.
>>
>> Steve?
>>
>>
>>> The client sees:
>>> mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
>>> write(2, "mount.nfs: mount system call fai"..., 45
>>> mount.nfs: mount system call failed for /mnt
>>>
>>> And the kernel says:
>>> nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
>>>
>>> I reported in downstream as:
>>> https://bugzilla.suse.com/show_bug.cgi?id=1210995
>>>
>>> It cannot be reverted cleanly on the top of 6.3.
>>>
>>> Any ideas?
>>
>> I can reproduce a similar problem. Network capture shows
>> that the server is responding with NFS4ERR_NOENT to the
>> EXCHANGE_ID operation, and the client kernel log says:
>>
>>> nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO
>>
>> That's not the failure mode I expected given the commit
>> you bisected to, so it might not be the same problem you've
>> hit. I'll troubleshoot this and send a fix for testing.
>>
>
> Alex hit this problem in testing too, and I took a quick look.
>
> In the attached capture, the client should have gotten back a
> RPC_PROG_MISMATCH error, but the server has recorded an extra successful
> accept state before encoding the RPC_PROG_MISMATCH error, leading to a
> malformed reply.
>
> I think that the problem is that encoding the accept status too early
> means that we can't properly handle failures from the pg_init_request
> call.
>
> Chuck, any thoughts on how you'd like to handle this?

With this:

https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/?h=nfsd-fixes&id=29cd2927fb914cc53b5ba4f67d2b74695c994ba4

I plan to send the fix to Linus tomorrow.


--
Chuck Lever



2023-05-16 21:31:37

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

On Tue, 2023-05-16 at 19:25 +0000, Chuck Lever III wrote:
>
> > On May 16, 2023, at 3:23 PM, Jeff Layton <[email protected]> wrote:
> >
> > On Tue, 2023-05-02 at 14:14 +0000, Chuck Lever III wrote:
> > >
> > > > On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
> > > >
> > > > On 08. 01. 23, 17:31, Chuck Lever wrote:
> > > > > From: Chuck Lever <[email protected]>
> > > > > To navigate around the space that svcauth_gss_accept() reserves
> > > > > for the RPC payload body length and sequence number fields,
> > > > > svcauth_gss_release() does a little dance with the reply's
> > > > > accept_stat, moving the accept_stat value in the response buffer
> > > > > down by two words.
> > > > > Instead, let's have the ->accept() methods each set the proper
> > > > > final location of the accept_stat to avoid having to move
> > > > > things.
> > > >
> > > > Hi,
> > > >
> > > > I bisected to this (4bcf0343e8)
> > >
> > > Assuming you did the bisect on the NFS server's kernel?
> > >
> > >
> > > > as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
> > > > [nfsd]
> > > > vers4=no
> > >
> > > Note: Changing the settings in /etc/nfs.conf had no effect
> > > on my server, so I effected the change by stopping the
> > > server and poking values into /proc/fs/nfsd/versions by
> > > hand.
> > >
> > > Steve?
> > >
> > >
> > > > The client sees:
> > > > mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
> > > > write(2, "mount.nfs: mount system call fai"..., 45
> > > > mount.nfs: mount system call failed for /mnt
> > > >
> > > > And the kernel says:
> > > > nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
> > > >
> > > > I reported in downstream as:
> > > > https://bugzilla.suse.com/show_bug.cgi?id=1210995
> > > >
> > > > It cannot be reverted cleanly on the top of 6.3.
> > > >
> > > > Any ideas?
> > >
> > > I can reproduce a similar problem. Network capture shows
> > > that the server is responding with NFS4ERR_NOENT to the
> > > EXCHANGE_ID operation, and the client kernel log says:
> > >
> > > > nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO
> > >
> > > That's not the failure mode I expected given the commit
> > > you bisected to, so it might not be the same problem you've
> > > hit. I'll troubleshoot this and send a fix for testing.
> > >
> >
> > Alex hit this problem in testing too, and I took a quick look.
> >
> > In the attached capture, the client should have gotten back a
> > RPC_PROG_MISMATCH error, but the server has recorded an extra successful
> > accept state before encoding the RPC_PROG_MISMATCH error, leading to a
> > malformed reply.
> >
> > I think that the problem is that encoding the accept status too early
> > means that we can't properly handle failures from the pg_init_request
> > call.
> >
> > Chuck, any thoughts on how you'd like to handle this?
>
> With this:
>
> https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/?h=nfsd-fixes&id=29cd2927fb914cc53b5ba4f67d2b74695c994ba4
>
> I plan to send the fix to Linus tomorrow.
>
>

Oh! I hadn't seen that cross the list. Did I miss it?
--
Jeff Layton <[email protected]>

2023-05-16 21:34:11

by Chuck Lever

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods



> On May 16, 2023, at 5:25 PM, Jeff Layton <[email protected]> wrote:
>
> On Tue, 2023-05-16 at 19:25 +0000, Chuck Lever III wrote:
>>
>>> On May 16, 2023, at 3:23 PM, Jeff Layton <[email protected]> wrote:
>>>
>>> On Tue, 2023-05-02 at 14:14 +0000, Chuck Lever III wrote:
>>>>
>>>>> On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
>>>>>
>>>>> On 08. 01. 23, 17:31, Chuck Lever wrote:
>>>>>> From: Chuck Lever <[email protected]>
>>>>>> To navigate around the space that svcauth_gss_accept() reserves
>>>>>> for the RPC payload body length and sequence number fields,
>>>>>> svcauth_gss_release() does a little dance with the reply's
>>>>>> accept_stat, moving the accept_stat value in the response buffer
>>>>>> down by two words.
>>>>>> Instead, let's have the ->accept() methods each set the proper
>>>>>> final location of the accept_stat to avoid having to move
>>>>>> things.
>>>>>
>>>>> Hi,
>>>>>
>>>>> I bisected to this (4bcf0343e8)
>>>>
>>>> Assuming you did the bisect on the NFS server's kernel?
>>>>
>>>>
>>>>> as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
>>>>> [nfsd]
>>>>> vers4=no
>>>>
>>>> Note: Changing the settings in /etc/nfs.conf had no effect
>>>> on my server, so I effected the change by stopping the
>>>> server and poking values into /proc/fs/nfsd/versions by
>>>> hand.
>>>>
>>>> Steve?
>>>>
>>>>
>>>>> The client sees:
>>>>> mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
>>>>> write(2, "mount.nfs: mount system call fai"..., 45
>>>>> mount.nfs: mount system call failed for /mnt
>>>>>
>>>>> And the kernel says:
>>>>> nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
>>>>>
>>>>> I reported in downstream as:
>>>>> https://bugzilla.suse.com/show_bug.cgi?id=1210995
>>>>>
>>>>> It cannot be reverted cleanly on the top of 6.3.
>>>>>
>>>>> Any ideas?
>>>>
>>>> I can reproduce a similar problem. Network capture shows
>>>> that the server is responding with NFS4ERR_NOENT to the
>>>> EXCHANGE_ID operation, and the client kernel log says:
>>>>
>>>>> nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO
>>>>
>>>> That's not the failure mode I expected given the commit
>>>> you bisected to, so it might not be the same problem you've
>>>> hit. I'll troubleshoot this and send a fix for testing.
>>>>
>>>
>>> Alex hit this problem in testing too, and I took a quick look.
>>>
>>> In the attached capture, the client should have gotten back a
>>> RPC_PROG_MISMATCH error, but the server has recorded an extra successful
>>> accept state before encoding the RPC_PROG_MISMATCH error, leading to a
>>> malformed reply.
>>>
>>> I think that the problem is that encoding the accept status too early
>>> means that we can't properly handle failures from the pg_init_request
>>> call.
>>>
>>> Chuck, any thoughts on how you'd like to handle this?
>>
>> With this:
>>
>> https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/?h=nfsd-fixes&id=29cd2927fb914cc53b5ba4f67d2b74695c994ba4
>>
>> I plan to send the fix to Linus tomorrow.
>>
>>
>
> Oh! I hadn't seen that cross the list. Did I miss it?

https://lore.kernel.org/linux-nfs/[email protected]/T/#t


--
Chuck Lever



2023-05-16 22:30:41

by Jeff Layton

[permalink] [raw]
Subject: Re: [PATCH v1 26/27] SUNRPC: Set rq_accept_statp inside ->accept methods

On Tue, 2023-05-16 at 21:27 +0000, Chuck Lever III wrote:
>
> > On May 16, 2023, at 5:25 PM, Jeff Layton <[email protected]> wrote:
> >
> > On Tue, 2023-05-16 at 19:25 +0000, Chuck Lever III wrote:
> > >
> > > > On May 16, 2023, at 3:23 PM, Jeff Layton <[email protected]> wrote:
> > > >
> > > > On Tue, 2023-05-02 at 14:14 +0000, Chuck Lever III wrote:
> > > > >
> > > > > > On May 2, 2023, at 7:01 AM, Jiri Slaby <[email protected]> wrote:
> > > > > >
> > > > > > On 08. 01. 23, 17:31, Chuck Lever wrote:
> > > > > > > From: Chuck Lever <[email protected]>
> > > > > > > To navigate around the space that svcauth_gss_accept() reserves
> > > > > > > for the RPC payload body length and sequence number fields,
> > > > > > > svcauth_gss_release() does a little dance with the reply's
> > > > > > > accept_stat, moving the accept_stat value in the response buffer
> > > > > > > down by two words.
> > > > > > > Instead, let's have the ->accept() methods each set the proper
> > > > > > > final location of the accept_stat to avoid having to move
> > > > > > > things.
> > > > > >
> > > > > > Hi,
> > > > > >
> > > > > > I bisected to this (4bcf0343e8)
> > > > >
> > > > > Assuming you did the bisect on the NFS server's kernel?
> > > > >
> > > > >
> > > > > > as it breaks nfs3-only servers in 6.3. I.e. /etc/nfs.conf containing:
> > > > > > [nfsd]
> > > > > > vers4=no
> > > > >
> > > > > Note: Changing the settings in /etc/nfs.conf had no effect
> > > > > on my server, so I effected the change by stopping the
> > > > > server and poking values into /proc/fs/nfsd/versions by
> > > > > hand.
> > > > >
> > > > > Steve?
> > > > >
> > > > >
> > > > > > The client sees:
> > > > > > mount("10.0.2.15:/tmp", "/mnt", "nfs", 0, "vers=4.2,addr=10.0.2.15,clientad"...) = -1 EIO (Input/output error)
> > > > > > write(2, "mount.nfs: mount system call fai"..., 45
> > > > > > mount.nfs: mount system call failed for /mnt
> > > > > >
> > > > > > And the kernel says:
> > > > > > nfs4_discover_server_trunking unhandled error -5. Exiting with error EIO
> > > > > >
> > > > > > I reported in downstream as:
> > > > > > https://bugzilla.suse.com/show_bug.cgi?id=1210995
> > > > > >
> > > > > > It cannot be reverted cleanly on the top of 6.3.
> > > > > >
> > > > > > Any ideas?
> > > > >
> > > > > I can reproduce a similar problem. Network capture shows
> > > > > that the server is responding with NFS4ERR_NOENT to the
> > > > > EXCHANGE_ID operation, and the client kernel log says:
> > > > >
> > > > > > nfs4_discover_server_trunking unhandled error -121. Exiting with error EIO
> > > > >
> > > > > That's not the failure mode I expected given the commit
> > > > > you bisected to, so it might not be the same problem you've
> > > > > hit. I'll troubleshoot this and send a fix for testing.
> > > > >
> > > >
> > > > Alex hit this problem in testing too, and I took a quick look.
> > > >
> > > > In the attached capture, the client should have gotten back a
> > > > RPC_PROG_MISMATCH error, but the server has recorded an extra successful
> > > > accept state before encoding the RPC_PROG_MISMATCH error, leading to a
> > > > malformed reply.
> > > >
> > > > I think that the problem is that encoding the accept status too early
> > > > means that we can't properly handle failures from the pg_init_request
> > > > call.
> > > >
> > > > Chuck, any thoughts on how you'd like to handle this?
> > >
> > > With this:
> > >
> > > https://git.kernel.org/pub/scm/linux/kernel/git/cel/linux.git/commit/?h=nfsd-fixes&id=29cd2927fb914cc53b5ba4f67d2b74695c994ba4
> > >
> > > I plan to send the fix to Linus tomorrow.
> > >
> > >
> >
> > Oh! I hadn't seen that cross the list. Did I miss it?
>
> https://lore.kernel.org/linux-nfs/[email protected]/T/#t
>
>

Ahh ok, it wasn't under its own title. LGTM. You can add:

Reviewed-by: Jeff Layton <[email protected]>