The reclen is taken directly from the first four bytes of the message
with the highest bit stripped, which makes it ripe for protocol mixups.
For example, if someone tries to send a HTTP GET request to us, we'll
interpret it as a 1195725856-sized fragment (ie. (u32)'GET '), and print
a ratelimited KERN_NOTICE with that number verbatim.
This can be confusing for downstream users, who don't know what messages
like "fragment too large: 1195725856" actually mean, or that they
indicate some misconfigured infrastructure elsewhere.
To allow users to more easily understand and debug these cases, add the
number interpreted as ASCII if all characters are printable:
RPC: fragment too large: 1195725856 (ASCII "GET ")
If demand grows elsewhere, a new printk format that takes a number and
outputs it in various formats is also a possible solution. For now, it
seems reasonable to put this here since this particular code path is the
one that has repeatedly come up in production.
Signed-off-by: Chris Down <[email protected]>
Cc: Chuck Lever <[email protected]>
Cc: J. Bruce Fields <[email protected]>
Cc: Trond Myklebust <[email protected]>
Cc: David S. Miller <[email protected]>
---
net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++--
1 file changed, 37 insertions(+), 2 deletions(-)
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2e2f007dfc9f..046b1d104340 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -46,6 +46,7 @@
#include <linux/uaccess.h>
#include <linux/highmem.h>
#include <asm/ioctls.h>
+#include <linux/ctype.h>
#include <linux/sunrpc/types.h>
#include <linux/sunrpc/clnt.h>
@@ -863,6 +864,34 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
svsk->sk_datalen = 0;
}
+/* The reclen is taken directly from the first four bytes of the message with
+ * the highest bit stripped, which makes it ripe for protocol mixups. For
+ * example, if someone tries to send a HTTP GET request to us, we'll interpret
+ * it as a 1195725856-sized fragment (ie. (u32)'GET '), and print a ratelimited
+ * KERN_NOTICE with that number verbatim.
+ *
+ * To allow users to more easily understand and debug these cases, this
+ * function decodes the purported length as ASCII, and returns it if all
+ * characters were printable. Otherwise, we return NULL.
+ *
+ * WARNING: Since we reuse the u32 directly, the return value is not null
+ * terminated, and must be printed using %.*s with
+ * sizeof(svc_sock_reclen(svsk)).
+ */
+static char *svc_sock_reclen_ascii(struct svc_sock *svsk)
+{
+ u32 len_be = cpu_to_be32(svc_sock_reclen(svsk));
+ char *len_be_ascii = (char *)&len_be;
+ size_t i;
+
+ for (i = 0; i < sizeof(len_be); i++) {
+ if (!isprint(len_be_ascii[i]))
+ return NULL;
+ }
+
+ return len_be_ascii;
+}
+
/*
* Receive fragment record header into sk_marker.
*/
@@ -870,6 +899,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
struct svc_rqst *rqstp)
{
ssize_t want, len;
+ char *reclen_ascii;
/* If we haven't gotten the record length yet,
* get the next four bytes.
@@ -898,9 +928,14 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
return svc_sock_reclen(svsk);
err_too_large:
- net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
+ reclen_ascii = svc_sock_reclen_ascii(svsk);
+ net_notice_ratelimited("svc: %s %s RPC fragment too large: %d%s%.*s%s\n",
__func__, svsk->sk_xprt.xpt_server->sv_name,
- svc_sock_reclen(svsk));
+ svc_sock_reclen(svsk),
+ reclen_ascii ? " (ASCII \"" : "",
+ (int)sizeof(u32),
+ reclen_ascii ?: "",
+ reclen_ascii ? "\")" : "");
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
err_short:
return -EAGAIN;
--
2.30.2
Hi Chris-
> On Mar 19, 2021, at 10:54 AM, Chris Down <[email protected]> wrote:
>
> The reclen is taken directly from the first four bytes of the message
> with the highest bit stripped, which makes it ripe for protocol mixups.
> For example, if someone tries to send a HTTP GET request to us, we'll
> interpret it as a 1195725856-sized fragment (ie. (u32)'GET '), and print
> a ratelimited KERN_NOTICE with that number verbatim.
>
> This can be confusing for downstream users, who don't know what messages
> like "fragment too large: 1195725856" actually mean, or that they
> indicate some misconfigured infrastructure elsewhere.
One wonders whether that error message is actually useful at all.
We could, for example, turn this into a tracepoint, or just get
rid of it.
> To allow users to more easily understand and debug these cases, add the
> number interpreted as ASCII if all characters are printable:
>
> RPC: fragment too large: 1195725856 (ASCII "GET ")
>
> If demand grows elsewhere, a new printk format that takes a number and
> outputs it in various formats is also a possible solution. For now, it
> seems reasonable to put this here since this particular code path is the
> one that has repeatedly come up in production.
>
> Signed-off-by: Chris Down <[email protected]>
> Cc: Chuck Lever <[email protected]>
> Cc: J. Bruce Fields <[email protected]>
> Cc: Trond Myklebust <[email protected]>
> Cc: David S. Miller <[email protected]>
> ---
> net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++--
> 1 file changed, 37 insertions(+), 2 deletions(-)
>
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 2e2f007dfc9f..046b1d104340 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -46,6 +46,7 @@
> #include <linux/uaccess.h>
> #include <linux/highmem.h>
> #include <asm/ioctls.h>
> +#include <linux/ctype.h>
>
> #include <linux/sunrpc/types.h>
> #include <linux/sunrpc/clnt.h>
> @@ -863,6 +864,34 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
> svsk->sk_datalen = 0;
> }
>
> +/* The reclen is taken directly from the first four bytes of the message with
> + * the highest bit stripped, which makes it ripe for protocol mixups. For
> + * example, if someone tries to send a HTTP GET request to us, we'll interpret
> + * it as a 1195725856-sized fragment (ie. (u32)'GET '), and print a ratelimited
> + * KERN_NOTICE with that number verbatim.
> + *
> + * To allow users to more easily understand and debug these cases, this
> + * function decodes the purported length as ASCII, and returns it if all
> + * characters were printable. Otherwise, we return NULL.
> + *
> + * WARNING: Since we reuse the u32 directly, the return value is not null
> + * terminated, and must be printed using %.*s with
> + * sizeof(svc_sock_reclen(svsk)).
> + */
> +static char *svc_sock_reclen_ascii(struct svc_sock *svsk)
> +{
> + u32 len_be = cpu_to_be32(svc_sock_reclen(svsk));
> + char *len_be_ascii = (char *)&len_be;
> + size_t i;
> +
> + for (i = 0; i < sizeof(len_be); i++) {
> + if (!isprint(len_be_ascii[i]))
> + return NULL;
> + }
> +
> + return len_be_ascii;
> +}
> +
> /*
> * Receive fragment record header into sk_marker.
> */
> @@ -870,6 +899,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
> struct svc_rqst *rqstp)
> {
> ssize_t want, len;
> + char *reclen_ascii;
>
> /* If we haven't gotten the record length yet,
> * get the next four bytes.
> @@ -898,9 +928,14 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
> return svc_sock_reclen(svsk);
>
> err_too_large:
> - net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
> + reclen_ascii = svc_sock_reclen_ascii(svsk);
> + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d%s%.*s%s\n",
> __func__, svsk->sk_xprt.xpt_server->sv_name,
> - svc_sock_reclen(svsk));
> + svc_sock_reclen(svsk),
> + reclen_ascii ? " (ASCII \"" : "",
> + (int)sizeof(u32),
> + reclen_ascii ?: "",
> + reclen_ascii ? "\")" : "");
> set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> err_short:
> return -EAGAIN;
> --
> 2.30.2
>
--
Chuck Lever
Hey folks,
Let me know if you'd like more evidence that this is a persisting problem. Also
more than happy to change the generation of the whole debug string to go into
svc_sock_reclen_ascii or use LOG_CONT if you'd prefer to avoid the multiple
ternaries (but the latter probably needs some thought about how it interacts
with ratelimit).
Thanks,
Chris
Hey Chuck,
Thanks for the (very) fast reply! :-)
Chuck Lever III writes:
>> This can be confusing for downstream users, who don't know what messages
>> like "fragment too large: 1195725856" actually mean, or that they
>> indicate some misconfigured infrastructure elsewhere.
>
>One wonders whether that error message is actually useful at all.
>We could, for example, turn this into a tracepoint, or just get
>rid of it.
Indeed, that's also a good outcome. Personally I've never seen these
legitimately fire in production outside of cases like the one described, and we
historically ran a pretty diverse set of use cases for NFS.
Maybe safer to convert to a tracepoint just in case? Either way sounds fine
though -- let me know what you'd like for v2 and I'll send it over. :-)
Thanks!
Chris
On Fri, Mar 19, 2021 at 02:58:14PM +0000, Chuck Lever III wrote:
> Hi Chris-
>
> > On Mar 19, 2021, at 10:54 AM, Chris Down <[email protected]> wrote:
> >
> > The reclen is taken directly from the first four bytes of the message
> > with the highest bit stripped, which makes it ripe for protocol mixups.
> > For example, if someone tries to send a HTTP GET request to us, we'll
> > interpret it as a 1195725856-sized fragment (ie. (u32)'GET '), and print
> > a ratelimited KERN_NOTICE with that number verbatim.
> >
> > This can be confusing for downstream users, who don't know what messages
> > like "fragment too large: 1195725856" actually mean, or that they
> > indicate some misconfigured infrastructure elsewhere.
>
> One wonders whether that error message is actually useful at all.
> We could, for example, turn this into a tracepoint, or just get
> rid of it.
Just going on vague memories here, but: I think we've seen both spurious
and real bugs reported based on this.
I'm inclined to go with a dprintk or tracepoint but not removing it
entirely.
--b.
>
>
> > To allow users to more easily understand and debug these cases, add the
> > number interpreted as ASCII if all characters are printable:
> >
> > RPC: fragment too large: 1195725856 (ASCII "GET ")
> >
> > If demand grows elsewhere, a new printk format that takes a number and
> > outputs it in various formats is also a possible solution. For now, it
> > seems reasonable to put this here since this particular code path is the
> > one that has repeatedly come up in production.
> >
> > Signed-off-by: Chris Down <[email protected]>
> > Cc: Chuck Lever <[email protected]>
> > Cc: J. Bruce Fields <[email protected]>
> > Cc: Trond Myklebust <[email protected]>
> > Cc: David S. Miller <[email protected]>
> > ---
> > net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++--
> > 1 file changed, 37 insertions(+), 2 deletions(-)
> >
> > diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> > index 2e2f007dfc9f..046b1d104340 100644
> > --- a/net/sunrpc/svcsock.c
> > +++ b/net/sunrpc/svcsock.c
> > @@ -46,6 +46,7 @@
> > #include <linux/uaccess.h>
> > #include <linux/highmem.h>
> > #include <asm/ioctls.h>
> > +#include <linux/ctype.h>
> >
> > #include <linux/sunrpc/types.h>
> > #include <linux/sunrpc/clnt.h>
> > @@ -863,6 +864,34 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
> > svsk->sk_datalen = 0;
> > }
> >
> > +/* The reclen is taken directly from the first four bytes of the message with
> > + * the highest bit stripped, which makes it ripe for protocol mixups. For
> > + * example, if someone tries to send a HTTP GET request to us, we'll interpret
> > + * it as a 1195725856-sized fragment (ie. (u32)'GET '), and print a ratelimited
> > + * KERN_NOTICE with that number verbatim.
> > + *
> > + * To allow users to more easily understand and debug these cases, this
> > + * function decodes the purported length as ASCII, and returns it if all
> > + * characters were printable. Otherwise, we return NULL.
> > + *
> > + * WARNING: Since we reuse the u32 directly, the return value is not null
> > + * terminated, and must be printed using %.*s with
> > + * sizeof(svc_sock_reclen(svsk)).
> > + */
> > +static char *svc_sock_reclen_ascii(struct svc_sock *svsk)
> > +{
> > + u32 len_be = cpu_to_be32(svc_sock_reclen(svsk));
> > + char *len_be_ascii = (char *)&len_be;
> > + size_t i;
> > +
> > + for (i = 0; i < sizeof(len_be); i++) {
> > + if (!isprint(len_be_ascii[i]))
> > + return NULL;
> > + }
> > +
> > + return len_be_ascii;
> > +}
> > +
> > /*
> > * Receive fragment record header into sk_marker.
> > */
> > @@ -870,6 +899,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
> > struct svc_rqst *rqstp)
> > {
> > ssize_t want, len;
> > + char *reclen_ascii;
> >
> > /* If we haven't gotten the record length yet,
> > * get the next four bytes.
> > @@ -898,9 +928,14 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
> > return svc_sock_reclen(svsk);
> >
> > err_too_large:
> > - net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
> > + reclen_ascii = svc_sock_reclen_ascii(svsk);
> > + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d%s%.*s%s\n",
> > __func__, svsk->sk_xprt.xpt_server->sv_name,
> > - svc_sock_reclen(svsk));
> > + svc_sock_reclen(svsk),
> > + reclen_ascii ? " (ASCII \"" : "",
> > + (int)sizeof(u32),
> > + reclen_ascii ?: "",
> > + reclen_ascii ? "\")" : "");
> > set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
> > err_short:
> > return -EAGAIN;
> > --
> > 2.30.2
> >
>
> --
> Chuck Lever
>
>
>
> On Mar 19, 2021, at 6:08 PM, J. Bruce Fields <[email protected]> wrote:
>
> On Fri, Mar 19, 2021 at 02:58:14PM +0000, Chuck Lever III wrote:
>> Hi Chris-
>>
>>> On Mar 19, 2021, at 10:54 AM, Chris Down <[email protected]> wrote:
>>>
>>> The reclen is taken directly from the first four bytes of the message
>>> with the highest bit stripped, which makes it ripe for protocol mixups.
>>> For example, if someone tries to send a HTTP GET request to us, we'll
>>> interpret it as a 1195725856-sized fragment (ie. (u32)'GET '), and print
>>> a ratelimited KERN_NOTICE with that number verbatim.
>>>
>>> This can be confusing for downstream users, who don't know what messages
>>> like "fragment too large: 1195725856" actually mean, or that they
>>> indicate some misconfigured infrastructure elsewhere.
>>
>> One wonders whether that error message is actually useful at all.
>> We could, for example, turn this into a tracepoint, or just get
>> rid of it.
>
> Just going on vague memories here, but: I think we've seen both spurious
> and real bugs reported based on this.
>
> I'm inclined to go with a dprintk or tracepoint but not removing it
> entirely.
Because this event can be chatty in some cases, I would prefer making
it a tracepoint rather than directing it to the log. Note also it would
be helpful if the server's net namespace and the client's IP address
and port were recorded.
Chris, there exists some boilerplate in fs/nfsd/trace.h to help with
the latter (just so you can see how to build the trace event definition;
you don't have to copy the macros to include/trace/events/sunrpc.h).
> --b.
>
>>
>>
>>> To allow users to more easily understand and debug these cases, add the
>>> number interpreted as ASCII if all characters are printable:
>>>
>>> RPC: fragment too large: 1195725856 (ASCII "GET ")
>>>
>>> If demand grows elsewhere, a new printk format that takes a number and
>>> outputs it in various formats is also a possible solution. For now, it
>>> seems reasonable to put this here since this particular code path is the
>>> one that has repeatedly come up in production.
>>>
>>> Signed-off-by: Chris Down <[email protected]>
>>> Cc: Chuck Lever <[email protected]>
>>> Cc: J. Bruce Fields <[email protected]>
>>> Cc: Trond Myklebust <[email protected]>
>>> Cc: David S. Miller <[email protected]>
>>> ---
>>> net/sunrpc/svcsock.c | 39 +++++++++++++++++++++++++++++++++++++--
>>> 1 file changed, 37 insertions(+), 2 deletions(-)
>>>
>>> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
>>> index 2e2f007dfc9f..046b1d104340 100644
>>> --- a/net/sunrpc/svcsock.c
>>> +++ b/net/sunrpc/svcsock.c
>>> @@ -46,6 +46,7 @@
>>> #include <linux/uaccess.h>
>>> #include <linux/highmem.h>
>>> #include <asm/ioctls.h>
>>> +#include <linux/ctype.h>
>>>
>>> #include <linux/sunrpc/types.h>
>>> #include <linux/sunrpc/clnt.h>
>>> @@ -863,6 +864,34 @@ static void svc_tcp_clear_pages(struct svc_sock *svsk)
>>> svsk->sk_datalen = 0;
>>> }
>>>
>>> +/* The reclen is taken directly from the first four bytes of the message with
>>> + * the highest bit stripped, which makes it ripe for protocol mixups. For
>>> + * example, if someone tries to send a HTTP GET request to us, we'll interpret
>>> + * it as a 1195725856-sized fragment (ie. (u32)'GET '), and print a ratelimited
>>> + * KERN_NOTICE with that number verbatim.
>>> + *
>>> + * To allow users to more easily understand and debug these cases, this
>>> + * function decodes the purported length as ASCII, and returns it if all
>>> + * characters were printable. Otherwise, we return NULL.
>>> + *
>>> + * WARNING: Since we reuse the u32 directly, the return value is not null
>>> + * terminated, and must be printed using %.*s with
>>> + * sizeof(svc_sock_reclen(svsk)).
>>> + */
>>> +static char *svc_sock_reclen_ascii(struct svc_sock *svsk)
>>> +{
>>> + u32 len_be = cpu_to_be32(svc_sock_reclen(svsk));
>>> + char *len_be_ascii = (char *)&len_be;
>>> + size_t i;
>>> +
>>> + for (i = 0; i < sizeof(len_be); i++) {
>>> + if (!isprint(len_be_ascii[i]))
>>> + return NULL;
>>> + }
>>> +
>>> + return len_be_ascii;
>>> +}
>>> +
>>> /*
>>> * Receive fragment record header into sk_marker.
>>> */
>>> @@ -870,6 +899,7 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
>>> struct svc_rqst *rqstp)
>>> {
>>> ssize_t want, len;
>>> + char *reclen_ascii;
>>>
>>> /* If we haven't gotten the record length yet,
>>> * get the next four bytes.
>>> @@ -898,9 +928,14 @@ static ssize_t svc_tcp_read_marker(struct svc_sock *svsk,
>>> return svc_sock_reclen(svsk);
>>>
>>> err_too_large:
>>> - net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n",
>>> + reclen_ascii = svc_sock_reclen_ascii(svsk);
>>> + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d%s%.*s%s\n",
>>> __func__, svsk->sk_xprt.xpt_server->sv_name,
>>> - svc_sock_reclen(svsk));
>>> + svc_sock_reclen(svsk),
>>> + reclen_ascii ? " (ASCII \"" : "",
>>> + (int)sizeof(u32),
>>> + reclen_ascii ?: "",
>>> + reclen_ascii ? "\")" : "");
>>> set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
>>> err_short:
>>> return -EAGAIN;
>>> --
>>> 2.30.2
>>>
>>
>> --
>> Chuck Lever
--
Chuck Lever