Hi,
This patch adds the address of the client that caused an
error in sunrpc/svc.c so that you get errors that look like:
svc: 192.168.66.28, port=709 :unknown version (3 for prog 100003, nfsd)
I've seen machines which get bunches of unknown version or similar
errors from time to time, and while the recent patch to add
the service helps to find which service has the wrong version it doesn't
help find the potentially bad client.
The patch is against a checkout of Linus's git tree made
on 2007-08-24.
One observation is that the svc_print_addr function
prints to a buffer which in this case makes life a little more complex;
it just feels as if there must be lots of places that print a
connection address - is there a better function to use anywhere?
I think actually there are a few places with semi duplicated code; e.g.
one_sock_name switches on the address family but only currently
has IPV4; I wonder how many other places are similar.
Dave
Signed-off-by: Dave Gilbert <[email protected]>
--
-----Open up your eyes, open up your mind, open up your code -------
/ Dr. David Alan Gilbert | Running GNU/Linux on Alpha,68K| Happy \
\ gro.gilbert @ treblig.org | MIPS,x86,ARM,SPARC,PPC & HPPA | In Hex /
\ _________________________|_____ http://www.treblig.org |_______/
--- linux-2.6/net/sunrpc/svc.c.predag 2007-08-25 00:50:06.000000000 +0100
+++ linux-2.6/net/sunrpc/svc.c 2007-08-25 01:34:56.000000000 +0100
@@ -777,6 +777,28 @@ svc_register(struct svc_serv *serv, int
}
/*
+ * Printk the given error with the address of the client that caused it.
+ */
+static int
+svc_printkerr(struct svc_rqst *rqstp, const char* fmt,...)
+{
+ va_list args;
+ int r;
+ char buf[RPC_MAX_ADDRBUFLEN];
+
+ if (!net_ratelimit())
+ return 0;
+
+ printk("svc: %s :", svc_print_addr(rqstp, buf, sizeof(buf)));
+
+ va_start(args, fmt);
+ r = vprintk(fmt, args);
+ va_end(args);
+
+ return r;
+}
+
+/*
* Process the RPC request.
*/
int
@@ -963,14 +985,13 @@ svc_process(struct svc_rqst *rqstp)
return 0;
err_short_len:
- if (net_ratelimit())
- printk("svc: short len %Zd, dropping request\n", argv->iov_len);
+ svc_printkerr(rqstp, "short len %Zd, dropping request\n",
+ argv->iov_len);
goto dropit; /* drop request */
err_bad_dir:
- if (net_ratelimit())
- printk("svc: bad direction %d, dropping request\n", dir);
+ svc_printkerr(rqstp, "bad direction %d, dropping request\n", dir);
serv->sv_stats->rpcbadfmt++;
goto dropit; /* drop request */
@@ -1000,8 +1021,7 @@ err_bad_prog:
goto sendit;
err_bad_vers:
- if (net_ratelimit())
- printk("svc: unknown version (%d for prog %d, %s)\n",
+ svc_printkerr(rqstp, "unknown version (%d for prog %d, %s)\n",
vers, prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++;
@@ -1011,16 +1031,14 @@ err_bad_vers:
goto sendit;
err_bad_proc:
- if (net_ratelimit())
- printk("svc: unknown procedure (%d)\n", proc);
+ svc_printkerr(rqstp, "unknown procedure (%d)\n", proc);
serv->sv_stats->rpcbadfmt++;
svc_putnl(resv, RPC_PROC_UNAVAIL);
goto sendit;
err_garbage:
- if (net_ratelimit())
- printk("svc: failed to decode args\n");
+ svc_printkerr(rqstp, "svc: failed to decode args\n");
rpc_stat = rpc_garbage_args;
err_bad:
On Sat, 25 Aug 2007 02:26:30 +0100 Dr. David Alan Gilbert wrote:
> Hi,
> This patch adds the address of the client that caused an
> error in sunrpc/svc.c so that you get errors that look like:
>
> svc: 192.168.66.28, port=709 :unknown version (3 for prog 100003, nfsd)
>
> I've seen machines which get bunches of unknown version or similar
> errors from time to time, and while the recent patch to add
> the service helps to find which service has the wrong version it doesn't
> help find the potentially bad client.
>
> The patch is against a checkout of Linus's git tree made
> on 2007-08-24.
>
> One observation is that the svc_print_addr function
> prints to a buffer which in this case makes life a little more complex;
> it just feels as if there must be lots of places that print a
> connection address - is there a better function to use anywhere?
>
> I think actually there are a few places with semi duplicated code; e.g.
> one_sock_name switches on the address family but only currently
> has IPV4; I wonder how many other places are similar.
>
> Dave
>
> Signed-off-by: Dave Gilbert <[email protected]>
>
> --
> -----Open up your eyes, open up your mind, open up your code -------
> / Dr. David Alan Gilbert | Running GNU/Linux on Alpha,68K| Happy \
> \ gro.gilbert @ treblig.org | MIPS,x86,ARM,SPARC,PPC & HPPA | In Hex /
> \ _________________________|_____ http://www.treblig.org |_______/
>
>
> --- linux-2.6/net/sunrpc/svc.c.predag 2007-08-25 00:50:06.000000000 +0100
> +++ linux-2.6/net/sunrpc/svc.c 2007-08-25 01:34:56.000000000 +0100
> @@ -777,6 +777,28 @@ svc_register(struct svc_serv *serv, int
> }
>
> /*
> + * Printk the given error with the address of the client that caused it.
> + */
> +static int
> +svc_printkerr(struct svc_rqst *rqstp, const char* fmt,...)
add:
__attribute__ ((format (printf, 2, 3)))
so that the compiler can check the args list.
> +{
> + va_list args;
> + int r;
> + char buf[RPC_MAX_ADDRBUFLEN];
> +
> + if (!net_ratelimit())
> + return 0;
> +
> + printk("svc: %s :", svc_print_addr(rqstp, buf, sizeof(buf)));
some KERN_* level, please.
> +
> + va_start(args, fmt);
> + r = vprintk(fmt, args);
> + va_end(args);
> +
> + return r;
> +}
> +
> +/*
> * Process the RPC request.
> */
> int
> @@ -963,14 +985,13 @@ svc_process(struct svc_rqst *rqstp)
> return 0;
>
> err_short_len:
> - if (net_ratelimit())
> - printk("svc: short len %Zd, dropping request\n", argv->iov_len);
> + svc_printkerr(rqstp, "short len %Zd, dropping request\n",
> + argv->iov_len);
>
> goto dropit; /* drop request */
>
> err_bad_dir:
> - if (net_ratelimit())
> - printk("svc: bad direction %d, dropping request\n", dir);
> + svc_printkerr(rqstp, "bad direction %d, dropping request\n", dir);
>
> serv->sv_stats->rpcbadfmt++;
> goto dropit; /* drop request */
> @@ -1000,8 +1021,7 @@ err_bad_prog:
> goto sendit;
>
> err_bad_vers:
> - if (net_ratelimit())
> - printk("svc: unknown version (%d for prog %d, %s)\n",
> + svc_printkerr(rqstp, "unknown version (%d for prog %d, %s)\n",
> vers, prog, progp->pg_name);
>
> serv->sv_stats->rpcbadfmt++;
> @@ -1011,16 +1031,14 @@ err_bad_vers:
> goto sendit;
>
> err_bad_proc:
> - if (net_ratelimit())
> - printk("svc: unknown procedure (%d)\n", proc);
> + svc_printkerr(rqstp, "unknown procedure (%d)\n", proc);
>
> serv->sv_stats->rpcbadfmt++;
> svc_putnl(resv, RPC_PROC_UNAVAIL);
> goto sendit;
>
> err_garbage:
> - if (net_ratelimit())
> - printk("svc: failed to decode args\n");
> + svc_printkerr(rqstp, "svc: failed to decode args\n");
>
> rpc_stat = rpc_garbage_args;
> err_bad:
> -
---
~Randy
*** Remember to use Documentation/SubmitChecklist when testing your code ***
Hi Randy,
Thanks for your comments,
* Randy Dunlap ([email protected]) wrote:
in reply to my patch:
> > +static int
> > +svc_printkerr(struct svc_rqst *rqstp, const char* fmt,...)
>
> add:
> __attribute__ ((format (printf, 2, 3)))
> so that the compiler can check the args list.
Added.
> > + printk("svc: %s :", svc_print_addr(rqstp, buf, sizeof(buf)));
>
> some KERN_* level, please.
I've gone with KERN_WARNING which seems about right.
Here is a v2 of the patch that adds those two fixes and also
tidies up the output a little.
Dave
--
-----Open up your eyes, open up your mind, open up your code -------
/ Dr. David Alan Gilbert | Running GNU/Linux on Alpha,68K| Happy \
\ gro.gilbert @ treblig.org | MIPS,x86,ARM,SPARC,PPC & HPPA | In Hex /
\ _________________________|_____ http://www.treblig.org |_______/
----------------------------------------------------------
Hi,
This patch adds the address of the client that caused an
error in sunrpc/svc.c so that you get errors that look like:
svc: 192.168.66.28, port=709: unknown version (3 for prog 100003, nfsd)
I've seen machines which get bunches of unknown version or similar
errors from time to time, and while the recent patch to add
the service helps to find which service has the wrong version it doesn't
help find the potentially bad client.
The patch is against a checkout of Linus's git tree made
on 2007-08-24.
One observation is that the svc_print_addr function
prints to a buffer which in this case makes life a little more complex;
it just feels as if there must be lots of places that print a
connection address - is there a better function to use anywhere?
I think actually there are a few places with semi duplicated code; e.g.
one_sock_name switches on the address family but only currently
has IPV4; I wonder how many other places are similar.
Dave
Signed-off-by: Dave Gilbert <[email protected]>
--- linux-2.6/net/sunrpc/svc.c.predag 2007-08-25 00:50:06.000000000 +0100
+++ linux-2.6/net/sunrpc/svc.c 2007-08-25 15:53:36.000000000 +0100
@@ -777,6 +777,30 @@ svc_register(struct svc_serv *serv, int
}
/*
+ * Printk the given error with the address of the client that caused it.
+ */
+static int
+__attribute__ ((format (printf, 2, 3)))
+svc_printkerr(struct svc_rqst *rqstp, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+ char buf[RPC_MAX_ADDRBUFLEN];
+
+ if (!net_ratelimit())
+ return 0;
+
+ printk(KERN_WARNING "svc: %s: ",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
+
+ va_start(args, fmt);
+ r = vprintk(fmt, args);
+ va_end(args);
+
+ return r;
+}
+
+/*
* Process the RPC request.
*/
int
@@ -963,14 +987,13 @@ svc_process(struct svc_rqst *rqstp)
return 0;
err_short_len:
- if (net_ratelimit())
- printk("svc: short len %Zd, dropping request\n", argv->iov_len);
+ svc_printkerr(rqstp, "short len %Zd, dropping request\n",
+ argv->iov_len);
goto dropit; /* drop request */
err_bad_dir:
- if (net_ratelimit())
- printk("svc: bad direction %d, dropping request\n", dir);
+ svc_printkerr(rqstp, "bad direction %d, dropping request\n", dir);
serv->sv_stats->rpcbadfmt++;
goto dropit; /* drop request */
@@ -1000,8 +1023,7 @@ err_bad_prog:
goto sendit;
err_bad_vers:
- if (net_ratelimit())
- printk("svc: unknown version (%d for prog %d, %s)\n",
+ svc_printkerr(rqstp, "unknown version (%d for prog %d, %s)\n",
vers, prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++;
@@ -1011,16 +1033,14 @@ err_bad_vers:
goto sendit;
err_bad_proc:
- if (net_ratelimit())
- printk("svc: unknown procedure (%d)\n", proc);
+ svc_printkerr(rqstp, "unknown procedure (%d)\n", proc);
serv->sv_stats->rpcbadfmt++;
svc_putnl(resv, RPC_PROC_UNAVAIL);
goto sendit;
err_garbage:
- if (net_ratelimit())
- printk("svc: failed to decode args\n");
+ svc_printkerr(rqstp, "failed to decode args\n");
rpc_stat = rpc_garbage_args;
err_bad:
On Sat, Aug 25, 2007 at 04:09:27PM +0100, Dr. David Alan Gilbert wrote:
> This patch adds the address of the client that caused an
> error in sunrpc/svc.c so that you get errors that look like:
>
> svc: 192.168.66.28, port=709: unknown version (3 for prog 100003, nfsd)
>
> I've seen machines which get bunches of unknown version or similar
> errors from time to time, and while the recent patch to add
> the service helps to find which service has the wrong version it doesn't
> help find the potentially bad client.
Looks like a reasonable idea to me, thanks! Any objection to just
calling it "svc_printk" instead of "svc_printkerr"?
I also wonder whether these shouldn't all be dprintk's instead of
printk's. One misbehaving client could create a lot of noise in the
logs.
--b.
* J. Bruce Fields ([email protected]) wrote:
> On Sat, Aug 25, 2007 at 04:09:27PM +0100, Dr. David Alan Gilbert wrote:
> > This patch adds the address of the client that caused an
> > error in sunrpc/svc.c so that you get errors that look like:
> >
> > svc: 192.168.66.28, port=709: unknown version (3 for prog 100003, nfsd)
> >
> > I've seen machines which get bunches of unknown version or similar
> > errors from time to time, and while the recent patch to add
> > the service helps to find which service has the wrong version it doesn't
> > help find the potentially bad client.
>
> Looks like a reasonable idea to me, thanks! Any objection to just
> calling it "svc_printk" instead of "svc_printkerr"?
No, that's fine.
> I also wonder whether these shouldn't all be dprintk's instead of
> printk's. One misbehaving client could create a lot of noise in the
> logs.
Yeh; I wasn't going to change anything else about it; the
rate limiting (that I think Neil put in a few months ago) means
that any one client doesn't get too noisy - I've got something
trying to do version 0 on nfs to a bunch of boxes and I do wonder
why; I suspect it's just a monitoring script - but it seems better
to know about it until I figure it out.
I'm not going to be able to recut the patch until the weekend;
do you just want to remove the 'err' in your copy and feed this
to the main tree with some of the rest of your patches?
Dave
--
-----Open up your eyes, open up your mind, open up your code -------
/ Dr. David Alan Gilbert | Running GNU/Linux on Alpha,68K| Happy \
\ gro.gilbert @ treblig.org | MIPS,x86,ARM,SPARC,PPC & HPPA | In Hex /
\ _________________________|_____ http://www.treblig.org |_______/
On Tue, Aug 28, 2007 at 07:03:06PM +0100, Dr. David Alan Gilbert wrote:
> * J. Bruce Fields ([email protected]) wrote:
> > I also wonder whether these shouldn't all be dprintk's instead of
> > printk's. One misbehaving client could create a lot of noise in the
> > logs.
>
> Yeh; I wasn't going to change anything else about it; the
> rate limiting (that I think Neil put in a few months ago) means
> that any one client doesn't get too noisy - I've got something
> trying to do version 0 on nfs to a bunch of boxes and I do wonder
> why; I suspect it's just a monitoring script - but it seems better
> to know about it until I figure it out.
OK.
> I'm not going to be able to recut the patch until the weekend;
> do you just want to remove the 'err' in your copy and feed this
> to the main tree with some of the rest of your patches?
Done; result at
git://linux-nfs.org/~bfields/linux.git nfs-server-stable
if you'd like to check.
--b.
On Mon, 27 Aug 2007 17:43:33 EDT, "J. Bruce Fields" said:
> Looks like a reasonable idea to me, thanks! Any objection to just
> calling it "svc_printk" instead of "svc_printkerr"?
>
> I also wonder whether these shouldn't all be dprintk's instead of
> printk's. One misbehaving client could create a lot of noise in the
> logs.
I shouldn't have to rebuild my kernel with debugging enabled just to see
who is throwing trash at my machine. printk(KERN_INFO maybe and/or using
a printk_ratelimit.
On Tue, Aug 28, 2007 at 03:12:26PM -0400, [email protected] wrote:
> On Mon, 27 Aug 2007 17:43:33 EDT, "J. Bruce Fields" said:
> > I also wonder whether these shouldn't all be dprintk's instead of
> > printk's. One misbehaving client could create a lot of noise in the
> > logs.
>
> I shouldn't have to rebuild my kernel with debugging enabled just to see
> who is throwing trash at my machine.
Fair enough.
The dprintk's throughout the nfs and sunrpc server and client code can
be selectively enabled at runtime using a set of sysctls in
sys/sunrpc/*_debug; see
include/linux/lockd/debug.h
include/linux/nfsd/debug.h
include/linux/sunrpc/debug.h
--b.
[email protected] wrote:
> On Mon, 27 Aug 2007 17:43:33 EDT, "J. Bruce Fields" said:
>
>
>> Looks like a reasonable idea to me, thanks! Any objection to just
>> calling it "svc_printk" instead of "svc_printkerr"?
>>
>> I also wonder whether these shouldn't all be dprintk's instead of
>> printk's. One misbehaving client could create a lot of noise in the
>> logs.
>>
>
> I shouldn't have to rebuild my kernel with debugging enabled just to see
> who is throwing trash at my machine. printk(KERN_INFO maybe and/or using
> a printk_ratelimit.
>
There are a lot of ways to discover who is throwing trash
at your system other than the kernel printing messages.
Tools such as tcpdump and tethereal/wireshark make much better
tools for this purpose.
Thanx...
ps
On Wed, Aug 29, 2007 at 09:37:00AM -0400, Peter Staubach wrote:
> There are a lot of ways to discover who is throwing trash
> at your system other than the kernel printing messages.
>
> Tools such as tcpdump and tethereal/wireshark make much better
> tools for this purpose.
The use of printk's and dprintk's in the rpc and nfsd code seems a
little arbitrary in places. It would be worth grepping for them all,
figuring out how they're currently used, thinking about what we need
them for (to help administrators identify problems? to help testers
provide debugging information to developers?), and how they might be
abused (remote hosts and unprivileged local users shouldn't be able to
spam the logs), developing some uniform policies, and submitting
patches.
I'm not volunteering, but if someone's interested I think it could be
really helpful, especially if it helps increase the usefulness of bug
reports.
--b.
On Tue, 28 Aug 2007 15:19:07 EDT, "J. Bruce Fields" said:
> On Tue, Aug 28, 2007 at 03:12:26PM -0400, [email protected] wrote:
> > On Mon, 27 Aug 2007 17:43:33 EDT, "J. Bruce Fields" said:
> > > I also wonder whether these shouldn't all be dprintk's instead of
> > > printk's. One misbehaving client could create a lot of noise in the
> > > logs.
> >
> > I shouldn't have to rebuild my kernel with debugging enabled just to see
> > who is throwing trash at my machine.
>
> Fair enough.
>
> The dprintk's throughout the nfs and sunrpc server and client code can
> be selectively enabled at runtime using a set of sysctls in
> sys/sunrpc/*_debug; see
>
> include/linux/lockd/debug.h
> include/linux/nfsd/debug.h
I looked at nfsd/debug.h, and saw the NFSD_DEBUG usage, and thought "Wow, if I
built it without RPC_DEBUG, I'm screwed". I didn't see where RPC_DEBUG was
enabled if CONFIG_SYSCTL was defined - having "compile in debugging code"
equated to "system has sysctl support" was as unexpected as the guy today who
didn't realize you needed SCSI support for an IDE CD/ROM. ;)
"You are trapped in a twisty maze of little #ifdef's, all different"... ;)
On Wed, 29 Aug 2007 09:37:00 EDT, Peter Staubach said:
> There are a lot of ways to discover who is throwing trash
> at your system other than the kernel printing messages.
>
> Tools such as tcpdump and tethereal/wireshark make much better
> tools for this purpose.
Given the number of times I've had to use tcpdump and wireshark to wade
through literally gigabyte traces looking for stuff specifically because
the kernel *didn't* printk information it had handy, I'm not too sympathetic.
Especially when the printk in question is currently saying "I know who's
causing the problem but I'm not going to tell you, nyah nyah"....
(And if there's better ways than kernel printing messages, does that mean we
should deprecate the iptables '-j LOG' target while we're at it?)
* J. Bruce Fields ([email protected]) wrote:
> On Tue, Aug 28, 2007 at 07:03:06PM +0100, Dr. David Alan Gilbert wrote:
<snip>
> > I'm not going to be able to recut the patch until the weekend;
> > do you just want to remove the 'err' in your copy and feed this
> > to the main tree with some of the rest of your patches?
>
> Done; result at
>
> git://linux-nfs.org/~bfields/linux.git nfs-server-stable
>
> if you'd like to check.
Yep, that's fine.
Dave
--
-----Open up your eyes, open up your mind, open up your code -------
/ Dr. David Alan Gilbert | Running GNU/Linux on Alpha,68K| Happy \
\ gro.gilbert @ treblig.org | MIPS,x86,ARM,SPARC,PPC & HPPA | In Hex /
\ _________________________|_____ http://www.treblig.org |_______/