2002-10-08 19:16:08

by Chuck Lever

[permalink] [raw]
Subject: [PATCH] better RPC statistics

hi Linus-

this is the only patch i have that probably should be in 2.5 before
October 20. the rest i will save until things calm down.

this patch, against 2.5.41, adds some new statistics in the RPC layer.
these are similar to stats kept in reference client implementations, and
are designed to help sysadmins track down NFS client issues more quickly.
since this changes a "kernel interface" it should go in now.

if there are no objections, please apply this.

diff -drN -U2 00-stock/include/linux/sunrpc/stats.h 01-counters/include/linux/sunrpc/stats.h
--- 00-stock/include/linux/sunrpc/stats.h Mon Oct 7 14:24:14 2002
+++ 01-counters/include/linux/sunrpc/stats.h Tue Oct 8 11:04:21 2002
@@ -2,5 +2,5 @@
* linux/include/linux/sunrpc/stats.h
*
- * Client statistics collection for SUN RPC
+ * Statistics collection for SUN RPC
*
* Copyright (C) 1996 Olaf Kirch <[email protected]>
@@ -12,31 +12,38 @@
#include <linux/config.h>
#include <linux/proc_fs.h>
+#include <linux/cache.h>

struct rpc_stat {
struct rpc_program * program;

- unsigned int netcnt,
+ unsigned long netcnt,
netudpcnt,
nettcpcnt,
nettcpconn,
netreconn;
- unsigned int rpccnt,
+ unsigned long rpccnt,
rpcretrans,
rpcauthrefresh,
- rpcgarbage;
-};
+ rpcgarbage,
+ rpcnospace,
+ rpcbadxids,
+ rpcbadverfs,
+ rpccantconn,
+ rpcnomem,
+ rpccantsend;
+} ____cacheline_aligned;

struct svc_stat {
struct svc_program * program;

- unsigned int netcnt,
+ unsigned long netcnt,
netudpcnt,
nettcpcnt,
nettcpconn;
- unsigned int rpccnt,
+ unsigned long rpccnt,
rpcbadfmt,
rpcbadauth,
rpcbadclnt;
-};
+} ____cacheline_aligned;

void rpc_proc_init(void);
diff -drN -U2 00-stock/net/sunrpc/clnt.c 01-counters/net/sunrpc/clnt.c
--- 00-stock/net/sunrpc/clnt.c Mon Oct 7 14:24:06 2002
+++ 01-counters/net/sunrpc/clnt.c Tue Oct 8 11:04:21 2002
@@ -454,4 +454,6 @@
}

+ task->tk_client->cl_stats->rpcnomem++;
+
switch (status) {
case -EAGAIN: /* woken up; retry */
@@ -495,4 +497,5 @@
xprt_release(task);
task->tk_action = call_reserve;
+ task->tk_client->cl_stats->rpcnomem++;
rpc_delay(task, HZ>>4);
return;
@@ -670,4 +673,5 @@
break;
case -EAGAIN:
+ clnt->cl_stats->rpcnospace++;
task->tk_action = call_transmit;
break;
@@ -720,5 +724,5 @@

retry:
- clnt->cl_stats->rpcretrans++;
+ clnt->cl_stats->netreconn++;
task->tk_action = call_bind;
task->tk_status = 0;
@@ -758,6 +762,8 @@

/* Verify the RPC header */
- if (!(p = call_verify(task)))
+ if (!(p = call_verify(task))) {
+ clnt->cl_stats->rpcbadverfs++;
return;
+ }

/*
@@ -765,5 +771,5 @@
* processes whose uid is mapped to nobody on the server.
*/
- if (task->tk_client->cl_droppriv &&
+ if (clnt->cl_droppriv &&
(ntohl(*p) == NFSERR_ACCES || ntohl(*p) == NFSERR_PERM)) {
if (RPC_IS_SETUID(task) && task->tk_suid_retry) {
diff -drN -U2 00-stock/net/sunrpc/stats.c 01-counters/net/sunrpc/stats.c
--- 00-stock/net/sunrpc/stats.c Mon Oct 7 14:25:21 2002
+++ 01-counters/net/sunrpc/stats.c Tue Oct 8 11:04:21 2002
@@ -41,5 +41,5 @@

len = sprintf(buffer,
- "net %d %d %d %d\n",
+ "net %lu %lu %lu %lu\n",
statp->netcnt,
statp->netudpcnt,
@@ -47,8 +47,15 @@
statp->nettcpconn);
len += sprintf(buffer + len,
- "rpc %d %d %d\n",
+ "rpc %lu %lu %lu %lu %lu %lu %lu %lu %lu %lu\n",
statp->rpccnt,
statp->rpcretrans,
- statp->rpcauthrefresh);
+ statp->rpcauthrefresh,
+ statp->rpcgarbage,
+ statp->rpcnospace,
+ statp->rpcbadxids,
+ statp->rpcbadverfs,
+ statp->rpccantconn,
+ statp->rpcnomem,
+ statp->rpccantsend);

for (i = 0; i < prog->nrvers; i++) {
@@ -89,5 +96,5 @@

len = sprintf(buffer,
- "net %d %d %d %d\n",
+ "net %lu %lu %lu %lu\n",
statp->netcnt,
statp->netudpcnt,
@@ -95,5 +102,5 @@
statp->nettcpconn);
len += sprintf(buffer + len,
- "rpc %d %d %d %d %d\n",
+ "rpc %lu %lu %lu %lu %lu\n",
statp->rpccnt,
statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
diff -drN -U2 00-stock/net/sunrpc/xprt.c 01-counters/net/sunrpc/xprt.c
--- 00-stock/net/sunrpc/xprt.c Mon Oct 7 14:24:45 2002
+++ 01-counters/net/sunrpc/xprt.c Tue Oct 8 11:04:21 2002
@@ -214,4 +214,5 @@
xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
+ struct rpc_stat *stats = req->rq_task->tk_client->cl_stats;
struct socket *sock = xprt->sock;
struct msghdr msg;
@@ -225,4 +226,10 @@
return -ENOTCONN;

+ stats->netcnt++;
+ if (xprt->stream)
+ stats->nettcpcnt++;
+ else
+ stats->netudpcnt++;
+
xprt_pktdump("packet data:",
req->rq_svec->iov_base,
@@ -259,4 +266,7 @@
* prompts ECONNREFUSED.
*/
+ if (!xprt->stream)
+ stats->rpccantsend++;
+ break;
case -EAGAIN:
break;
@@ -266,7 +276,11 @@
if (xprt->stream)
result = -ENOTCONN;
+ else
+ stats->rpccantsend++;
break;
default:
printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
+ stats->rpccantsend++;
+ break;
}
return result;
@@ -417,4 +431,5 @@
xprt_connect(struct rpc_task *task)
{
+ struct rpc_stat *stats = task->tk_client->cl_stats;
struct rpc_xprt *xprt = task->tk_xprt;
struct socket *sock = xprt->sock;
@@ -488,4 +503,5 @@
xprt_close(xprt);
task->tk_status = -EAGAIN;
+ stats->nettcpconn++;
goto out_write;
}
@@ -498,4 +514,5 @@
xprt_close(xprt);
task->tk_status = -ENOTCONN;
+ stats->rpccantconn++;
goto out_write;

@@ -504,4 +521,5 @@
* system is soft mounted, just error out, like Solaris. */
xprt_close(xprt);
+ stats->rpccantconn++;
if (task->tk_client->cl_softrtry) {
printk(KERN_WARNING
@@ -529,4 +547,5 @@
xprt_conn_status(struct rpc_task *task)
{
+ struct rpc_stat *stats = task->tk_client->cl_stats;
struct rpc_xprt *xprt = task->tk_xprt;

@@ -535,4 +554,5 @@
dprintk("RPC: %4d xprt_conn_status: connection established\n",
task->tk_pid);
+ stats->nettcpconn++;
goto out;
case -ETIMEDOUT:
@@ -549,4 +569,5 @@
break;
}
+ stats->rpccantconn++;
/* if soft mounted, cause this RPC to fail */
if (task->tk_client->cl_softrtry)

--

corporate: <cel at netapp dot com>
personal: <chucklever at bigfoot dot com>


2002-10-08 21:59:59

by Theodore Y. Ts'o

[permalink] [raw]
Subject: Re: [PATCH] better RPC statistics

On Tue, Oct 08, 2002 at 03:20:07PM -0400, Chuck Lever wrote:
> hi Linus-
>
> this is the only patch i have that probably should be in 2.5 before
> October 20. the rest i will save until things calm down.
>
> this patch, against 2.5.41, adds some new statistics in the RPC layer.
> these are similar to stats kept in reference client implementations, and
> are designed to help sysadmins track down NFS client issues more quickly.
> since this changes a "kernel interface" it should go in now.

It's not just "kernel interfaces" which should go in before October
20th.

If we really want the feature freeze to be real, we really will need
to be hard-nosed about what we accept after the freeze date. (Said he
who is frantically trying to finish up a last couple of ext2/3
features in before 2.6.)

So I'd encourage folks to at least submit patches before Oct. 20th
when they are ready, and not try to save stuff for after feature
freeze....

- Ted

2002-10-08 22:16:34

by Dave Jones

[permalink] [raw]
Subject: Re: [PATCH] better RPC statistics

On Tue, Oct 08, 2002 at 06:04:50PM -0400, Theodore Ts'o wrote:

> If we really want the feature freeze to be real, we really will need
> to be hard-nosed about what we accept after the freeze date.

Absolutely. I'm looking forward to seeing what ludicrous patches
turn up disguised as 'cleanup's and the likes 8-)
Given the amount of broken stuff we still have however, I'm
dubious about slippage of a few things. LVM, and ISDN spring
to mind.

Dave

--
| Dave Jones. http://www.codemonkey.org.uk

2002-10-08 22:42:04

by Alan Cox

[permalink] [raw]
Subject: Re: [PATCH] better RPC statistics

On Tue, 2002-10-08 at 23:04, Theodore Ts'o wrote:
> It's not just "kernel interfaces" which should go in before October
> 20th.
>
> If we really want the feature freeze to be real, we really will need
> to be hard-nosed about what we accept after the freeze date. (Said he
> who is frantically trying to finish up a last couple of ext2/3
> features in before 2.6.)
>
> So I'd encourage folks to at least submit patches before Oct. 20th
> when they are ready, and not try to save stuff for after feature
> freeze....

There is a lot of driver clean up which will take lots of time beyond
Oct 31st. IDE is going to take a whole ton of work yet, but that (well
barring what happens with driverfs now) should be independant of the
rest of the world.